diff --git a/swebench-bash-only.jsonl b/swebench-bash-only.jsonl new file mode 100644 index 0000000..0f2f2be --- /dev/null +++ b/swebench-bash-only.jsonl @@ -0,0 +1,14000 @@ +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pytest-dev__pytest-10356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11532", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-12050", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pydata__xarray-3151", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13512", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-18189", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13028", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13449", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-25332", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15863", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14376", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14017", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pydata__xarray-4695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-16450", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14493", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-25960", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "psf__requests-1921", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-12308", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "astropy__astropy-13579", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-12708", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pylint-dev__pylint-7277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pydata__xarray-3993", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-9258", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pytest-dev__pytest-7324", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pydata__xarray-7233", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-21379", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pylint-dev__pylint-7080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-12096", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-9367", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-13615", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-7910", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11400", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11740", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-7462", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-13647", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14140", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-12663", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pylint-dev__pylint-4970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14534", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "astropy__astropy-13236", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11292", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13417", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-20916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pydata__xarray-6721", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pytest-dev__pytest-7521", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "astropy__astropy-14598", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-21612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-25122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "psf__requests-2317", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-10449", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pytest-dev__pytest-8399", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14434", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13809", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-24570", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-12209", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-14087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-12193", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-25479", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-7757", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pydata__xarray-7229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16136", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11133", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pydata__xarray-4356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11206", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16493", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15375", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-19040", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-13372", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-24539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-9602", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-10466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-10880", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pytest-dev__pytest-5787", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14311", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-25973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-22871", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-23476", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-25287", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "psf__requests-1724", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15280", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13794", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-24443", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-21568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13128", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-18211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-26194", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-17087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-8475", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14170", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-14894", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "astropy__astropy-7671", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11276", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-14496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15467", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13810", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15098", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11490", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13297", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-9288", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-12774", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15499", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-19495", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-8035", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-14141", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-9320", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-23824", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14608", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13837", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-20826", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-24970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-14983", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13344", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pytest-dev__pytest-10081", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16116", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-15349", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-13757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-7889", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-24637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16502", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-25747", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-11618", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-24870", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pylint-dev__pylint-8898", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-7985", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-12125", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-12858", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-13974", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14373", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15732", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-8459", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15380", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-10673", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-8548", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13121", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pytest-dev__pytest-7205", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-17318", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13089", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14787", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pydata__xarray-4094", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-14710", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13821", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-14629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16801", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15104", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-23950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-25232", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14053", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-15599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-10914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11749", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-23413", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-24026", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pytest-dev__pytest-5809", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16899", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15851", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-10097", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-12741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-12406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11734", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-15809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11790", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-14976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14007", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11433", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13401", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pytest-dev__pytest-7490", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-10908", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-10844", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-12273", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16333", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "astropy__astropy-13977", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-12973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-17139", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-8638", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16255", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pylint-dev__pylint-6528", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-12276", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pydata__xarray-3305", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13670", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14122", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-19346", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-22456", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-12325", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-9658", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16667", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-23262", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-25102", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14238", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16560", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16145", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pallets__flask-5014", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-20428", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15572", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14349", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-11310", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-12143", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-11578", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "astropy__astropy-14508", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-9711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11149", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-8120", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11815", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-19783", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-22865", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "psf__requests-2931", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11333", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11451", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "psf__requests-5414", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-8551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11951", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-7530", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15252", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16256", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15268", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13925", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-13135", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16263", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "astropy__astropy-14995", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pydata__xarray-3677", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pydata__xarray-4687", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-13496", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14351", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "astropy__astropy-8707", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "astropy__astropy-13398", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-7454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-13798", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-12481", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16032", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-12155", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-18199", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "psf__requests-1142", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13212", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11555", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-24213", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-10614", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pydata__xarray-6992", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-23299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-20590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-25311", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13363", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-14531", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-23314", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-15345", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13012", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15022", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16527", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-11445", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pytest-dev__pytest-6197", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-9296", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-10297", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13807", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "astropy__astropy-14365", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-9230", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-13124", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-13480", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-10999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-22914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-16766", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-24149", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15103", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "astropy__astropy-14182", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-8621", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-26342", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-21930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11603", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pytest-dev__pytest-6202", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-7590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-19637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pylint-dev__pylint-4551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13410", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pylint-dev__pylint-6386", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15278", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13741", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13023", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "astropy__astropy-8872", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13569", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "astropy__astropy-7166", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pydata__xarray-6938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16485", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "astropy__astropy-12907", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-22080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-20801", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-12304", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pytest-dev__pytest-10051", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "astropy__astropy-14539", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-25931", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-12585", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-20488", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "mwaskom__seaborn-3187", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15128", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-15017", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15561", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15161", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-12965", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15277", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-10323", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-22714", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-8056", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-10435", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16100", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "psf__requests-1766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pytest-dev__pytest-5262", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-9281", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-20859", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13158", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-13877", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-8595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13343", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15525", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11095", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-17630", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14559", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pytest-dev__pytest-5631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-26466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16642", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pydata__xarray-7393", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pytest-dev__pytest-5840", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-15875", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-12039", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15382", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-14711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11885", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-25775", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pylint-dev__pylint-4661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-15100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-17655", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-18763", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-26291", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-13031", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-24562", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pydata__xarray-2905", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-26208", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "astropy__astropy-7606", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "astropy__astropy-13453", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pytest-dev__pytest-7236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-13878", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pylint-dev__pylint-6903", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "astropy__astropy-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14915", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15503", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15037", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16612", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14404", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-12754", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-9229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-8265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14580", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pytest-dev__pytest-7571", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-8269", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13112", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13513", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pytest-dev__pytest-7432", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15731", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-16792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14034", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "astropy__astropy-14096", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14765", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-18698", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14771", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15957", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pydata__xarray-3095", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-19954", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-16886", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11239", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pydata__xarray-4629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16901", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13346", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-12262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-24661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11119", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "psf__requests-6028", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-20154", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-13142", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14752", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15563", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-16597", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15987", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-15976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-10973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15127", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-13439", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-7748", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13516", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-10554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16082", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13786", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-21596", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-26113", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14672", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13109", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11141", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-23412", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15368", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15814", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13933", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-12682", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-20676", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-8593", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-13852", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16819", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-12713", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14855", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-26323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-13328", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-11510", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13195", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-24066", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-9591", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pytest-dev__pytest-7982", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14725", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-17029", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-14623", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pydata__xarray-6461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16662", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-9461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11848", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-22719", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pydata__xarray-4966", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11728", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-24627", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-7440", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-12419", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-13989", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11163", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "matplotlib__matplotlib-24177", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14011", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-13279", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-23534", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "astropy__astropy-7336", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "mwaskom__seaborn-3069", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "scikit-learn__scikit-learn-13779", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11179", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pydata__xarray-4075", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16595", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-14248", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pylint-dev__pylint-4604", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "astropy__astropy-14369", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-20438", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14500", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-9673", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "astropy__astropy-14309", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-9698", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-12489", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pydata__xarray-6744", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-17084", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "pydata__xarray-6599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11138", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11477", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-13091", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11099", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-16429", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-14792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-11999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sphinx-doc__sphinx-8721", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "sympy__sympy-21847", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Llama 4 Maverick Instruct", "example_id": "django__django-15973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pytest-dev__pytest-10356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11532", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-12050", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pydata__xarray-3151", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13512", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-18189", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13028", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13449", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-25332", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15863", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14376", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pydata__xarray-4695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-16450", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14493", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-25960", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "psf__requests-1921", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-12308", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "astropy__astropy-13579", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-12708", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14089", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pylint-dev__pylint-7277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pydata__xarray-3993", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-9258", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pytest-dev__pytest-7324", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pydata__xarray-7233", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-21379", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pylint-dev__pylint-7080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-12096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16569", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-9367", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-13615", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-7910", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11400", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11740", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-7462", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-13647", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14140", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-12663", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pylint-dev__pylint-4970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14534", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "astropy__astropy-13236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11292", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13417", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-20916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pydata__xarray-6721", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pytest-dev__pytest-7521", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "astropy__astropy-14598", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-21612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-25122", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "psf__requests-2317", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-10449", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pytest-dev__pytest-8399", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14434", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-24570", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-12209", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-14087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-12193", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-25479", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-7757", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pydata__xarray-7229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16136", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11133", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pydata__xarray-4356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11206", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16493", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15375", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-19040", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-13372", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-24539", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-9602", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-10466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16139", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-10880", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pytest-dev__pytest-5787", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14311", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-25973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-22871", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-23476", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-25287", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "psf__requests-1724", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15280", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13794", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-24443", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-21568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13128", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-18211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-26194", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-17087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-8475", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14170", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-14894", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "astropy__astropy-7671", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11276", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-14496", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15467", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13810", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15098", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11490", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13297", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-9288", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-12774", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15499", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-19495", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-8035", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-14141", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-9320", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-23824", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14608", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13837", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-20826", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-24970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-14983", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13344", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pytest-dev__pytest-10081", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16116", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-15349", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-13757", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-7889", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-24637", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16502", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-25747", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-11618", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-24870", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pylint-dev__pylint-8898", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-7985", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-12125", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-12858", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-13974", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14373", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15732", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-8459", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15380", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-10673", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-8548", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13121", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pytest-dev__pytest-7205", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-17318", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14787", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pydata__xarray-4094", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-14710", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13821", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-14629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16801", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15104", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-23950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-25232", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-15599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-10914", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11749", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-23413", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-24026", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pytest-dev__pytest-5809", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16899", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15851", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-10097", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-12741", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-12406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-14053", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11734", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-15809", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11790", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-14976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14007", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11433", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13401", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pytest-dev__pytest-7490", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-10908", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-10844", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-12273", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16333", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "astropy__astropy-13977", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-12973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-17139", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-8638", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16255", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pylint-dev__pylint-6528", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-12276", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pydata__xarray-3305", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13670", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14122", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-19346", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-22456", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-12325", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-9658", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16667", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-23262", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-25102", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14238", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16560", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16145", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pallets__flask-5014", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-20428", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15572", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14349", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-11310", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-12143", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-11578", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "astropy__astropy-14508", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-9711", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11149", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-8120", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11815", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-19783", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-22865", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "psf__requests-2931", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11333", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11451", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "psf__requests-5414", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-8551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11951", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-7530", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15252", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16256", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15268", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13925", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-13135", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16263", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "astropy__astropy-14995", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pydata__xarray-3677", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pydata__xarray-4687", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-13496", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14351", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "astropy__astropy-8707", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "astropy__astropy-13398", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-7454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-13798", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-12481", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16032", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-12155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-18199", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "psf__requests-1142", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13212", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11555", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-24213", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-10614", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pydata__xarray-6992", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-23299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-20590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-25311", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13363", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-14531", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-23314", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15741", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-15345", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13012", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15022", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16527", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-11445", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pytest-dev__pytest-6197", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-9296", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-10297", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13807", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "astropy__astropy-14365", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-9230", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-13124", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11066", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-13480", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-10999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-22914", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-16766", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-24149", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15103", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "astropy__astropy-14182", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11299", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-8621", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-26342", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-21930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11880", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-12419", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11603", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pytest-dev__pytest-6202", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-7590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-19637", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pylint-dev__pylint-4551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13410", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pylint-dev__pylint-6386", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15278", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13741", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13023", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "astropy__astropy-8872", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13569", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "astropy__astropy-7166", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pydata__xarray-6938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16485", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "astropy__astropy-12907", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-22080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-20801", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-12304", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pytest-dev__pytest-10051", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "astropy__astropy-14539", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-25931", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-12585", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-20488", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "mwaskom__seaborn-3187", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15128", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13033", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-15017", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15561", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15161", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-12965", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15277", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-10323", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-22714", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-8056", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-10435", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16100", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "psf__requests-1766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pytest-dev__pytest-5262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-9281", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-20859", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13158", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-13877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-8595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13343", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15525", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11095", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-17630", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14559", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pytest-dev__pytest-5631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-26466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16642", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pydata__xarray-7393", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pytest-dev__pytest-5840", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13658", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-15875", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-12039", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15382", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-14711", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11885", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-25775", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pylint-dev__pylint-4661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-15100", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-17655", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-18763", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-26291", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-13031", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-24562", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pydata__xarray-2905", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-26208", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "astropy__astropy-7606", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "astropy__astropy-13453", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pytest-dev__pytest-7236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-13878", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pylint-dev__pylint-6903", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "astropy__astropy-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14915", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14539", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15503", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15037", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14404", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-12754", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-9229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-8265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14580", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pytest-dev__pytest-7571", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-8269", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13112", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13513", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pytest-dev__pytest-7432", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15731", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-16792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14034", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "astropy__astropy-14096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14765", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-18698", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14771", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15957", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pydata__xarray-3095", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-19954", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-16886", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11239", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13820", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pydata__xarray-4629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16901", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13346", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-12262", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-24661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11119", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "psf__requests-6028", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-20154", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-13142", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14752", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15563", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-16597", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15987", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-15976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-10973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15127", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-13439", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-7748", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13516", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-10554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16082", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13786", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-21596", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-26113", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14672", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13109", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11141", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-23412", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15368", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15814", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13933", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-12682", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-20676", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-8593", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-13852", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16819", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-12713", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14855", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-26323", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-13328", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-11510", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13195", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15916", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-24066", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-9591", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pytest-dev__pytest-7982", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14725", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-17029", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-14623", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pydata__xarray-6461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16662", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-9461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11848", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-22719", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pydata__xarray-4966", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11728", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-24627", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-7440", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-12419", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-13989", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11163", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "matplotlib__matplotlib-24177", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14011", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-13279", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-23534", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "astropy__astropy-7336", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "mwaskom__seaborn-3069", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "scikit-learn__scikit-learn-13779", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11179", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pydata__xarray-4075", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16595", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-14248", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pylint-dev__pylint-4604", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "astropy__astropy-14369", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-20438", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14500", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-9673", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "astropy__astropy-14309", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-9698", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-12489", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pydata__xarray-6744", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-17084", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "pydata__xarray-6599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11138", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11477", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-13091", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11099", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-16429", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-14792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-11999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sphinx-doc__sphinx-8721", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "sympy__sympy-21847", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 3.7 Sonnet (20250219)", "example_id": "django__django-15973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pytest-dev__pytest-10356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11532", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-12050", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pydata__xarray-3151", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13512", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-18189", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13028", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11211", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-25332", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15863", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14376", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pydata__xarray-4695", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-16450", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-25960", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "psf__requests-1921", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-12308", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "astropy__astropy-13579", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-12708", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pylint-dev__pylint-7277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pydata__xarray-3993", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-9258", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pytest-dev__pytest-7324", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pydata__xarray-7233", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-21379", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pylint-dev__pylint-7080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-12096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-9367", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-13615", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-7910", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11400", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11740", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-7462", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-13647", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14140", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-12663", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pylint-dev__pylint-4970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14534", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "astropy__astropy-13236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11292", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13417", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-20916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pydata__xarray-6721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pytest-dev__pytest-7521", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "astropy__astropy-14598", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-21612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-25122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "psf__requests-2317", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-10449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pytest-dev__pytest-8399", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14434", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-24570", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-12209", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-14087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-12193", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-25479", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-7757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pydata__xarray-7229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16136", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11133", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pydata__xarray-4356", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11206", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15375", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-19040", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-13372", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-24539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-9602", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-10466", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-10880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pytest-dev__pytest-5787", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-25973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-22871", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-23476", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-25287", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "psf__requests-1724", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15280", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13794", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-24443", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-21568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13128", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-18211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-26194", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-17087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-8475", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14170", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-14894", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "astropy__astropy-7671", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16938", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-14496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15467", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13810", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15098", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11490", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-9288", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-12774", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15499", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-19495", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-8035", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-14141", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-9320", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-23824", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14608", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13837", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-20826", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-24970", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-14983", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13344", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pytest-dev__pytest-10081", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16116", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-15349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-13757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-7889", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-24637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16502", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-25747", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-11618", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-24870", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pylint-dev__pylint-8898", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-7985", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-12125", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-12858", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-13974", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14373", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15732", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-8459", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15380", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-10673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-8548", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13121", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pytest-dev__pytest-7205", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-17318", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14787", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pydata__xarray-4094", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-14710", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13821", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-14629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15104", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-23950", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-25232", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14053", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-15599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-10914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11749", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-23413", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-24026", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pytest-dev__pytest-5809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16899", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15851", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-10097", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-12741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-12406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11734", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-15809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11790", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-14976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14007", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11433", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13401", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pytest-dev__pytest-7490", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-10908", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-10844", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-12273", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "astropy__astropy-13977", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-12973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-17139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-8638", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16255", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pylint-dev__pylint-6528", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-12276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pydata__xarray-3305", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13670", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-19346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-22456", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-12325", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-9658", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16667", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-23262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-25102", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14238", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16560", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16145", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pallets__flask-5014", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-20428", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15572", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-11310", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-12143", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-11578", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "astropy__astropy-14508", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-9711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-8120", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11815", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-19783", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-22865", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "psf__requests-2931", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11451", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "psf__requests-5414", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-8551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11951", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-7530", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15252", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16256", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15268", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13925", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-13135", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16263", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "astropy__astropy-14995", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pydata__xarray-3677", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pydata__xarray-4687", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-13496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14351", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "astropy__astropy-8707", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "astropy__astropy-13398", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-7454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-13798", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-12481", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16032", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-12155", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-18199", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "psf__requests-1142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13212", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11555", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-24213", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-10614", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pydata__xarray-6992", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-23299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-20590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-25311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13363", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-14531", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-23314", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-15345", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13012", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15022", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16527", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-11445", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pytest-dev__pytest-6197", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-9296", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-10297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13807", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "astropy__astropy-14365", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-9230", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-13124", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-13480", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-10999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-22914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-16766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-24149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15103", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "astropy__astropy-14182", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11299", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-8621", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-26342", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-21930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16950", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11603", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pytest-dev__pytest-6202", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-7590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-19637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pylint-dev__pylint-4551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13410", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pylint-dev__pylint-6386", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15278", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13023", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "astropy__astropy-8872", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15554", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "astropy__astropy-7166", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pydata__xarray-6938", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16485", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "astropy__astropy-12907", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-22080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-20801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-12304", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pytest-dev__pytest-10051", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "astropy__astropy-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-25931", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-12585", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-20488", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "mwaskom__seaborn-3187", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13033", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-15017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15561", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15161", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-12965", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15277", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-10323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-22714", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-8056", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-10435", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "psf__requests-1766", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pytest-dev__pytest-5262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-9281", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-20859", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13158", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-13877", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-8595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13343", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15525", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-17630", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14559", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pytest-dev__pytest-5631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-26466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16642", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15930", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pydata__xarray-7393", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13406", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pytest-dev__pytest-5840", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-15875", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-12039", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15382", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-14711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11885", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-25775", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pylint-dev__pylint-4661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-15100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-17655", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-18763", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-26291", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-13031", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-24562", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pydata__xarray-2905", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-26208", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "astropy__astropy-7606", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "astropy__astropy-13453", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pytest-dev__pytest-7236", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-13878", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pylint-dev__pylint-6903", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "astropy__astropy-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14915", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15503", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15037", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16612", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14404", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-12754", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-9229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-8265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14580", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pytest-dev__pytest-7571", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-8269", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13112", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13513", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pytest-dev__pytest-7432", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15731", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-16792", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14034", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "astropy__astropy-14096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14765", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-18698", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14771", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15957", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pydata__xarray-3095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-19954", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-16886", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11239", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13820", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pydata__xarray-4629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16901", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-12262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-24661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11119", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "psf__requests-6028", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-20154", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-13142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14752", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15563", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-16597", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15987", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-15976", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-10973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15127", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-13439", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-7748", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13516", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-10554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16082", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13786", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-21596", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-26113", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14672", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13109", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11141", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-23412", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15368", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15814", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13933", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-12682", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-20676", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-8593", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-13852", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16819", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-12713", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14855", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-26323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-13328", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-11510", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13195", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-24066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-9591", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pytest-dev__pytest-7982", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14725", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-17029", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-14623", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pydata__xarray-6461", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16662", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-9461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11848", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-22719", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pydata__xarray-4966", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11728", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-24627", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-7440", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-13989", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11163", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "matplotlib__matplotlib-24177", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14011", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-13279", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-23534", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "astropy__astropy-7336", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "mwaskom__seaborn-3069", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "scikit-learn__scikit-learn-13779", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11179", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pydata__xarray-4075", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-14248", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pylint-dev__pylint-4604", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "astropy__astropy-14369", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-20438", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14500", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-9673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "astropy__astropy-14309", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-9698", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-12489", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pydata__xarray-6744", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-17084", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "pydata__xarray-6599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11138", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11477", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-13091", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11099", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-16429", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-14792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-11999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sphinx-doc__sphinx-8721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "sympy__sympy-21847", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Sonnet (20250514)", "example_id": "django__django-15973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pytest-dev__pytest-10356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11532", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-12050", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pydata__xarray-3151", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13512", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-18189", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13028", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-25332", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15863", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14376", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14017", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pydata__xarray-4695", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-16450", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-25960", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "psf__requests-1921", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-12308", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "astropy__astropy-13579", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-12708", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pylint-dev__pylint-7277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pydata__xarray-3993", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-9258", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pytest-dev__pytest-7324", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pydata__xarray-7233", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-21379", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pylint-dev__pylint-7080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-12096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-9367", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-13615", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-7910", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11400", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11740", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-7462", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-13647", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14140", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-12663", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pylint-dev__pylint-4970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14534", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "astropy__astropy-13236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11292", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13417", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-20916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pydata__xarray-6721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pytest-dev__pytest-7521", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "astropy__astropy-14598", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-21612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-25122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "psf__requests-2317", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-10449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pytest-dev__pytest-8399", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14434", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13809", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-24570", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-12209", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-14087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-12193", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-25479", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-7757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pydata__xarray-7229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16136", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11133", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pydata__xarray-4356", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11206", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15375", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-19040", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-13372", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-24539", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-9602", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-10466", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-10880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pytest-dev__pytest-5787", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14311", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-25973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-22871", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-23476", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-25287", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "psf__requests-1724", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15280", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13794", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-24443", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-21568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-18211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-26194", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-17087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-8475", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14170", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-14894", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "astropy__astropy-7671", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-14496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15467", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13810", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15098", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11490", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-9288", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-12774", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15499", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-19495", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-8035", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-14141", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-9320", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-23824", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14608", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13837", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-20826", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-24970", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-14983", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13344", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pytest-dev__pytest-10081", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16116", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-15349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-13757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-7889", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-24637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16502", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-25747", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-11618", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-24870", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pylint-dev__pylint-8898", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-7985", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-12125", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-12858", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-13974", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14373", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15732", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-8459", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15380", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-10673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-8548", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13121", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pytest-dev__pytest-7205", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-17318", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14787", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pydata__xarray-4094", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-14710", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13821", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-14629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15104", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-23950", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-25232", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-15599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-10914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11749", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-23413", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-24026", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pytest-dev__pytest-5809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16899", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15851", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-10097", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-12741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-12406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11734", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-15809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11790", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-14976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14007", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11433", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13401", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pytest-dev__pytest-7490", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-10908", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-10844", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-12273", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "astropy__astropy-13977", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-12973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-17139", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-8638", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16255", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pylint-dev__pylint-6528", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-12276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pydata__xarray-3305", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13670", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14122", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-19346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-22456", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-12325", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-9658", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16667", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-23262", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-25102", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14238", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16560", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16145", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pallets__flask-5014", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-20428", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15572", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-11310", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-12143", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-11578", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "astropy__astropy-14508", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-9711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11149", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-8120", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11815", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-19783", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-22865", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "psf__requests-2931", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11333", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11451", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "psf__requests-5414", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-8551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11951", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-7530", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15252", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16256", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15268", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13925", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-13135", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16263", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "astropy__astropy-14995", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pydata__xarray-3677", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pydata__xarray-4687", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-13496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14351", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "astropy__astropy-8707", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "astropy__astropy-13398", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-7454", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-13798", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-12481", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16032", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-12155", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-18199", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "psf__requests-1142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13212", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11555", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-24213", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-10614", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pydata__xarray-6992", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-23299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-20590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-25311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13363", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-14531", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-23314", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-15345", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13012", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15022", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16527", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-11445", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pytest-dev__pytest-6197", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-9296", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-10297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13807", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "astropy__astropy-14365", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-9230", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-13124", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-13480", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-10999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-22914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-16766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-24149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15103", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "astropy__astropy-14182", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11299", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-8621", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-26342", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-21930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11603", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pytest-dev__pytest-6202", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-7590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-19637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pylint-dev__pylint-4551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13410", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pylint-dev__pylint-6386", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15278", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13741", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13023", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "astropy__astropy-8872", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "astropy__astropy-7166", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pydata__xarray-6938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16485", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "astropy__astropy-12907", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-22080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-20801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-12304", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pytest-dev__pytest-10051", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "astropy__astropy-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-25931", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-12585", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-20488", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "mwaskom__seaborn-3187", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15128", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13033", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-15017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15561", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15161", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-12965", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15277", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-10323", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-22714", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-8056", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-10435", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "psf__requests-1766", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pytest-dev__pytest-5262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-9281", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-20859", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13158", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-13877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-8595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13343", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15525", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-17630", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14559", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pytest-dev__pytest-5631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-26466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16642", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15930", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pydata__xarray-7393", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pytest-dev__pytest-5840", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-15875", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-12039", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15382", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-14711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11885", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-25775", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pylint-dev__pylint-4661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-15100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-17655", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-18763", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-26291", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-13031", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-24562", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pydata__xarray-2905", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-26208", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "astropy__astropy-7606", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "astropy__astropy-13453", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pytest-dev__pytest-7236", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-13878", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pylint-dev__pylint-6903", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "astropy__astropy-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14915", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15503", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15037", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16612", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14404", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-12754", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-9229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-8265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14580", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pytest-dev__pytest-7571", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-8269", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13112", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13513", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pytest-dev__pytest-7432", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15731", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-16792", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14034", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "astropy__astropy-14096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14765", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-18698", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14771", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15957", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pydata__xarray-3095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-19954", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-16886", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11239", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13820", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pydata__xarray-4629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16901", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13346", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-12262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-24661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11119", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "psf__requests-6028", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-20154", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-13142", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14752", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15563", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-16597", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15987", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-15976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-10973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15127", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-13439", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-7748", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13516", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-10554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16082", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13786", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-21596", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-26113", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14672", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13109", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11141", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-23412", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15368", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15814", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13933", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-12682", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-20676", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-8593", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-13852", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16819", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-12713", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14855", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-26323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-13328", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-11510", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13195", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-24066", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-9591", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pytest-dev__pytest-7982", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14725", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-17029", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-14623", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pydata__xarray-6461", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16662", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-9461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11848", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-22719", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pydata__xarray-4966", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11728", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-24627", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-7440", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-13989", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11163", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "matplotlib__matplotlib-24177", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14011", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-13279", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-23534", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "astropy__astropy-7336", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "mwaskom__seaborn-3069", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "scikit-learn__scikit-learn-13779", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11179", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pydata__xarray-4075", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-14248", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pylint-dev__pylint-4604", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "astropy__astropy-14369", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-20438", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14500", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-9673", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "astropy__astropy-14309", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-9698", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-12489", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pydata__xarray-6744", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-17084", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "pydata__xarray-6599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11138", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11477", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-13091", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11099", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-16429", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-14792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-11999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sphinx-doc__sphinx-8721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "sympy__sympy-21847", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 2.5 Pro (2025-05-06)", "example_id": "django__django-15973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pytest-dev__pytest-10356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11532", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-12050", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pydata__xarray-3151", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13512", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-18189", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13028", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-25332", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15863", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14376", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pydata__xarray-4695", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-16450", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-25960", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "psf__requests-1921", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-12308", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "astropy__astropy-13579", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-12708", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14089", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pylint-dev__pylint-7277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pydata__xarray-3993", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-9258", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pytest-dev__pytest-7324", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pydata__xarray-7233", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-21379", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pylint-dev__pylint-7080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-12096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-9367", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-13615", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-7910", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11400", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11740", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-7462", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-13647", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14140", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-12663", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pylint-dev__pylint-4970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14534", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "astropy__astropy-13236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11292", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13417", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-20916", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pydata__xarray-6721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pytest-dev__pytest-7521", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "astropy__astropy-14598", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-21612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-25122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "psf__requests-2317", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-10449", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pytest-dev__pytest-8399", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14434", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13809", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-24570", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-12209", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-14087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-12193", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-25479", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-7757", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pydata__xarray-7229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16136", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11133", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pydata__xarray-4356", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11206", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15375", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-19040", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-13372", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-24539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-9602", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-10466", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-10880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pytest-dev__pytest-5787", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-25973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-22871", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-23476", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-25287", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "psf__requests-1724", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15280", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13794", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-24443", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-21568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-18211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-26194", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-17087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-8475", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14170", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-14894", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "astropy__astropy-7671", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-14496", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15467", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13810", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15098", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11490", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-9288", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-12774", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15499", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-19495", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-8035", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-14141", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-9320", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-23824", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14608", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13837", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-20826", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-24970", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-14983", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13344", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pytest-dev__pytest-10081", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16116", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-15349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-13757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-7889", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-24637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16502", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-25747", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-11618", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-24870", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pylint-dev__pylint-8898", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-7985", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-12125", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-12858", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-13974", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14373", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15732", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-8459", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15380", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-10673", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-8548", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13121", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pytest-dev__pytest-7205", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-17318", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14787", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pydata__xarray-4094", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-14710", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13821", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-14629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15104", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-23950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-25232", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-15599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-10914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11749", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-23413", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-24026", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pytest-dev__pytest-5809", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16899", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15851", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-10097", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-12741", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-12406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11734", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-15809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11790", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-14976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14007", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11433", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13401", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pytest-dev__pytest-7490", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-10908", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-10844", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-12273", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "astropy__astropy-13977", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-12973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-17139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-8638", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16255", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pylint-dev__pylint-6528", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-12276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pydata__xarray-3305", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13670", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-19346", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-22456", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-12325", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-9658", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16667", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-23262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-25102", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14238", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16560", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16145", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pallets__flask-5014", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-20428", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15572", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11964", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-11310", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-12143", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-11578", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "astropy__astropy-14508", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-9711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11149", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-8120", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11815", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-19783", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-22865", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "psf__requests-2931", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11333", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11451", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "psf__requests-5414", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-8551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11951", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-7530", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15252", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16256", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15268", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13925", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-13135", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16263", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "astropy__astropy-14995", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13568", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pydata__xarray-3677", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pydata__xarray-4687", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-13496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14351", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "astropy__astropy-8707", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "astropy__astropy-13398", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-7454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-13798", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-12481", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16032", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-12155", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-18199", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "psf__requests-1142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13212", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11555", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-24213", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-10614", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pydata__xarray-6992", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-23299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-20590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-25311", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13363", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-14531", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-23314", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15741", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-15345", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13012", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15022", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16527", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-11445", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pytest-dev__pytest-6197", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-9296", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-10297", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13807", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "astropy__astropy-14365", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-9230", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-13124", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-13480", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-10999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-22914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-16766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-24149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15103", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "astropy__astropy-14182", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11299", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-8621", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-26342", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-21930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11603", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pytest-dev__pytest-6202", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-7590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-19637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pylint-dev__pylint-4551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13410", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pylint-dev__pylint-6386", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15278", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13023", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "astropy__astropy-8872", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "astropy__astropy-7166", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pydata__xarray-6938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16485", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "astropy__astropy-12907", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-22080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-20801", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-12304", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pytest-dev__pytest-10051", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "astropy__astropy-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-25931", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-12585", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-20488", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "mwaskom__seaborn-3187", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15128", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13033", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-15017", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15561", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15161", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-12965", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15277", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-10323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-22714", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-8056", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-10435", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "psf__requests-1766", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pytest-dev__pytest-5262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-9281", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-20859", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13158", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-13877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-8595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13343", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15525", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-17630", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14559", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pytest-dev__pytest-5631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-26466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16642", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15930", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pydata__xarray-7393", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pytest-dev__pytest-5840", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-15875", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-12039", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15382", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-14711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11885", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-25775", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pylint-dev__pylint-4661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-15100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-17655", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-18763", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-26291", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-13031", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-24562", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pydata__xarray-2905", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-26208", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "astropy__astropy-7606", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "astropy__astropy-13453", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pytest-dev__pytest-7236", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-13878", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pylint-dev__pylint-6903", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "astropy__astropy-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14915", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15503", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15037", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16612", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14404", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-12754", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-9229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-8265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14580", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pytest-dev__pytest-7571", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-8269", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13112", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13513", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pytest-dev__pytest-7432", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15731", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-16792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14034", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "astropy__astropy-14096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14765", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-18698", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14771", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15957", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pydata__xarray-3095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-19954", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-16886", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11239", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13820", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pydata__xarray-4629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16901", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-12262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-24661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11119", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "psf__requests-6028", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-20154", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-13142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14752", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15563", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-16597", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15987", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-15976", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-10973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15127", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-13439", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-7748", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13516", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-10554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16082", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13786", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-21596", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-26113", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14672", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13109", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11141", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-23412", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15368", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15814", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13933", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-12682", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-20676", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-8593", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-13852", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16819", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-12713", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14855", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-26323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-13328", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-11510", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13195", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-24066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-9591", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pytest-dev__pytest-7982", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14725", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-17029", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-14623", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pydata__xarray-6461", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16662", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-9461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11848", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-22719", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pydata__xarray-4966", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11728", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-24627", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-7440", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-13989", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11163", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "matplotlib__matplotlib-24177", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14011", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-13279", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-23534", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "astropy__astropy-7336", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "mwaskom__seaborn-3069", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "scikit-learn__scikit-learn-13779", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11179", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pydata__xarray-4075", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-14248", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pylint-dev__pylint-4604", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "astropy__astropy-14369", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-20438", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14500", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-9673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "astropy__astropy-14309", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-9698", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-12489", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pydata__xarray-6744", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-17084", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "pydata__xarray-6599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11138", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11477", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-13091", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11099", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-16429", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-14792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-11999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sphinx-doc__sphinx-8721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "sympy__sympy-21847", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o3 (2025-04-16)", "example_id": "django__django-15973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pytest-dev__pytest-10356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11532", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-12050", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pydata__xarray-3151", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13512", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-18189", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13028", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13449", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-25332", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15863", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14376", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14017", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pydata__xarray-4695", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-16450", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-25960", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "psf__requests-1921", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-12308", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "astropy__astropy-13579", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-12708", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pylint-dev__pylint-7277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pydata__xarray-3993", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-9258", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pytest-dev__pytest-7324", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pydata__xarray-7233", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-21379", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pylint-dev__pylint-7080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-12096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-9367", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-13615", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-7910", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11400", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11740", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-7462", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-13647", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14140", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-12663", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pylint-dev__pylint-4970", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14534", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "astropy__astropy-13236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11292", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13417", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-20916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pydata__xarray-6721", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pytest-dev__pytest-7521", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "astropy__astropy-14598", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-21612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-25122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "psf__requests-2317", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-10449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pytest-dev__pytest-8399", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14434", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13809", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-24570", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-12209", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-14087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-12193", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-25479", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-7757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pydata__xarray-7229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16136", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11133", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pydata__xarray-4356", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11206", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15375", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-19040", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-13372", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-24539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-9602", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-10466", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-10880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pytest-dev__pytest-5787", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14311", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-25973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-22871", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-23476", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-25287", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "psf__requests-1724", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15280", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13794", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-24443", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-21568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16454", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-18211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-26194", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-17087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-8475", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14170", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-14894", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "astropy__astropy-7671", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-14496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15467", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13810", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15098", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11490", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13297", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-9288", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-12774", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15499", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-19495", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-8035", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-14141", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-9320", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-23824", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14608", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13837", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-20826", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-24970", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-14983", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13344", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pytest-dev__pytest-10081", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16116", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-15349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-13757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-7889", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-24637", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16502", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-25747", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-11618", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-24870", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pylint-dev__pylint-8898", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-7985", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-12125", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-12858", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-13974", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14373", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15732", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-8459", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15380", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-10673", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-8548", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13121", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pytest-dev__pytest-7205", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-17318", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13089", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14787", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pydata__xarray-4094", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-14710", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13821", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-14629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15104", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-23950", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-25232", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-15599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-10914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11749", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-23413", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-24026", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pytest-dev__pytest-5809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16899", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15851", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-10097", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-12741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-12406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11734", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-15809", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11790", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-14976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14007", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11433", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13401", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pytest-dev__pytest-7490", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-10908", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-10844", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-12273", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16333", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "astropy__astropy-13977", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-12973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-17139", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-8638", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16255", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pylint-dev__pylint-6528", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-12276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pydata__xarray-3305", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13670", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14122", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-19346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-22456", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-12325", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-9658", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16667", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-23262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-25102", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14238", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16560", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16145", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pallets__flask-5014", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-20428", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15572", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11964", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14349", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-11310", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-12143", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-11578", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "astropy__astropy-14508", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-9711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11149", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-8120", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11815", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-19783", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-22865", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "psf__requests-2931", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11451", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "psf__requests-5414", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-8551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11951", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-7530", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15252", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16256", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15268", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13925", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-13135", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16263", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "astropy__astropy-14995", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pydata__xarray-3677", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pydata__xarray-4687", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-13496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14351", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "astropy__astropy-8707", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "astropy__astropy-13398", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-7454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-13798", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-12481", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16032", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-12155", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-18199", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "psf__requests-1142", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13212", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11555", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-24213", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-10614", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pydata__xarray-6992", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-23299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-20590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-25311", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13363", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-14531", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-23314", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15741", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-15345", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13012", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15022", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16527", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-11445", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pytest-dev__pytest-6197", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-9296", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-10297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13807", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "astropy__astropy-14365", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-9230", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-13124", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-13480", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-10999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-22914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-16766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-24149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15103", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "astropy__astropy-14182", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-8621", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-26342", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-21930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11603", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pytest-dev__pytest-6202", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-7590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-19637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pylint-dev__pylint-4551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13410", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pylint-dev__pylint-6386", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15278", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13023", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "astropy__astropy-8872", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13569", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "astropy__astropy-7166", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pydata__xarray-6938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16485", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "astropy__astropy-12907", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-22080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-20801", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-12304", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pytest-dev__pytest-10051", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "astropy__astropy-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-25931", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-12585", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-20488", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "mwaskom__seaborn-3187", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-15017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15561", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15161", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-12965", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15277", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-10323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-22714", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-8056", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-10435", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16100", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "psf__requests-1766", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pytest-dev__pytest-5262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-9281", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-20859", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13158", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-13877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-8595", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13343", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15525", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-17630", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14559", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pytest-dev__pytest-5631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-26466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16642", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15930", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pydata__xarray-7393", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pytest-dev__pytest-5840", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-15875", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-12039", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15382", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-14711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11885", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-25775", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pylint-dev__pylint-4661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-15100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-17655", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-18763", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-26291", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-13031", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-24562", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pydata__xarray-2905", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-26208", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "astropy__astropy-7606", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "astropy__astropy-13453", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pytest-dev__pytest-7236", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-13878", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pylint-dev__pylint-6903", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "astropy__astropy-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14915", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15503", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15037", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14404", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-12754", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-9229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-8265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14580", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pytest-dev__pytest-7571", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-8269", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13112", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13513", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pytest-dev__pytest-7432", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15731", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-16792", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14034", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "astropy__astropy-14096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14765", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-18698", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14771", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15957", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pydata__xarray-3095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-19954", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-16886", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11239", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13820", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pydata__xarray-4629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16901", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13346", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-12262", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-24661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11119", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "psf__requests-6028", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-20154", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-13142", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14752", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15563", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-16597", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15987", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-15976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-10973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15127", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-13439", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-7748", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13516", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-10554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16082", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13786", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-21596", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-26113", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14672", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13109", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11141", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-23412", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15368", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15814", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13933", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-12682", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-20676", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-8593", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-13852", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16819", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-12713", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14855", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-26323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-13328", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-11510", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13195", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-24066", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-9591", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pytest-dev__pytest-7982", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14725", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-17029", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-14623", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pydata__xarray-6461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16662", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-9461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11848", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-22719", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pydata__xarray-4966", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11728", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-24627", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-7440", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-12419", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-13989", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11163", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "matplotlib__matplotlib-24177", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14011", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-13279", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-23534", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "astropy__astropy-7336", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "mwaskom__seaborn-3069", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "scikit-learn__scikit-learn-13779", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11179", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pydata__xarray-4075", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-14248", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pylint-dev__pylint-4604", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "astropy__astropy-14369", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-20438", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14500", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-9673", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "astropy__astropy-14309", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-9698", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-12489", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pydata__xarray-6744", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-17084", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "pydata__xarray-6599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11138", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11477", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-13091", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11099", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-16429", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-14792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-11999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sphinx-doc__sphinx-8721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "sympy__sympy-21847", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "o4-mini (2025-04-16)", "example_id": "django__django-15973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pytest-dev__pytest-10356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11532", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-12050", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pydata__xarray-3151", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13512", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-18189", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13028", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13449", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11211", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-25332", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15863", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14376", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pydata__xarray-4695", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-16450", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-25960", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "psf__requests-1921", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-12308", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "astropy__astropy-13579", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-12708", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pylint-dev__pylint-7277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pydata__xarray-3993", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-9258", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pytest-dev__pytest-7324", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pydata__xarray-7233", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-21379", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pylint-dev__pylint-7080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-12096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-9367", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-13615", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-7910", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11400", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11740", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-7462", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-13647", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14140", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-12663", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pylint-dev__pylint-4970", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14534", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "astropy__astropy-13236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11292", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13417", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-20916", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pydata__xarray-6721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pytest-dev__pytest-7521", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "astropy__astropy-14598", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-21612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-25122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "psf__requests-2317", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-10449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pytest-dev__pytest-8399", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14434", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-24570", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-12209", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-14087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-12193", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-25479", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-7757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pydata__xarray-7229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16136", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11133", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pydata__xarray-4356", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11206", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15375", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-19040", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-13372", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-24539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-9602", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-10466", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-10880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pytest-dev__pytest-5787", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-25973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-22871", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-23476", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-25287", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "psf__requests-1724", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15280", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13794", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-24443", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-21568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16454", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-18211", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-26194", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-17087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-8475", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14170", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-14894", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "astropy__astropy-7671", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16938", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-14496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15467", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13810", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15098", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11490", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-9288", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-12774", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15499", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-19495", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-8035", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-14141", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16877", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-9320", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-23824", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14608", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13837", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-20826", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-24970", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-14983", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13344", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pytest-dev__pytest-10081", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16116", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-15349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-13757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-7889", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-24637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16502", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-25747", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-11618", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-24870", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pylint-dev__pylint-8898", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-7985", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-12125", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-12858", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-13974", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14373", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15732", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-8459", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15380", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-10673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-8548", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13121", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pytest-dev__pytest-7205", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-17318", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14787", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pydata__xarray-4094", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-14710", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13821", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-14629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15104", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-23950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-25232", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14053", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-15599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-10914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11749", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-23413", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-24026", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pytest-dev__pytest-5809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16899", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15851", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-10097", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-12741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-12406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11734", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-15809", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11790", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-14976", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14007", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11433", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13401", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pytest-dev__pytest-7490", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-10908", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-10844", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-12273", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "astropy__astropy-13977", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-12973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-17139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-8638", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16255", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pylint-dev__pylint-6528", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-12276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pydata__xarray-3305", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13670", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-19346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-22456", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-12325", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-9658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16667", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-23262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-25102", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14238", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16560", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16145", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pallets__flask-5014", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-20428", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15572", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-11310", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-12143", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-11578", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "astropy__astropy-14508", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-9711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11149", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-8120", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11815", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-19783", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-22865", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "psf__requests-2931", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11333", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11451", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "psf__requests-5414", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-8551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11951", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-7530", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15252", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16256", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15268", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13925", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-13135", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16263", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "astropy__astropy-14995", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pydata__xarray-3677", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pydata__xarray-4687", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-13496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14351", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "astropy__astropy-8707", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "astropy__astropy-13398", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-7454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-13798", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-12481", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16032", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-12155", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-18199", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "psf__requests-1142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13212", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11555", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-24213", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-10614", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pydata__xarray-6992", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-23299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-20590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-25311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13363", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-14531", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-23314", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-15345", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13012", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15022", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16527", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-11445", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pytest-dev__pytest-6197", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-9296", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-10297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13807", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "astropy__astropy-14365", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-9230", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-13124", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-13480", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-10999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-22914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-16766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-24149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15103", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "astropy__astropy-14182", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11299", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-8621", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-26342", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-21930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11603", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pytest-dev__pytest-6202", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-7590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-19637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pylint-dev__pylint-4551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13410", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pylint-dev__pylint-6386", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15278", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13023", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "astropy__astropy-8872", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "astropy__astropy-7166", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pydata__xarray-6938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16485", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "astropy__astropy-12907", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-22080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-20801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-12304", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pytest-dev__pytest-10051", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "astropy__astropy-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-25931", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-12585", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-20488", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "mwaskom__seaborn-3187", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15128", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13033", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-15017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15561", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15161", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-12965", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15277", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-10323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-22714", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-8056", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-10435", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "psf__requests-1766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pytest-dev__pytest-5262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-9281", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-20859", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13158", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-13877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-8595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13343", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15525", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-17630", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14559", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pytest-dev__pytest-5631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-26466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16642", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15930", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pydata__xarray-7393", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13406", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pytest-dev__pytest-5840", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-15875", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-12039", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15382", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-14711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11885", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-25775", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pylint-dev__pylint-4661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-15100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-17655", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-18763", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-26291", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-13031", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-24562", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pydata__xarray-2905", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-26208", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "astropy__astropy-7606", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "astropy__astropy-13453", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pytest-dev__pytest-7236", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-13878", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pylint-dev__pylint-6903", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "astropy__astropy-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14915", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15503", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15037", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16612", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14404", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-12754", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-9229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-8265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14580", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pytest-dev__pytest-7571", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-8269", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13112", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13513", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pytest-dev__pytest-7432", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15731", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-16792", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14034", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "astropy__astropy-14096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14765", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-18698", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14771", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15957", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pydata__xarray-3095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-19954", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-16886", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11239", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13820", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pydata__xarray-4629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16901", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-12262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-24661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11119", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "psf__requests-6028", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-20154", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-13142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14752", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15563", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-16597", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15987", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-15976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-10973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15127", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-13439", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-7748", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13516", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-10554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16082", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13786", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-21596", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-26113", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14672", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13109", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11141", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-23412", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-13551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15368", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15814", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13933", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-12682", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-20676", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-8593", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-13852", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16819", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-12713", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14855", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-26323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-13328", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-11510", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13195", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15916", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-24066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-9591", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pytest-dev__pytest-7982", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14725", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-17029", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-14623", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pydata__xarray-6461", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16662", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-9461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11848", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-22719", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pydata__xarray-4966", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11728", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-24627", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-7440", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-12419", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-13989", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11163", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "matplotlib__matplotlib-24177", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14011", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-13279", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-23534", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "astropy__astropy-7336", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "mwaskom__seaborn-3069", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "scikit-learn__scikit-learn-13779", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11179", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pydata__xarray-4075", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-14248", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pylint-dev__pylint-4604", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "astropy__astropy-14369", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-20438", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14500", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-9673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "astropy__astropy-14309", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-9698", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-12489", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pydata__xarray-6744", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-17084", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "pydata__xarray-6599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11138", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11477", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-13091", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11099", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-16429", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-14792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-11999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sphinx-doc__sphinx-8721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "sympy__sympy-21847", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4 Opus (20250514)", "example_id": "django__django-15973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pytest-dev__pytest-10356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11532", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-12050", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pydata__xarray-3151", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13512", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-18189", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13028", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-25332", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15863", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14376", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pydata__xarray-4695", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-16450", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-25960", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "psf__requests-1921", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-12308", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "astropy__astropy-13579", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-12708", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pylint-dev__pylint-7277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pydata__xarray-3993", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-9258", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pytest-dev__pytest-7324", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pydata__xarray-7233", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-21379", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pylint-dev__pylint-7080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-12096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-9367", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-13615", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-7910", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11400", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11740", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-7462", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-13647", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14140", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-12663", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pylint-dev__pylint-4970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14534", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "astropy__astropy-13236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11292", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13417", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-20916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pydata__xarray-6721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pytest-dev__pytest-7521", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "astropy__astropy-14598", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-21612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-25122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "psf__requests-2317", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-10449", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pytest-dev__pytest-8399", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14434", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-24570", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-12209", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-14087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-12193", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-25479", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-7757", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pydata__xarray-7229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16136", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11133", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pydata__xarray-4356", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11206", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15375", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-19040", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-13372", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-24539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-9602", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-10466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-10880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pytest-dev__pytest-5787", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-25973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-22871", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-23476", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-25287", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "psf__requests-1724", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15280", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13794", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-24443", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-21568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-18211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-26194", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-17087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-8475", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14170", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-14894", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "astropy__astropy-7671", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-14496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15467", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13810", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15098", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11490", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-9288", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-12774", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15499", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-19495", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-8035", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-14141", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-9320", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-23824", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14608", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13837", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-20826", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-24970", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-14983", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13344", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pytest-dev__pytest-10081", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16116", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-15349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-13757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-7889", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-24637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16502", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-25747", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-11618", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-24870", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pylint-dev__pylint-8898", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-7985", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-12125", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-12858", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-13974", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14373", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15732", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-8459", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15380", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-10673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-8548", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13121", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pytest-dev__pytest-7205", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-17318", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14787", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pydata__xarray-4094", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-14710", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13821", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-14629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15104", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-23950", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-25232", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-15599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-10914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11749", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-23413", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-24026", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pytest-dev__pytest-5809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16899", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15851", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-10097", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-12741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-12406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11734", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-15809", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11790", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-14976", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14007", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11433", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13401", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pytest-dev__pytest-7490", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-10908", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-10844", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-12273", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "astropy__astropy-13977", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-12973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-17139", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-8638", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16255", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pylint-dev__pylint-6528", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-12276", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pydata__xarray-3305", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13670", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-19346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-22456", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-12325", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-9658", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16667", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-23262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-25102", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14238", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16560", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16145", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pallets__flask-5014", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-20428", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15572", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-11310", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-12143", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-11578", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "astropy__astropy-14508", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-9711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11149", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13964", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-8120", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11815", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-19783", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-22865", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "psf__requests-2931", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11333", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11451", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "psf__requests-5414", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-8551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11951", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-7530", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15252", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16256", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15268", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13925", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-13135", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16263", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "astropy__astropy-14995", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pydata__xarray-3677", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pydata__xarray-4687", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-13496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14351", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "astropy__astropy-8707", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "astropy__astropy-13398", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-7454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-13798", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-12481", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16032", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-12155", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-18199", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "psf__requests-1142", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13212", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11555", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-24213", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-10614", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pydata__xarray-6992", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-23299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-20590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-25311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13363", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-14531", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-23314", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-15345", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13012", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15022", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16527", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-11445", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pytest-dev__pytest-6197", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-9296", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-10297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13807", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "astropy__astropy-14365", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-9230", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-13124", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-13480", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-10999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-22914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-16766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-24149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15103", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "astropy__astropy-14182", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11299", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-8621", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-26342", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-21930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11603", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pytest-dev__pytest-6202", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-7590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-19637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pylint-dev__pylint-4551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13410", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pylint-dev__pylint-6386", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15278", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13023", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "astropy__astropy-8872", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13569", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "astropy__astropy-7166", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pydata__xarray-6938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16485", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "astropy__astropy-12907", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-22080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-20801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-12304", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pytest-dev__pytest-10051", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "astropy__astropy-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-25931", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-12585", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-20488", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "mwaskom__seaborn-3187", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15128", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13033", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-15017", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15561", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15161", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-12965", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15277", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-10323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-22714", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-8056", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-10435", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16100", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "psf__requests-1766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pytest-dev__pytest-5262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-9281", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-20859", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13158", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-13877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-8595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13343", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15525", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-17630", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14559", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pytest-dev__pytest-5631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-26466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16642", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15930", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pydata__xarray-7393", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13406", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pytest-dev__pytest-5840", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-15875", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-12039", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15382", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-14711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11885", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-25775", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pylint-dev__pylint-4661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-15100", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-17655", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-18763", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-26291", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-13031", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-24562", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pydata__xarray-2905", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-26208", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "astropy__astropy-7606", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "astropy__astropy-13453", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pytest-dev__pytest-7236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-13878", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pylint-dev__pylint-6903", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "astropy__astropy-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14915", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15503", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15037", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16612", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14404", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-12754", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-9229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-8265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14580", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pytest-dev__pytest-7571", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-8269", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13112", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13513", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pytest-dev__pytest-7432", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15731", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-16792", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14034", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "astropy__astropy-14096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14765", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-18698", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14771", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15957", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pydata__xarray-3095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-19954", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-16886", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11239", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13820", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pydata__xarray-4629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16901", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-12262", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-24661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11119", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "psf__requests-6028", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-20154", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-13142", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14752", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15563", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-16597", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15987", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-15976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-10973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15127", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-13439", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-7748", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13516", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-10554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16082", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13786", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-21596", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-26113", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14672", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13109", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11141", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-23412", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-13551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15368", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15814", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13933", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-12682", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-20676", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-8593", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-13852", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16819", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-12713", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14855", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-26323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-13328", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-11510", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13195", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-24066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-9591", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pytest-dev__pytest-7982", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14725", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-17029", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-14623", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pydata__xarray-6461", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16662", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-9461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11848", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-22719", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pydata__xarray-4966", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11728", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-24627", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-7440", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-12419", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-13989", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11163", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "matplotlib__matplotlib-24177", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14011", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-13279", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-23534", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "astropy__astropy-7336", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "mwaskom__seaborn-3069", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "scikit-learn__scikit-learn-13779", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11179", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pydata__xarray-4075", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16595", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-14248", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pylint-dev__pylint-4604", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "astropy__astropy-14369", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-20438", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14500", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-9673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "astropy__astropy-14309", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-9698", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-12489", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pydata__xarray-6744", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-17084", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "pydata__xarray-6599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11138", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11477", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-13091", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11099", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-16429", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-14792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-11999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sphinx-doc__sphinx-8721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "sympy__sympy-21847", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen3-Coder 480B/A35B Instruct", "example_id": "django__django-15973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pytest-dev__pytest-10356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11532", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-12050", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pydata__xarray-3151", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13512", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-18189", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13028", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13449", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-25332", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15863", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14376", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14017", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pydata__xarray-4695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-16450", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14493", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-25960", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "psf__requests-1921", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-12308", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "astropy__astropy-13579", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-12708", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14089", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pylint-dev__pylint-7277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pydata__xarray-3993", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-9258", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pytest-dev__pytest-7324", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pydata__xarray-7233", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-21379", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pylint-dev__pylint-7080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-12096", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-9367", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-13615", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-7910", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11400", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11740", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-7462", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-13647", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14140", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-12663", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pylint-dev__pylint-4970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14534", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "astropy__astropy-13236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11292", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13417", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-20916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pydata__xarray-6721", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pytest-dev__pytest-7521", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "astropy__astropy-14598", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-21612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-25122", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "psf__requests-2317", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-10449", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pytest-dev__pytest-8399", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14434", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13809", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-24570", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-12209", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-14087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-12193", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-25479", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-7757", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pydata__xarray-7229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16136", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11133", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pydata__xarray-4356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11206", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16493", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15375", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-19040", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-13372", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-24539", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-9602", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-10466", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16139", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-10880", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pytest-dev__pytest-5787", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14311", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-25973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-22871", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-23476", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-25287", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "psf__requests-1724", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15280", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13794", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-24443", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-21568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13128", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-18211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-26194", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-17087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-8475", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14170", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-14894", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "astropy__astropy-7671", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11276", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-14496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15467", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13810", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15098", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11490", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13297", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-9288", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-12774", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15499", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-19495", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-8035", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-14141", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-9320", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-23824", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14608", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13837", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-20826", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-24970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-14983", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13344", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pytest-dev__pytest-10081", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16116", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-15349", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-13757", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-7889", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-24637", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16502", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-25747", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-11618", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-24870", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pylint-dev__pylint-8898", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-7985", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-12125", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-12858", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-13974", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14373", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15732", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-8459", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15380", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-10673", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-8548", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13121", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pytest-dev__pytest-7205", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-17318", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13089", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14787", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pydata__xarray-4094", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-14710", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13821", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-14629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15104", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-23950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-25232", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14053", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-15599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-10914", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11749", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-23413", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-24026", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pytest-dev__pytest-5809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16899", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15851", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-10097", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-12741", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-12406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-14053", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11734", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-15809", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11790", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-14976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14007", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11433", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13401", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pytest-dev__pytest-7490", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-10908", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-10844", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-12273", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16333", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "astropy__astropy-13977", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-12973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-17139", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-8638", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16255", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pylint-dev__pylint-6528", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-12276", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pydata__xarray-3305", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13670", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14122", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-19346", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-22456", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-12325", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-9658", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16667", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-23262", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-25102", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14238", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16560", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16145", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pallets__flask-5014", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-20428", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15572", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14349", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-11310", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-12143", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-11578", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "astropy__astropy-14508", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-9711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11149", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-8120", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11815", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-19783", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-22865", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "psf__requests-2931", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11333", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11451", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "psf__requests-5414", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-8551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11951", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-7530", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15252", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16256", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15268", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13925", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-13135", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16263", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "astropy__astropy-14995", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pydata__xarray-3677", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pydata__xarray-4687", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-13496", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14351", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "astropy__astropy-8707", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "astropy__astropy-13398", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-7454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-13798", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-12481", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16032", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-12155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-18199", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "psf__requests-1142", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13212", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11555", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-24213", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-10614", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pydata__xarray-6992", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-23299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-20590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-25311", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13363", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-14531", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-23314", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-15345", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13012", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15022", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16527", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-11445", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pytest-dev__pytest-6197", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-9296", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-10297", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13807", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "astropy__astropy-14365", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-9230", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-13124", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-13480", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-10999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-22914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-16766", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-24149", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15103", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "astropy__astropy-14182", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-8621", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-26342", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-21930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11880", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11603", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pytest-dev__pytest-6202", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-7590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-19637", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pylint-dev__pylint-4551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13410", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pylint-dev__pylint-6386", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15278", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13741", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13023", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "astropy__astropy-8872", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13569", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "astropy__astropy-7166", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pydata__xarray-6938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16485", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "astropy__astropy-12907", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-22080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-20801", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-12304", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pytest-dev__pytest-10051", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "astropy__astropy-14539", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-25931", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-12585", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-20488", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "mwaskom__seaborn-3187", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15128", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-15017", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15561", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15161", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-12965", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-10323", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-22714", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-8056", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-10435", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16100", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "psf__requests-1766", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pytest-dev__pytest-5262", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-9281", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-20859", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13158", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-13877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-8595", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13343", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15525", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11095", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-17630", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14559", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pytest-dev__pytest-5631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-26466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16642", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pydata__xarray-7393", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pytest-dev__pytest-5840", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-15875", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-12039", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15382", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-14711", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11885", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-25775", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pylint-dev__pylint-4661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-15100", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-17655", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-18763", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-26291", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-13031", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-24562", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pydata__xarray-2905", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-26208", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "astropy__astropy-7606", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "astropy__astropy-13453", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pytest-dev__pytest-7236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-13878", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pylint-dev__pylint-6903", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "astropy__astropy-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14915", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14539", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15503", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15037", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14404", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-12754", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-9229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-8265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14580", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pytest-dev__pytest-7571", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-8269", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13112", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13513", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pytest-dev__pytest-7432", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15731", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-16792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14034", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "astropy__astropy-14096", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14765", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-18698", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14771", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15957", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pydata__xarray-3095", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-19954", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-16886", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11239", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13820", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pydata__xarray-4629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16901", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13346", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-12262", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-24661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11119", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "psf__requests-6028", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-20154", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-13142", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14752", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15563", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-16597", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15987", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-15976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-10973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15127", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-13439", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-7748", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13516", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-10554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16082", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13786", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-21596", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-26113", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14672", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13109", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11141", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-23412", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15368", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15814", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13933", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-12682", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-20676", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-8593", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-13852", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16819", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-12713", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14855", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-26323", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-13328", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-11510", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13195", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-24066", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-9591", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pytest-dev__pytest-7982", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14725", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-17029", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-14623", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pydata__xarray-6461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16662", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-9461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11848", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-22719", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pydata__xarray-4966", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11728", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-24627", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-7440", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-12419", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-13989", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11163", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "matplotlib__matplotlib-24177", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14011", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-13279", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-23534", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "astropy__astropy-7336", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "mwaskom__seaborn-3069", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "scikit-learn__scikit-learn-13779", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11179", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pydata__xarray-4075", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16595", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-14248", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pylint-dev__pylint-4604", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "astropy__astropy-14369", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-20438", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14500", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-9673", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "astropy__astropy-14309", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-9698", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-12489", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pydata__xarray-6744", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-17084", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "pydata__xarray-6599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11138", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11477", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-13091", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11099", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-16429", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-14792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-11999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sphinx-doc__sphinx-8721", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "sympy__sympy-21847", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Qwen2.5-Coder 32B Instruct", "example_id": "django__django-15973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-10356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11532", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-12050", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-3151", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13512", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-18189", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13028", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11211", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-25332", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15863", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14376", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-4695", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-16450", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-25960", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "psf__requests-1921", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-12308", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-13579", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-12708", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-7277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-3993", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9258", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-7324", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-7233", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-21379", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-7080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-12096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9367", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13615", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7910", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11400", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11740", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7462", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13647", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14140", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-12663", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-4970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14534", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-13236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11292", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13417", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-20916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-6721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-7521", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-14598", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-21612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-25122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "psf__requests-2317", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-10449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-8399", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14434", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-24570", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-12209", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-12193", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-25479", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-7229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16136", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11133", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-4356", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11206", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15375", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-19040", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13372", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-24539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9602", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-10466", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-10880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-5787", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-25973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-22871", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-23476", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-25287", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "psf__requests-1724", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15280", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13794", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-24443", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-21568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-18211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-26194", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-17087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8475", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14170", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14894", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-7671", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16938", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15467", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13810", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15098", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11490", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13297", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-9288", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-12774", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15499", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-19495", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8035", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14141", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16877", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9320", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-23824", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14608", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13837", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-20826", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-24970", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14983", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13344", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-10081", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16116", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-15349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7889", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-24637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16502", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-25747", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-11618", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-24870", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-8898", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7985", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-12125", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-12858", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13974", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14373", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15732", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8459", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15380", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-10673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8548", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13121", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-7205", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-17318", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14787", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-4094", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14710", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13821", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15104", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-23950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-25232", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14053", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-15599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-10914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11749", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-23413", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-24026", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-5809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16899", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15851", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-10097", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-12741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-12406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11734", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-15809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11790", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-14976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14007", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11433", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13401", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-7490", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-10908", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-10844", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-12273", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-13977", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-12973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-17139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8638", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16255", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-6528", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-12276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-3305", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13670", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-19346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-22456", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-12325", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9658", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16667", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-23262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-25102", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14238", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16560", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16145", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pallets__flask-5014", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-20428", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15572", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-11310", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-12143", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-11578", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-14508", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8120", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11815", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-19783", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-22865", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "psf__requests-2931", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11333", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11451", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "psf__requests-5414", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11951", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-7530", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15252", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16256", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15268", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13925", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13135", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16263", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-14995", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13568", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-3677", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-4687", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14351", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-8707", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-13398", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7454", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13798", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-12481", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16032", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-12155", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-18199", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "psf__requests-1142", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13212", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11555", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-24213", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-10614", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-6992", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-23299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-20590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-25311", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13363", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-14531", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-23314", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15741", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-15345", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13012", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15022", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16527", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-11445", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-6197", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-9296", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-10297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13807", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-14365", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9230", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13124", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13480", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-10999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-22914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-16766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-24149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15103", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-14182", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11299", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8621", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-26342", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-21930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11603", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-6202", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-19637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-4551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13410", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-6386", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15278", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13023", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-8872", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-7166", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-6938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16485", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-12907", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-22080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-20801", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-12304", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-10051", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-25931", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-12585", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-20488", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "mwaskom__seaborn-3187", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13033", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-15017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15561", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15161", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-12965", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15277", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-10323", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-22714", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8056", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-10435", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "psf__requests-1766", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-5262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9281", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-20859", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13158", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13343", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15525", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-17630", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14559", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-5631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-26466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16642", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15930", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-7393", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13406", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-5840", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-15875", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-12039", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15382", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-14711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11885", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-25775", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-4661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-15100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-17655", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-18763", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-26291", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13031", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-24562", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-2905", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-26208", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-7606", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-13453", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-7236", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13878", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-6903", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14915", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15503", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15037", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16612", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14404", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-12754", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14580", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-7571", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8269", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13112", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13513", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-7432", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15731", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-16792", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14034", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-14096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14765", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-18698", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14771", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15957", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-3095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-19954", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-16886", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11239", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13820", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-4629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16901", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-12262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-24661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11119", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "psf__requests-6028", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-20154", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14752", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15563", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-16597", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15987", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-15976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-10973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15127", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13439", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7748", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13516", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-10554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16082", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13786", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-21596", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-26113", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14672", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13109", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11141", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-23412", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15368", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15814", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13933", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-12682", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-20676", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8593", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13852", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16819", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-12713", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14855", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-26323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13328", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-11510", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13195", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15916", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-24066", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9591", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-7982", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14725", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-17029", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-14623", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-6461", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16662", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11848", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-22719", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-4966", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11728", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-24627", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7440", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-13989", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11163", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-24177", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14011", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-13279", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-23534", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-7336", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "mwaskom__seaborn-3069", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13779", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11179", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-4075", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-14248", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-4604", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-14369", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-20438", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14500", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-14309", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9698", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-12489", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-6744", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-17084", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-6599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11138", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11477", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13091", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11099", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-16429", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-14792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-11999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-21847", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 (2025-08-07) (medium reasoning)", "example_id": "django__django-15973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-10356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11532", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-12050", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-3151", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13512", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-18189", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13028", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-25332", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15863", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14376", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-4695", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-16450", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14493", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-25960", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "psf__requests-1921", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-12308", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-13579", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-12708", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-7277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-3993", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9258", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-7324", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-7233", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-21379", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-7080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-12096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9367", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13615", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7910", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11400", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11740", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7462", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13647", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14140", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-12663", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-4970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14534", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-13236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11292", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13417", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-20916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-6721", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-7521", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-14598", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-21612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-25122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "psf__requests-2317", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-10449", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-8399", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14434", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-24570", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-12209", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-12193", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-25479", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7757", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-7229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16136", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11133", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-4356", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11206", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15375", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-19040", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13372", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-24539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9602", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-10466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-10880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-5787", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-25973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-22871", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-23476", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-25287", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "psf__requests-1724", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15280", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13794", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-24443", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-21568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16454", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-18211", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-26194", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-17087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8475", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14170", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14894", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-7671", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11276", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15467", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13810", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15098", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11490", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13297", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-9288", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-12774", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15499", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-19495", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8035", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14141", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9320", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-23824", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14608", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13837", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-20826", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-24970", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14983", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13344", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-10081", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16116", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-15349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7889", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-24637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16502", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-25747", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-11618", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-24870", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-8898", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7985", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-12125", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-12858", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13974", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14373", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15732", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8459", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15380", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-10673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8548", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13121", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-7205", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-17318", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14787", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-4094", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14710", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13821", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15104", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-23950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-25232", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-15599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-10914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11749", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-23413", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-24026", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-5809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16899", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15851", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-10097", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-12741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-12406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11734", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-15809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11790", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-14976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14007", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11433", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13401", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-7490", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-10908", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-10844", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-12273", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-13977", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-12973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-17139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8638", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16255", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-6528", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-12276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-3305", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13670", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-19346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-22456", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-12325", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9658", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16667", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-23262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-25102", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14238", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16560", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16145", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pallets__flask-5014", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-20428", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15572", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-11310", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-12143", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-11578", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-14508", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13964", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8120", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11815", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-19783", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-22865", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "psf__requests-2931", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11333", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11451", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "psf__requests-5414", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11951", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-7530", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15252", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16256", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15268", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13925", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13135", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16263", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-14995", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-3677", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-4687", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14351", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-8707", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-13398", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13798", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-12481", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16032", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-12155", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-18199", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "psf__requests-1142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13212", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11555", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-24213", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-10614", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-6992", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-23299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-20590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-25311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13363", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-14531", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-23314", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-15345", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13012", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15022", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16527", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-11445", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-6197", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-9296", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-10297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13807", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-14365", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9230", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13124", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13480", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-10999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-22914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-16766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-24149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15103", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-14182", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11299", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8621", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-26342", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-21930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11603", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-6202", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-19637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-4551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13410", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-6386", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15278", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13023", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-8872", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-7166", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-6938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16485", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-12907", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-22080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-20801", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-12304", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-10051", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-25931", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-12585", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-20488", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "mwaskom__seaborn-3187", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13033", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-15017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15561", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15161", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-12965", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15277", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-10323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-22714", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8056", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-10435", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "psf__requests-1766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-5262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9281", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-20859", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13158", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13343", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15525", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-17630", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14559", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-5631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-26466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16642", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-7393", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13406", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-5840", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-15875", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-12039", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15382", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-14711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11885", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-25775", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-4661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-15100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-17655", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-18763", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-26291", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13031", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-24562", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-2905", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-26208", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-7606", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-13453", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-7236", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13878", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-6903", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14915", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15503", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15037", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16612", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14404", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-12754", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14580", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-7571", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8269", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13112", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13513", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-7432", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15731", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-16792", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14034", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-14096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14765", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-18698", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14771", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15957", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-3095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-19954", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-16886", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11239", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13820", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-4629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16901", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-12262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-24661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11119", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "psf__requests-6028", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-20154", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14752", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15563", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-16597", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15987", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-15976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-10973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15127", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13439", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7748", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13516", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-10554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16082", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13786", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-21596", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-26113", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14672", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13109", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11141", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-23412", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15368", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15814", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13933", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-12682", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-20676", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8593", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13852", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16819", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-12713", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14855", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-26323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13328", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-11510", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13195", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-24066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9591", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-7982", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14725", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-17029", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-14623", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-6461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16662", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11848", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-22719", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-4966", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11728", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-24627", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7440", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-13989", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11163", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-24177", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14011", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-13279", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-23534", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-7336", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "mwaskom__seaborn-3069", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13779", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11179", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-4075", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-14248", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-4604", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-14369", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-20438", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14500", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-14309", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9698", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-12489", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-6744", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-17084", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-6599", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11138", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11477", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13091", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11099", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-16429", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-14792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-11999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-21847", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 mini (2025-08-07) (medium reasoning)", "example_id": "django__django-15973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-10356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11532", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-12050", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-3151", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13512", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-18189", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13028", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11211", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-25332", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15863", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14376", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14017", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-4695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-16450", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-25960", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "psf__requests-1921", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-12308", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-13579", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-12708", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-7277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-3993", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9258", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-7324", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-7233", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-21379", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-7080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-12096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9367", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13615", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7910", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11400", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11740", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7462", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13647", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14140", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-12663", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-4970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14534", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-13236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11292", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13417", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-20916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-6721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-7521", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-14598", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-21612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-25122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "psf__requests-2317", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-10449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-8399", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14434", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-24570", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-12209", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-12193", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-25479", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7757", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-7229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16136", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11133", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-4356", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11206", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16493", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15375", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-19040", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13372", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-24539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9602", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-10466", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-10880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-5787", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14311", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-25973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-22871", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-23476", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-25287", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "psf__requests-1724", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15280", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13794", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-24443", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-21568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16454", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13128", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-18211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-26194", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-17087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8475", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14170", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14894", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-7671", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15467", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13810", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15098", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11490", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13297", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-9288", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-12774", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15499", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-19495", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8035", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14141", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9320", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-23824", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14608", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13837", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-20826", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-24970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14983", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13344", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-10081", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16116", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-15349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13757", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7889", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-24637", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16502", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-25747", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-11618", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-24870", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-8898", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7985", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-12125", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-12858", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13974", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14373", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15732", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8459", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15380", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-10673", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8548", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13121", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-7205", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-17318", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14787", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-4094", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14710", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13821", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15104", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-23950", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-25232", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14053", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-15599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-10914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11749", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-23413", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-24026", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-5809", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16899", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15851", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-10097", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-12741", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-12406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11734", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-15809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11790", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-14976", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14007", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11433", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13401", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-7490", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-10908", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-10844", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-12273", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-13977", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-12973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-17139", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8638", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16255", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-6528", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-12276", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-3305", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13670", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14122", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-19346", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-22456", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-12325", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9658", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16667", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-23262", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-25102", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14238", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16560", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16145", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pallets__flask-5014", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-20428", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15572", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14349", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-11310", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-12143", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-11578", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-14508", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11149", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8120", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11815", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-19783", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-22865", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "psf__requests-2931", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11333", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11451", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "psf__requests-5414", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11951", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-7530", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15252", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16256", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15268", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13925", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13135", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16263", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-14995", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-3677", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-4687", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14351", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-8707", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-13398", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13798", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-12481", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16032", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-12155", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-18199", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "psf__requests-1142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13212", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11555", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-24213", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-10614", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-6992", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-23299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-20590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-25311", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13363", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-14531", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-23314", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-15345", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13012", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15022", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16527", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-11445", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-6197", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-9296", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-10297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13807", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-14365", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9230", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13124", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13480", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-10999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-22914", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-16766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-24149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15103", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-14182", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8621", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-26342", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-21930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11880", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11603", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-6202", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-19637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-4551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13410", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-6386", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15278", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13023", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-8872", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13569", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-7166", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-6938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16485", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-12907", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-22080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-20801", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-12304", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-10051", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-14539", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-25931", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-12585", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-20488", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "mwaskom__seaborn-3187", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-15017", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15561", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15161", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-12965", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-10323", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-22714", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8056", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-10435", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16100", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "psf__requests-1766", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-5262", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9281", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-20859", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13158", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13877", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13343", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15525", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-17630", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14559", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-5631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-26466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16642", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15930", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-7393", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-5840", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-15875", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-12039", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15382", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-14711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11885", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-25775", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-4661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-15100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-17655", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-18763", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-26291", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13031", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-24562", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-2905", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-26208", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-7606", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-13453", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-7236", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13878", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-6903", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14915", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15503", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15037", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16612", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14404", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-12754", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14580", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-7571", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8269", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13112", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13513", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-7432", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15731", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-16792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14034", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-14096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14765", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-18698", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14771", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15957", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-3095", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-19954", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-16886", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11239", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13820", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-4629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16901", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13346", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-12262", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-24661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11119", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "psf__requests-6028", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-20154", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14752", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15563", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-16597", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15987", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-15976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-10973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15127", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13439", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7748", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13516", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-10554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16082", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13786", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-21596", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-26113", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14672", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13109", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11141", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-23412", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15368", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15814", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13933", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-12682", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-20676", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8593", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13852", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16819", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-12713", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14855", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-26323", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13328", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-11510", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13195", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-24066", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9591", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pytest-dev__pytest-7982", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14725", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-17029", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-14623", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-6461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16662", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11848", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-22719", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-4966", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11728", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-24627", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7440", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-13989", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11163", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "matplotlib__matplotlib-24177", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14011", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-13279", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-23534", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-7336", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "mwaskom__seaborn-3069", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13779", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11179", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-4075", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-14248", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pylint-dev__pylint-4604", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-14369", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-20438", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14500", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9673", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "astropy__astropy-14309", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9698", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-12489", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-6744", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-17084", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "pydata__xarray-6599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11138", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11477", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-13091", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11099", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-16429", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-14792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-11999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8721", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "sympy__sympy-21847", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5 nano (2025-08-07) (medium reasoning)", "example_id": "django__django-15973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pytest-dev__pytest-10356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11532", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-12050", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pydata__xarray-3151", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13512", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-18189", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13028", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-25332", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15863", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14376", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14017", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pydata__xarray-4695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-16450", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-25960", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "psf__requests-1921", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-12308", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "astropy__astropy-13579", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-12708", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14089", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pylint-dev__pylint-7277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pydata__xarray-3993", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-9258", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pytest-dev__pytest-7324", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pydata__xarray-7233", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-21379", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pylint-dev__pylint-7080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-12096", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-9367", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-13615", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-7910", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11400", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11740", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-7462", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-13647", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14140", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-12663", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pylint-dev__pylint-4970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14534", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "astropy__astropy-13236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11292", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13417", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-20916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pydata__xarray-6721", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pytest-dev__pytest-7521", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "astropy__astropy-14598", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-21612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-25122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "psf__requests-2317", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-10449", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pytest-dev__pytest-8399", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14434", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13809", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-24570", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-12209", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-14087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-12193", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-25479", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-7757", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pydata__xarray-7229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16136", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11133", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pydata__xarray-4356", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11206", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15375", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-19040", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-13372", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-24539", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-9602", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-10466", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-10880", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pytest-dev__pytest-5787", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14311", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-25973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-22871", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-23476", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-25287", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "psf__requests-1724", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15280", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13794", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-24443", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-21568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16454", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13128", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-18211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-26194", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-17087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-8475", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14170", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-14894", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "astropy__astropy-7671", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-14496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15467", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13810", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15098", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11490", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13297", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-9288", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-12774", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15499", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-19495", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-8035", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-14141", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16877", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-9320", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-23824", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14608", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13837", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-20826", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-24970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-14983", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13344", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pytest-dev__pytest-10081", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16116", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-15349", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-13757", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-7889", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-24637", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16502", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-25747", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-11618", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-24870", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pylint-dev__pylint-8898", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-7985", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-12125", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-12858", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-13974", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14373", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15732", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-8459", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15380", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-10673", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-8548", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13121", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pytest-dev__pytest-7205", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-17318", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13089", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14787", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pydata__xarray-4094", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-14710", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13821", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-14629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16801", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15104", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-23950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-25232", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14053", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-15599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-10914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11749", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-23413", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-24026", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pytest-dev__pytest-5809", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16899", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15851", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-10097", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-12741", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-12406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-14053", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11734", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-15809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11790", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-14976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14007", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11433", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13401", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pytest-dev__pytest-7490", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-10908", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-10844", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-12273", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16333", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "astropy__astropy-13977", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-12973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-17139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-8638", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16255", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pylint-dev__pylint-6528", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-12276", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pydata__xarray-3305", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13670", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14122", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-19346", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-22456", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-12325", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-9658", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16667", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-23262", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-25102", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14238", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16560", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16145", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pallets__flask-5014", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-20428", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15572", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14349", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-11310", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-12143", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-11578", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "astropy__astropy-14508", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-9711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11149", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-8120", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11815", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-19783", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-22865", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "psf__requests-2931", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11333", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11451", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "psf__requests-5414", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-8551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11951", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-7530", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15252", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16256", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15268", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13925", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-13135", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16263", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "astropy__astropy-14995", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pydata__xarray-3677", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pydata__xarray-4687", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-13496", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14351", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "astropy__astropy-8707", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "astropy__astropy-13398", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-7454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-13798", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-12481", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16032", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-12155", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-18199", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "psf__requests-1142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13212", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11555", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-24213", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-10614", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pydata__xarray-6992", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-23299", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-20590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-25311", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13363", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-14531", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-23314", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-15345", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13012", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15022", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16527", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-11445", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pytest-dev__pytest-6197", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-9296", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-10297", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13807", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "astropy__astropy-14365", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-9230", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-13124", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-13480", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-10999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-22914", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-16766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-24149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15103", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "astropy__astropy-14182", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-8621", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-26342", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-21930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11880", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11603", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pytest-dev__pytest-6202", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-7590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-19637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pylint-dev__pylint-4551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13410", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pylint-dev__pylint-6386", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15278", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13023", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "astropy__astropy-8872", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "astropy__astropy-7166", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pydata__xarray-6938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16485", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "astropy__astropy-12907", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-22080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-20801", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-12304", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pytest-dev__pytest-10051", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "astropy__astropy-14539", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-25931", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-12585", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-20488", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "mwaskom__seaborn-3187", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15128", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-15017", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15561", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15161", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-12965", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15277", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-10323", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-22714", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-8056", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-10435", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16100", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "psf__requests-1766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pytest-dev__pytest-5262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-9281", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-20859", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13158", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-13877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-8595", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13343", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15525", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-17630", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14559", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pytest-dev__pytest-5631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-26466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16642", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pydata__xarray-7393", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pytest-dev__pytest-5840", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-15875", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-12039", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15382", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-14711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11885", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-25775", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pylint-dev__pylint-4661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-15100", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-17655", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-18763", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-26291", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-13031", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-24562", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pydata__xarray-2905", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-26208", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "astropy__astropy-7606", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "astropy__astropy-13453", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pytest-dev__pytest-7236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-13878", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pylint-dev__pylint-6903", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "astropy__astropy-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14915", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14539", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15503", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15037", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16612", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14404", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-12754", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-9229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-8265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14580", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pytest-dev__pytest-7571", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-8269", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13112", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13513", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pytest-dev__pytest-7432", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15731", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-16792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14034", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "astropy__astropy-14096", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14765", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-18698", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14771", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15957", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pydata__xarray-3095", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-19954", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-16886", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11239", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pydata__xarray-4629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16901", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13346", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-12262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-24661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11119", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "psf__requests-6028", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-20154", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-13142", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14752", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15563", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-16597", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15987", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-15976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-10973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15127", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-13439", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-7748", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13516", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-10554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16082", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13786", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-21596", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-26113", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14672", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13109", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11141", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-23412", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15368", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15814", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13933", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-12682", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-20676", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-8593", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-13852", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16819", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-12713", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14855", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-26323", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-13328", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-11510", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13195", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-24066", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-9591", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pytest-dev__pytest-7982", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14725", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-17029", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-14623", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pydata__xarray-6461", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16662", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-9461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11848", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-22719", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pydata__xarray-4966", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11728", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-24627", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-7440", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-12419", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-13989", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11163", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "matplotlib__matplotlib-24177", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14011", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-13279", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-23534", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "astropy__astropy-7336", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "mwaskom__seaborn-3069", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "scikit-learn__scikit-learn-13779", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11179", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pydata__xarray-4075", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16595", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-14248", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pylint-dev__pylint-4604", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "astropy__astropy-14369", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-20438", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14500", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-9673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "astropy__astropy-14309", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-9698", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-12489", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pydata__xarray-6744", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-17084", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "pydata__xarray-6599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11138", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11477", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-13091", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11099", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-16429", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-14792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-11999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sphinx-doc__sphinx-8721", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "sympy__sympy-21847", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "gpt-oss-120b", "example_id": "django__django-15973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pytest-dev__pytest-10356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11532", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-12050", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pydata__xarray-3151", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13512", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-18189", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13028", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11211", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-25332", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15863", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14376", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pydata__xarray-4695", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-16450", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-25960", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "psf__requests-1921", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-12308", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "astropy__astropy-13579", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-12708", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14089", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pylint-dev__pylint-7277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pydata__xarray-3993", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-9258", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pytest-dev__pytest-7324", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pydata__xarray-7233", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-21379", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pylint-dev__pylint-7080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-12096", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-9367", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-13615", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-7910", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11400", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11740", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-7462", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-13647", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14140", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-12663", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pylint-dev__pylint-4970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14534", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "astropy__astropy-13236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11292", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15569", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13417", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-20916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pydata__xarray-6721", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pytest-dev__pytest-7521", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "astropy__astropy-14598", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-21612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-25122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "psf__requests-2317", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-10449", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pytest-dev__pytest-8399", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14434", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13809", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-24570", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-12209", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-14087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-12193", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-25479", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-7757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pydata__xarray-7229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16136", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11133", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pydata__xarray-4356", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11206", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16493", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15375", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-19040", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-13372", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-24539", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-9602", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-10466", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-10880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pytest-dev__pytest-5787", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14311", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-25973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-22871", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-23476", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-25287", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "psf__requests-1724", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15280", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13794", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-24443", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-21568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13128", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-18211", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-26194", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-17087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-8475", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14170", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-14894", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "astropy__astropy-7671", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-14496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15467", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13810", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15098", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11490", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13297", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-9288", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-12774", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15499", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-19495", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-8035", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-14141", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-9320", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-23824", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14608", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13837", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-20826", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-24970", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-14983", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13344", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pytest-dev__pytest-10081", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16116", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-15349", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-13757", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-7889", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-24637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16502", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-25747", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-11618", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-24870", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pylint-dev__pylint-8898", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-7985", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-12125", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-12858", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-13974", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14373", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15732", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-8459", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15380", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-10673", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-8548", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13121", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pytest-dev__pytest-7205", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-17318", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14787", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pydata__xarray-4094", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-14710", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13821", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-14629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16801", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15104", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-23950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-25232", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14053", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-15599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-10914", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11749", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-23413", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-24026", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pytest-dev__pytest-5809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16899", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15851", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-10097", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-12741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-12406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11734", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-15809", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11790", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-14976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14007", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11433", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13401", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pytest-dev__pytest-7490", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-10908", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-10844", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-12273", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "astropy__astropy-13977", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-12973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-17139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-8638", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16255", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pylint-dev__pylint-6528", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-12276", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pydata__xarray-3305", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13670", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-19346", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-22456", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-12325", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-9658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16667", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-23262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-25102", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14238", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16560", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16145", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pallets__flask-5014", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-20428", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15572", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14349", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-11310", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-12143", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-11578", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "astropy__astropy-14508", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-9711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11149", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-8120", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11815", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-19783", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-22865", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "psf__requests-2931", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11333", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11451", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "psf__requests-5414", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-8551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11951", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-7530", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15252", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16256", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15268", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13925", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-13135", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16263", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "astropy__astropy-14995", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pydata__xarray-3677", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pydata__xarray-4687", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-13496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14351", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "astropy__astropy-8707", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "astropy__astropy-13398", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-7454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-13798", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-12481", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16032", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-12155", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-18199", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "psf__requests-1142", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13212", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11555", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-24213", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-10614", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pydata__xarray-6992", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-23299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-20590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-25311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13363", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-14531", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-23314", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-15345", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13012", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15022", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16527", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-11445", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pytest-dev__pytest-6197", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-9296", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-10297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13807", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "astropy__astropy-14365", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-9230", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-13124", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-13480", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-10999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-22914", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-16766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-24149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15103", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "astropy__astropy-14182", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11299", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-8621", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-26342", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-21930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11603", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pytest-dev__pytest-6202", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-7590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-19637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pylint-dev__pylint-4551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13410", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pylint-dev__pylint-6386", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15278", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13023", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "astropy__astropy-8872", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13569", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "astropy__astropy-7166", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pydata__xarray-6938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16485", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "astropy__astropy-12907", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-22080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-20801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-12304", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pytest-dev__pytest-10051", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "astropy__astropy-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-25931", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-12585", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-20488", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "mwaskom__seaborn-3187", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15128", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13033", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-15017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15561", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15161", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-12965", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-10323", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-22714", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-8056", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-10435", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "psf__requests-1766", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pytest-dev__pytest-5262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-9281", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-20859", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13158", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-13877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-8595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13343", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15525", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-17630", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14559", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pytest-dev__pytest-5631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-26466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16642", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15930", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pydata__xarray-7393", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pytest-dev__pytest-5840", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-15875", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-12039", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15382", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-14711", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11885", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-25775", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pylint-dev__pylint-4661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-15100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-17655", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-18763", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-26291", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-13031", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-24562", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pydata__xarray-2905", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-26208", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "astropy__astropy-7606", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "astropy__astropy-13453", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pytest-dev__pytest-7236", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-13878", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pylint-dev__pylint-6903", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "astropy__astropy-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14915", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14539", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15503", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15037", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16612", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14404", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-12754", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-9229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-8265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14580", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pytest-dev__pytest-7571", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-8269", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13112", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13513", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pytest-dev__pytest-7432", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15731", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-16792", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14034", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "astropy__astropy-14096", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14765", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-18698", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14771", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15957", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pydata__xarray-3095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-19954", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-16886", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11239", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13820", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pydata__xarray-4629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16901", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13346", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-12262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-24661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11119", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "psf__requests-6028", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-20154", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-13142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14752", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15563", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-16597", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15987", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-15976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-10973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15127", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-13439", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-7748", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13516", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-10554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16082", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13786", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-21596", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-26113", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14672", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13109", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11141", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-23412", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-13551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15368", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15814", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13933", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-12682", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-20676", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-8593", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-13852", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16819", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-12713", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14855", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-26323", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-13328", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-11510", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13195", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-24066", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-9591", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pytest-dev__pytest-7982", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14725", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-17029", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-14623", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pydata__xarray-6461", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16662", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-9461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11848", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-22719", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pydata__xarray-4966", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11728", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-24627", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-7440", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-12419", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-13989", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11163", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "matplotlib__matplotlib-24177", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14011", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-13279", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-23534", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "astropy__astropy-7336", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "mwaskom__seaborn-3069", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "scikit-learn__scikit-learn-13779", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11179", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pydata__xarray-4075", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16595", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-14248", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pylint-dev__pylint-4604", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "astropy__astropy-14369", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-20438", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14500", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-9673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "astropy__astropy-14309", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-9698", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-12489", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pydata__xarray-6744", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-17084", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "pydata__xarray-6599", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11138", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11477", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-13091", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11099", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-16429", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-14792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-11999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sphinx-doc__sphinx-8721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "sympy__sympy-21847", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Instruct", "example_id": "django__django-15973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pytest-dev__pytest-10356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11532", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-12050", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pydata__xarray-3151", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13512", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-18189", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13028", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13449", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11211", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-25332", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15863", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14376", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14017", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pydata__xarray-4695", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-16450", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14493", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-25960", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "psf__requests-1921", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-12308", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "astropy__astropy-13579", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-12708", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14089", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pylint-dev__pylint-7277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pydata__xarray-3993", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-9258", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pytest-dev__pytest-7324", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pydata__xarray-7233", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-21379", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pylint-dev__pylint-7080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-12096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-9367", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-13615", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-7910", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11400", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11740", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-7462", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-13647", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14140", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-12663", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pylint-dev__pylint-4970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14534", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "astropy__astropy-13236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11292", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13417", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-20916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pydata__xarray-6721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pytest-dev__pytest-7521", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "astropy__astropy-14598", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-21612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-25122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "psf__requests-2317", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-10449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pytest-dev__pytest-8399", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14434", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-24570", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-12209", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-14087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-12193", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-25479", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-7757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pydata__xarray-7229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16136", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11133", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pydata__xarray-4356", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11206", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15375", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-19040", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-13372", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-24539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-9602", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-10466", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-10880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pytest-dev__pytest-5787", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-25973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-22871", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-23476", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-25287", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "psf__requests-1724", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15280", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13794", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-24443", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-21568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13128", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-18211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-26194", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-17087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-8475", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14170", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-14894", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "astropy__astropy-7671", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-14496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15467", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13810", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15098", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11490", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13297", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-9288", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-12774", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15499", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-19495", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-8035", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-14141", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16877", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-9320", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-23824", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14608", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13837", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-20826", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-24970", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-14983", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13344", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pytest-dev__pytest-10081", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16116", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-15349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-13757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-7889", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-24637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16502", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-25747", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-11618", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-24870", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pylint-dev__pylint-8898", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-7985", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-12125", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-12858", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-13974", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14373", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15732", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-8459", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15380", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-10673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-8548", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13121", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pytest-dev__pytest-7205", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-17318", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14787", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pydata__xarray-4094", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-14710", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13821", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-14629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15104", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-23950", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-25232", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-15599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-10914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11749", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-23413", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-24026", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pytest-dev__pytest-5809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16899", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15851", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-10097", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-12741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-12406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11734", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-15809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11790", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-14976", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14007", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11433", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13401", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pytest-dev__pytest-7490", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-10908", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-10844", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-12273", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "astropy__astropy-13977", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-12973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-17139", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-8638", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16255", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pylint-dev__pylint-6528", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-12276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pydata__xarray-3305", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13670", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14122", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-19346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-22456", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-12325", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-9658", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16667", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-23262", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-25102", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14238", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16560", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16145", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pallets__flask-5014", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-20428", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15572", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-11310", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-12143", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-11578", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "astropy__astropy-14508", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-9711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11149", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13964", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-8120", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11815", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-19783", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-22865", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "psf__requests-2931", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11333", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11451", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "psf__requests-5414", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-8551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11951", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-7530", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15252", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16256", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15268", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13925", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-13135", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16263", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "astropy__astropy-14995", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pydata__xarray-3677", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pydata__xarray-4687", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-13496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14351", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "astropy__astropy-8707", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "astropy__astropy-13398", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-7454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-13798", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-12481", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16032", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-12155", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-18199", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "psf__requests-1142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13212", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11555", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-24213", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-10614", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pydata__xarray-6992", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-23299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-20590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-25311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13363", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-14531", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-23314", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-15345", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13012", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15022", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16527", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-11445", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pytest-dev__pytest-6197", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-9296", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-10297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13807", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "astropy__astropy-14365", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-9230", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-13124", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-13480", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-10999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-22914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-16766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-24149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15103", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "astropy__astropy-14182", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11299", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-8621", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-26342", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-21930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11603", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pytest-dev__pytest-6202", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-7590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-19637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pylint-dev__pylint-4551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13410", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pylint-dev__pylint-6386", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15278", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13741", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13023", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "astropy__astropy-8872", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "astropy__astropy-7166", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pydata__xarray-6938", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16485", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "astropy__astropy-12907", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-22080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-20801", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-12304", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pytest-dev__pytest-10051", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "astropy__astropy-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-25931", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-12585", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-20488", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "mwaskom__seaborn-3187", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15128", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-15017", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15561", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15161", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-12965", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15277", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-10323", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-22714", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-8056", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-10435", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16100", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "psf__requests-1766", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pytest-dev__pytest-5262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-9281", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-20859", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13158", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-13877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-8595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13343", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15525", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-17630", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14559", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pytest-dev__pytest-5631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-26466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16642", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15930", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pydata__xarray-7393", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13406", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pytest-dev__pytest-5840", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-15875", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-12039", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15382", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-14711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11885", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-25775", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pylint-dev__pylint-4661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-15100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-17655", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-18763", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-26291", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-13031", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-24562", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pydata__xarray-2905", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-26208", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "astropy__astropy-7606", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "astropy__astropy-13453", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pytest-dev__pytest-7236", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-13878", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pylint-dev__pylint-6903", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "astropy__astropy-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14915", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15503", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15037", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16612", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14404", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-12754", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-9229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-8265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14580", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pytest-dev__pytest-7571", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-8269", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13112", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13513", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pytest-dev__pytest-7432", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15731", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-16792", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14034", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "astropy__astropy-14096", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14765", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-18698", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14771", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15957", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pydata__xarray-3095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-19954", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-16886", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11239", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13820", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pydata__xarray-4629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16901", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13346", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-12262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-24661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11119", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "psf__requests-6028", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-20154", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-13142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14752", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15563", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-16597", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15987", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-15976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-10973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15127", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-13439", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-7748", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13516", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-10554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16082", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13786", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-21596", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-26113", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14672", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13109", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11141", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-23412", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15368", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15814", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13933", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-12682", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-20676", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-8593", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-13852", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16819", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-12713", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14855", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-26323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-13328", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-11510", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13195", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-24066", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-9591", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pytest-dev__pytest-7982", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14725", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-17029", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-14623", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pydata__xarray-6461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16662", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-9461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11848", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-22719", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pydata__xarray-4966", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11728", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-24627", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-7440", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-13989", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11163", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "matplotlib__matplotlib-24177", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14011", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-13279", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-23534", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "astropy__astropy-7336", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "mwaskom__seaborn-3069", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "scikit-learn__scikit-learn-13779", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11179", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pydata__xarray-4075", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-14248", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pylint-dev__pylint-4604", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "astropy__astropy-14369", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-20438", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14500", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-9673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "astropy__astropy-14309", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-9698", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-12489", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pydata__xarray-6744", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-17084", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "pydata__xarray-6599", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11138", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11477", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-13091", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11099", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-16429", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-14792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-11999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sphinx-doc__sphinx-8721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "sympy__sympy-21847", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.5 (2025-08-22)", "example_id": "django__django-15973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pytest-dev__pytest-10356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11532", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-12050", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pydata__xarray-3151", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13512", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-18189", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13028", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11211", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-25332", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15863", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14376", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pydata__xarray-4695", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-16450", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-25960", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "psf__requests-1921", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-12308", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "astropy__astropy-13579", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-12708", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pylint-dev__pylint-7277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pydata__xarray-3993", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-9258", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pytest-dev__pytest-7324", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pydata__xarray-7233", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-21379", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pylint-dev__pylint-7080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-12096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-9367", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-13615", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-7910", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11400", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11740", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-7462", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-13647", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14140", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-12663", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pylint-dev__pylint-4970", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14534", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "astropy__astropy-13236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11292", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13417", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-20916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pydata__xarray-6721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pytest-dev__pytest-7521", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "astropy__astropy-14598", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-21612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-25122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "psf__requests-2317", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-10449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pytest-dev__pytest-8399", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14434", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13809", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-24570", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-12209", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-14087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-12193", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-25479", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-7757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pydata__xarray-7229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16136", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11133", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pydata__xarray-4356", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11206", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15375", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-19040", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-13372", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-24539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-9602", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-10466", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-10880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pytest-dev__pytest-5787", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14311", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-25973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-22871", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-23476", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-25287", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "psf__requests-1724", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15280", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13794", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-24443", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-21568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16454", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-18211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-26194", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-17087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-8475", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14170", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-14894", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "astropy__astropy-7671", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-14496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15467", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13810", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15098", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11490", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13297", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-9288", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-12774", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15499", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-19495", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-8035", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-14141", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16877", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-9320", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-23824", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14608", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13837", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-20826", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-24970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-14983", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13344", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pytest-dev__pytest-10081", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16116", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-15349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-13757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-7889", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-24637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16502", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-25747", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-11618", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-24870", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pylint-dev__pylint-8898", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-7985", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-12125", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-12858", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-13974", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14373", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15732", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-8459", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15380", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-10673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-8548", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13121", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pytest-dev__pytest-7205", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-17318", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14787", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pydata__xarray-4094", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-14710", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13821", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-14629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15104", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-23950", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-25232", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-15599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-10914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11749", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-23413", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-24026", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pytest-dev__pytest-5809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16899", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15851", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-10097", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-12741", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-12406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11734", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-15809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11790", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-14976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14007", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11433", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13401", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pytest-dev__pytest-7490", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-10908", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-10844", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-12273", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "astropy__astropy-13977", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-12973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-17139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-8638", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16255", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pylint-dev__pylint-6528", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-12276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pydata__xarray-3305", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13670", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-19346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-22456", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-12325", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-9658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16667", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-23262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-25102", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14238", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16560", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16145", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pallets__flask-5014", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-20428", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15572", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-11310", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-12143", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-11578", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "astropy__astropy-14508", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-9711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13964", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-8120", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11815", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-19783", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-22865", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "psf__requests-2931", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11451", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "psf__requests-5414", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-8551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11951", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-7530", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15252", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16256", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15268", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13925", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-13135", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16263", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "astropy__astropy-14995", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13568", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pydata__xarray-3677", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pydata__xarray-4687", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-13496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14351", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "astropy__astropy-8707", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "astropy__astropy-13398", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-7454", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-13798", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-12481", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16032", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-12155", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-18199", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "psf__requests-1142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13212", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11555", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-24213", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-10614", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pydata__xarray-6992", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-23299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-20590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-25311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13363", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-14531", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-23314", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-15345", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13012", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15022", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16527", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-11445", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pytest-dev__pytest-6197", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-9296", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-10297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13807", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "astropy__astropy-14365", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-9230", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-13124", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-13480", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-10999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-22914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-16766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-24149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15103", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "astropy__astropy-14182", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11299", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-8621", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-26342", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-21930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11603", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pytest-dev__pytest-6202", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-7590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-19637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pylint-dev__pylint-4551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13410", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pylint-dev__pylint-6386", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15278", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13023", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "astropy__astropy-8872", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15554", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "astropy__astropy-7166", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pydata__xarray-6938", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16485", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "astropy__astropy-12907", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-22080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-20801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-12304", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pytest-dev__pytest-10051", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "astropy__astropy-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-25931", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-12585", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-20488", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "mwaskom__seaborn-3187", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15128", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13033", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-15017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15561", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15161", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-12965", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15277", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-10323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-22714", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-8056", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-10435", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "psf__requests-1766", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pytest-dev__pytest-5262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-9281", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-20859", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13158", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-13877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-8595", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13343", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15525", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-17630", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14559", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pytest-dev__pytest-5631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-26466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16642", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15930", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pydata__xarray-7393", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13406", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pytest-dev__pytest-5840", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-15875", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-12039", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15382", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-14711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11885", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-25775", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pylint-dev__pylint-4661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-15100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-17655", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-18763", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-26291", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-13031", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-24562", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pydata__xarray-2905", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-26208", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "astropy__astropy-7606", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "astropy__astropy-13453", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pytest-dev__pytest-7236", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-13878", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pylint-dev__pylint-6903", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "astropy__astropy-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14915", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15503", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15037", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16612", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14404", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-12754", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-9229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-8265", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14580", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pytest-dev__pytest-7571", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-8269", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13112", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13513", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pytest-dev__pytest-7432", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15731", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-16792", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14034", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "astropy__astropy-14096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14765", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-18698", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14771", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15957", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pydata__xarray-3095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-19954", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-16886", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11239", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13820", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pydata__xarray-4629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16901", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-12262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-24661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11119", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "psf__requests-6028", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-20154", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-13142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14752", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15563", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-16597", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15987", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-15976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-10973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15127", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-13439", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-7748", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13516", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-10554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16082", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13786", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-21596", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-26113", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14672", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13109", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11141", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-23412", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-13551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15368", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15814", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13933", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-12682", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-20676", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-8593", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-13852", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16819", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-12713", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14855", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-26323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-13328", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-11510", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13195", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15916", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-24066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-9591", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pytest-dev__pytest-7982", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14725", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-17029", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-14623", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pydata__xarray-6461", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16662", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-9461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11848", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-22719", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pydata__xarray-4966", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11728", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-24627", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-7440", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-13989", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11163", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "matplotlib__matplotlib-24177", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14011", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-13279", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-23534", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "astropy__astropy-7336", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "mwaskom__seaborn-3069", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "scikit-learn__scikit-learn-13779", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11179", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pydata__xarray-4075", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-14248", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pylint-dev__pylint-4604", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "astropy__astropy-14369", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-20438", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14500", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-9673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "astropy__astropy-14309", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-9698", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-12489", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pydata__xarray-6744", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-17084", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "pydata__xarray-6599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11138", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11477", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-13091", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11099", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-16429", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-14792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-11999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sphinx-doc__sphinx-8721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "sympy__sympy-21847", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Sonnet (20250929)", "example_id": "django__django-15973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pytest-dev__pytest-10356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11532", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-12050", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pydata__xarray-3151", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13512", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-18189", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13028", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11211", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-25332", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15863", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14376", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pydata__xarray-4695", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-16450", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-25960", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "psf__requests-1921", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-12308", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "astropy__astropy-13579", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-12708", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pylint-dev__pylint-7277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pydata__xarray-3993", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-9258", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pytest-dev__pytest-7324", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pydata__xarray-7233", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-21379", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pylint-dev__pylint-7080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-12096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-9367", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-13615", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-7910", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11400", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11740", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-7462", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-13647", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14140", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-12663", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pylint-dev__pylint-4970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14534", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "astropy__astropy-13236", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11292", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13417", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-20916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pydata__xarray-6721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pytest-dev__pytest-7521", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "astropy__astropy-14598", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-21612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-25122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "psf__requests-2317", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-10449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pytest-dev__pytest-8399", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14434", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-24570", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-12209", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-14087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-12193", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-25479", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-7757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pydata__xarray-7229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16136", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11133", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pydata__xarray-4356", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11206", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15375", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-19040", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-13372", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-24539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-9602", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-10466", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-10880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pytest-dev__pytest-5787", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11265", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-25973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-22871", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-23476", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-25287", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "psf__requests-1724", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15280", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13794", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-24443", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-21568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16454", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-18211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-26194", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-17087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-8475", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14170", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-14894", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "astropy__astropy-7671", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16938", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11276", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-14496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15467", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13810", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15098", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11490", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-9288", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-12774", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15499", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-19495", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-8035", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-14141", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16877", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-9320", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-23824", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14608", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13837", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-20826", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-24970", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-14983", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13344", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pytest-dev__pytest-10081", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16116", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-15349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-13757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-7889", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-24637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16502", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-25747", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-11618", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-24870", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pylint-dev__pylint-8898", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-7985", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-12125", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-12858", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-13974", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14373", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15732", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-8459", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15380", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-10673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-8548", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13121", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pytest-dev__pytest-7205", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-17318", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14787", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pydata__xarray-4094", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-14710", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13821", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-14629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15104", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-23950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-25232", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-15599", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-10914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11749", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-23413", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-24026", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pytest-dev__pytest-5809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16899", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15851", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-10097", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-12741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-12406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11734", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-15809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11790", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-14976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14007", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11433", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13401", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pytest-dev__pytest-7490", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-10908", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-10844", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-12273", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "astropy__astropy-13977", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-12973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-17139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-8638", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16255", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pylint-dev__pylint-6528", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-12276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pydata__xarray-3305", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13670", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-19346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-22456", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-12325", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-9658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16667", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-23262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-25102", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14238", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16560", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16145", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pallets__flask-5014", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-20428", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15572", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11964", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-11310", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-12143", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-11578", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "astropy__astropy-14508", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-9711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11149", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13964", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-8120", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11815", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-19783", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-22865", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "psf__requests-2931", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11451", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "psf__requests-5414", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-8551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11951", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-7530", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15252", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16256", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15268", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13925", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-13135", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16263", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "astropy__astropy-14995", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13568", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pydata__xarray-3677", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pydata__xarray-4687", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-13496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14351", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "astropy__astropy-8707", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "astropy__astropy-13398", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-7454", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-13798", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-12481", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16032", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-12155", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-18199", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "psf__requests-1142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13212", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11555", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-24213", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-10614", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pydata__xarray-6992", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-23299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-20590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-25311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13363", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-14531", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-23314", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-15345", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13012", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15022", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16527", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-11445", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pytest-dev__pytest-6197", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-9296", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-10297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13807", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "astropy__astropy-14365", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-9230", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-13124", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-13480", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-10999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-22914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-16766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-24149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15103", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "astropy__astropy-14182", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11299", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-8621", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-26342", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-21930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11603", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pytest-dev__pytest-6202", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-7590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-19637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pylint-dev__pylint-4551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13410", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pylint-dev__pylint-6386", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15278", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13023", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "astropy__astropy-8872", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15554", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "astropy__astropy-7166", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pydata__xarray-6938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16485", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "astropy__astropy-12907", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-22080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-20801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-12304", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pytest-dev__pytest-10051", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "astropy__astropy-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-25931", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-12585", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-20488", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "mwaskom__seaborn-3187", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13033", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-15017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15561", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15161", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-12965", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15277", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-10323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-22714", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-8056", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-10435", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "psf__requests-1766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pytest-dev__pytest-5262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-9281", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-20859", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13158", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-13877", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-8595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13343", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15525", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-17630", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14559", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pytest-dev__pytest-5631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-26466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16642", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15930", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pydata__xarray-7393", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13406", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pytest-dev__pytest-5840", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-15875", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-12039", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15382", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-14711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11885", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-25775", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pylint-dev__pylint-4661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-15100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-17655", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-18763", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-26291", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-13031", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-24562", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pydata__xarray-2905", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-26208", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "astropy__astropy-7606", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "astropy__astropy-13453", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pytest-dev__pytest-7236", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-13878", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pylint-dev__pylint-6903", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "astropy__astropy-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14915", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15503", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15037", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16612", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14404", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-12754", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-9229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-8265", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14580", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pytest-dev__pytest-7571", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-8269", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13112", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13513", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pytest-dev__pytest-7432", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15731", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-16792", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14034", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "astropy__astropy-14096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14765", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-18698", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14771", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15957", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pydata__xarray-3095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-19954", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-16886", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11239", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13820", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pydata__xarray-4629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16901", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-12262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-24661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11119", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "psf__requests-6028", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-20154", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-13142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14752", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15563", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-16597", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15987", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-15976", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-10973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15127", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-13439", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-7748", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13516", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-10554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16082", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13786", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-21596", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-26113", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14672", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13109", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11141", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-23412", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-13551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15368", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15814", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13933", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-12682", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-20676", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-8593", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-13852", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16819", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-12713", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14855", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-26323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-13328", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-11510", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13195", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-24066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-9591", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pytest-dev__pytest-7982", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14725", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-17029", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-14623", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pydata__xarray-6461", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16662", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-9461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11848", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-22719", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pydata__xarray-4966", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11728", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-24627", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-7440", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-13989", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11163", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "matplotlib__matplotlib-24177", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14011", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-13279", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-23534", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "astropy__astropy-7336", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "mwaskom__seaborn-3069", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "scikit-learn__scikit-learn-13779", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11179", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pydata__xarray-4075", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-14248", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pylint-dev__pylint-4604", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "astropy__astropy-14369", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-20438", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14500", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-9673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "astropy__astropy-14309", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-9698", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-12489", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pydata__xarray-6744", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-17084", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "pydata__xarray-6599", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11138", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11477", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-13091", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11099", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-16429", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-14792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-11999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sphinx-doc__sphinx-8721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "sympy__sympy-21847", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Gemini 3 Pro Preview (2025-11-18)", "example_id": "django__django-15973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pytest-dev__pytest-10356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11532", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-12050", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pydata__xarray-3151", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13512", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-18189", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13028", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-25332", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15863", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14376", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pydata__xarray-4695", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-16450", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-25960", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "psf__requests-1921", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-12308", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "astropy__astropy-13579", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-12708", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pylint-dev__pylint-7277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pydata__xarray-3993", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9258", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pytest-dev__pytest-7324", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pydata__xarray-7233", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-21379", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pylint-dev__pylint-7080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-12096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9367", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-13615", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7910", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11400", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11740", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7462", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-13647", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14140", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-12663", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pylint-dev__pylint-4970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14534", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "astropy__astropy-13236", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11292", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13417", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-20916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pydata__xarray-6721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pytest-dev__pytest-7521", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "astropy__astropy-14598", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-21612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-25122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "psf__requests-2317", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-10449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pytest-dev__pytest-8399", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14434", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-24570", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-12209", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-12193", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-25479", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pydata__xarray-7229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16136", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11133", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pydata__xarray-4356", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11206", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15375", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-19040", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-13372", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-24539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9602", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-10466", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-10880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pytest-dev__pytest-5787", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-25973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-22871", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-23476", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-25287", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "psf__requests-1724", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15280", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13794", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-24443", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-21568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16454", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-18211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-26194", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-17087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8475", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14170", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14894", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "astropy__astropy-7671", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16938", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15467", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13810", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15098", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11490", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13297", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-9288", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-12774", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15499", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-19495", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8035", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14141", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16877", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9320", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-23824", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14608", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13837", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-20826", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-24970", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14983", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13344", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pytest-dev__pytest-10081", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16116", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-15349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-13757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7889", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-24637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16502", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-25747", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-11618", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-24870", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pylint-dev__pylint-8898", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7985", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-12125", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-12858", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-13974", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14373", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15732", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8459", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15380", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-10673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8548", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13121", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pytest-dev__pytest-7205", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-17318", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14787", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pydata__xarray-4094", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14710", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13821", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15104", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-23950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-25232", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-15599", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-10914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11749", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-23413", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-24026", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pytest-dev__pytest-5809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16899", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15851", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-10097", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-12741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-12406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11734", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-15809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11790", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-14976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14007", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11433", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13401", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pytest-dev__pytest-7490", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-10908", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-10844", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-12273", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "astropy__astropy-13977", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-12973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-17139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8638", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16255", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pylint-dev__pylint-6528", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-12276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pydata__xarray-3305", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13670", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-19346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-22456", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-12325", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9658", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16667", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-23262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-25102", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14238", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16560", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16145", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pallets__flask-5014", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-20428", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15572", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-11310", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-12143", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-11578", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "astropy__astropy-14508", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13964", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8120", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11815", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-19783", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-22865", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "psf__requests-2931", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11451", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "psf__requests-5414", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11951", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-7530", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15252", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16256", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15268", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13925", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13135", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16263", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "astropy__astropy-14995", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pydata__xarray-3677", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pydata__xarray-4687", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14351", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "astropy__astropy-8707", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "astropy__astropy-13398", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7454", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-13798", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-12481", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16032", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-12155", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-18199", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "psf__requests-1142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13212", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11555", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-24213", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-10614", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pydata__xarray-6992", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-23299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-20590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-25311", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13363", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-14531", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-23314", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-15345", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13012", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15022", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16527", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-11445", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pytest-dev__pytest-6197", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-9296", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-10297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13807", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "astropy__astropy-14365", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9230", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13124", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-13480", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-10999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-22914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-16766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-24149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15103", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "astropy__astropy-14182", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11299", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8621", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-26342", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-21930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16950", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11603", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pytest-dev__pytest-6202", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-19637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pylint-dev__pylint-4551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13410", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pylint-dev__pylint-6386", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15278", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13023", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "astropy__astropy-8872", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "astropy__astropy-7166", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pydata__xarray-6938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16485", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "astropy__astropy-12907", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-22080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-20801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-12304", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pytest-dev__pytest-10051", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "astropy__astropy-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-25931", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-12585", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-20488", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "mwaskom__seaborn-3187", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13033", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-15017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15561", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15161", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-12965", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15277", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-10323", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-22714", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8056", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-10435", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "psf__requests-1766", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pytest-dev__pytest-5262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9281", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-20859", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13158", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-13877", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13343", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15525", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-17630", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14559", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pytest-dev__pytest-5631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-26466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16642", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15930", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pydata__xarray-7393", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13406", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pytest-dev__pytest-5840", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-15875", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-12039", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15382", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-14711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11885", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-25775", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pylint-dev__pylint-4661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-15100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-17655", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-18763", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-26291", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-13031", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-24562", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pydata__xarray-2905", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-26208", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "astropy__astropy-7606", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "astropy__astropy-13453", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pytest-dev__pytest-7236", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-13878", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pylint-dev__pylint-6903", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "astropy__astropy-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14915", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14539", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15503", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15037", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16612", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14404", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-12754", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14580", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pytest-dev__pytest-7571", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8269", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13112", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13513", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pytest-dev__pytest-7432", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15731", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-16792", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14034", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "astropy__astropy-14096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14765", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-18698", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14771", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15957", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pydata__xarray-3095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-19954", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-16886", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11239", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13820", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pydata__xarray-4629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16901", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13346", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-12262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-24661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11119", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "psf__requests-6028", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-20154", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14752", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15563", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-16597", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15987", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-15976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-10973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15127", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13439", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7748", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13516", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-10554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16082", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13786", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-21596", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-26113", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14672", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13109", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11141", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-23412", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-13551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15368", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15814", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13933", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-12682", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-20676", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8593", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-13852", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16819", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-12713", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14855", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-26323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13328", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-11510", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13195", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-24066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9591", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pytest-dev__pytest-7982", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14725", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-17029", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-14623", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pydata__xarray-6461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16662", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11848", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-22719", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pydata__xarray-4966", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11728", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-24627", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-7440", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-13989", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11163", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "matplotlib__matplotlib-24177", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14011", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-13279", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-23534", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "astropy__astropy-7336", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "mwaskom__seaborn-3069", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13779", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11179", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pydata__xarray-4075", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-14248", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pylint-dev__pylint-4604", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "astropy__astropy-14369", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-20438", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14500", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "astropy__astropy-14309", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-9698", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-12489", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pydata__xarray-6744", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-17084", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "pydata__xarray-6599", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11138", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11477", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-13091", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11099", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-16429", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-14792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-11999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sphinx-doc__sphinx-8721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "sympy__sympy-21847", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1 (2025-11-13) (medium reasoning)", "example_id": "django__django-15973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pytest-dev__pytest-10356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11532", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-12050", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pydata__xarray-3151", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13512", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-18189", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13028", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11211", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-25332", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15863", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14376", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pydata__xarray-4695", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-16450", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-25960", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "psf__requests-1921", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-12308", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "astropy__astropy-13579", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-12708", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pylint-dev__pylint-7277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pydata__xarray-3993", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-9258", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pytest-dev__pytest-7324", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pydata__xarray-7233", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-21379", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pylint-dev__pylint-7080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-12096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-9367", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-13615", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-7910", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11400", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11740", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-7462", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-13647", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14140", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-12663", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pylint-dev__pylint-4970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14534", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "astropy__astropy-13236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11292", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13417", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-20916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pydata__xarray-6721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pytest-dev__pytest-7521", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "astropy__astropy-14598", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-21612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-25122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "psf__requests-2317", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-10449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pytest-dev__pytest-8399", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14434", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-24570", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-12209", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-14087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-12193", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-25479", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-7757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pydata__xarray-7229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16136", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11133", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pydata__xarray-4356", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11206", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15375", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-19040", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-13372", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-24539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-9602", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-10466", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-10880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pytest-dev__pytest-5787", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-25973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-22871", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-23476", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-25287", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "psf__requests-1724", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15280", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13794", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-24443", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-21568", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16454", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-18211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-26194", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-17087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-8475", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14170", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-14894", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "astropy__astropy-7671", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16938", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-14496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15467", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13810", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15098", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11490", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-9288", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-12774", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15499", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-19495", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-8035", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-14141", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-9320", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-23824", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14608", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13837", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-20826", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-24970", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-14983", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13344", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pytest-dev__pytest-10081", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16116", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-15349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-13757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-7889", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-24637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16502", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-25747", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-11618", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-24870", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pylint-dev__pylint-8898", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-7985", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-12125", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-12858", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-13974", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14373", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15732", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-8459", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15380", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-10673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-8548", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13121", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pytest-dev__pytest-7205", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-17318", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14787", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pydata__xarray-4094", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-14710", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13821", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-14629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15104", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-23950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-25232", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-15599", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-10914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11749", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-23413", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-24026", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pytest-dev__pytest-5809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16899", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15851", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-10097", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-12741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-12406", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11734", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-15809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11790", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-14976", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14007", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11433", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13401", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pytest-dev__pytest-7490", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-10908", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-10844", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-12273", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "astropy__astropy-13977", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-12973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-17139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-8638", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16255", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pylint-dev__pylint-6528", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-12276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pydata__xarray-3305", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13670", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-19346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-22456", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-12325", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-9658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16667", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-23262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-25102", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14238", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16560", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16145", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pallets__flask-5014", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-20428", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15572", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-11310", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-12143", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-11578", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "astropy__astropy-14508", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-9711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13964", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-8120", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11815", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-19783", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-22865", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "psf__requests-2931", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11451", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "psf__requests-5414", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-8551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11951", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-7530", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15252", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16256", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15268", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13925", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-13135", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16263", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "astropy__astropy-14995", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13568", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pydata__xarray-3677", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pydata__xarray-4687", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-13496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14351", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "astropy__astropy-8707", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "astropy__astropy-13398", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-7454", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-13798", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-12481", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16032", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-12155", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-18199", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "psf__requests-1142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13212", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11555", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-24213", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-10614", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pydata__xarray-6992", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-23299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-20590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-25311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13363", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-14531", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-23314", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-15345", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13012", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15022", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16527", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-11445", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pytest-dev__pytest-6197", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-9296", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-10297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13807", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "astropy__astropy-14365", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-9230", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-13124", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-13480", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-10999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-22914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-16766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-24149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15103", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "astropy__astropy-14182", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11299", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-8621", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-26342", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-21930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16950", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11603", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pytest-dev__pytest-6202", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-7590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-19637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pylint-dev__pylint-4551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13410", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pylint-dev__pylint-6386", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15278", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13023", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "astropy__astropy-8872", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15554", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "astropy__astropy-7166", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pydata__xarray-6938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16485", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "astropy__astropy-12907", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-22080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-20801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-12304", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pytest-dev__pytest-10051", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "astropy__astropy-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-25931", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-12585", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-20488", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "mwaskom__seaborn-3187", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13033", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-15017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15561", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15161", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-12965", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15277", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-10323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-22714", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-8056", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-10435", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16100", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "psf__requests-1766", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pytest-dev__pytest-5262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-9281", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-20859", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13158", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-13877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-8595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13343", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15525", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-17630", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14559", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pytest-dev__pytest-5631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-26466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16642", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15930", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pydata__xarray-7393", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13406", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pytest-dev__pytest-5840", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-15875", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-12039", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15382", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-14711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11885", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-25775", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pylint-dev__pylint-4661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-15100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-17655", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-18763", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-26291", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-13031", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-24562", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pydata__xarray-2905", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-26208", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "astropy__astropy-7606", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "astropy__astropy-13453", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pytest-dev__pytest-7236", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-13878", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pylint-dev__pylint-6903", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "astropy__astropy-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14915", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15503", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15037", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16612", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14404", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-12754", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-9229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-8265", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14580", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pytest-dev__pytest-7571", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-8269", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13112", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13513", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pytest-dev__pytest-7432", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15731", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-16792", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14034", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "astropy__astropy-14096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14765", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-18698", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14771", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15957", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pydata__xarray-3095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-19954", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-16886", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11239", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13820", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pydata__xarray-4629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16901", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-12262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-24661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11119", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "psf__requests-6028", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-20154", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-13142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14752", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15563", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-16597", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15987", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-15976", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-10973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15127", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-13439", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-7748", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13516", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-10554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16082", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13786", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-21596", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-26113", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14672", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13109", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11141", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-23412", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-13551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15368", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15814", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13933", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-12682", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-20676", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-8593", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-13852", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16819", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-12713", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14855", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-26323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-13328", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-11510", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13195", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15916", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-24066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-9591", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pytest-dev__pytest-7982", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14725", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-17029", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-14623", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pydata__xarray-6461", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16662", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-9461", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11848", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-22719", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pydata__xarray-4966", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11728", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-24627", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-7440", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-13989", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11163", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "matplotlib__matplotlib-24177", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14011", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-13279", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-23534", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "astropy__astropy-7336", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "mwaskom__seaborn-3069", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "scikit-learn__scikit-learn-13779", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11179", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pydata__xarray-4075", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-14248", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pylint-dev__pylint-4604", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "astropy__astropy-14369", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-20438", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14500", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-9673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "astropy__astropy-14309", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-9698", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-12489", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pydata__xarray-6744", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-17084", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "pydata__xarray-6599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11138", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11477", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-13091", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11099", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-16429", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-14792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-11999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sphinx-doc__sphinx-8721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "sympy__sympy-21847", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Claude 4.5 Opus medium (20251101)", "example_id": "django__django-15973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pytest-dev__pytest-10356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11532", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-12050", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pydata__xarray-3151", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13512", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-18189", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13028", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11211", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-25332", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15863", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14376", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pydata__xarray-4695", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-16450", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-25960", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "psf__requests-1921", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-12308", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "astropy__astropy-13579", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-12708", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pylint-dev__pylint-7277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pydata__xarray-3993", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-9258", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pytest-dev__pytest-7324", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pydata__xarray-7233", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-21379", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pylint-dev__pylint-7080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-12096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-9367", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-13615", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-7910", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11400", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11740", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-7462", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-13647", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14140", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-12663", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pylint-dev__pylint-4970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14534", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "astropy__astropy-13236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11292", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13417", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-20916", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pydata__xarray-6721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pytest-dev__pytest-7521", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "astropy__astropy-14598", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-21612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-25122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "psf__requests-2317", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-10449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pytest-dev__pytest-8399", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14434", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-24570", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-12209", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-12193", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-25479", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-7757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pydata__xarray-7229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16136", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11133", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pydata__xarray-4356", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11206", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15375", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-19040", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-13372", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-24539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-9602", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-10466", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-10880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pytest-dev__pytest-5787", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-25973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-22871", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-23476", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-25287", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "psf__requests-1724", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15280", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13794", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-24443", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-21568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13128", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-18211", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-26194", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-17087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-8475", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14170", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14894", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "astropy__astropy-7671", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16938", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15467", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13810", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15098", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11490", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-9288", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-12774", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15499", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-19495", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-8035", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14141", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16877", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-9320", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-23824", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14608", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13837", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-20826", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-24970", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14983", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13344", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pytest-dev__pytest-10081", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16116", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-15349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-13757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-7889", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-24637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16502", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-25747", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-11618", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-24870", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pylint-dev__pylint-8898", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-7985", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-12125", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-12858", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-13974", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14373", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15732", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-8459", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15380", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-10673", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-8548", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13121", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pytest-dev__pytest-7205", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-17318", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14787", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pydata__xarray-4094", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14710", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13821", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15104", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-23950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-25232", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-15599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-10914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11749", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-23413", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-24026", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pytest-dev__pytest-5809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16899", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15851", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-10097", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-12741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-12406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11734", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-15809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11790", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-14976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14007", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11433", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13401", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pytest-dev__pytest-7490", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-10908", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-10844", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-12273", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "astropy__astropy-13977", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-12973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-17139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-8638", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16255", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pylint-dev__pylint-6528", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-12276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pydata__xarray-3305", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13670", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-19346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-22456", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-12325", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-9658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16667", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-23262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-25102", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14238", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16560", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16145", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pallets__flask-5014", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-20428", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15572", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11964", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-11310", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-12143", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-11578", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "astropy__astropy-14508", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-9711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13964", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-8120", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11815", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-19783", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-22865", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "psf__requests-2931", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11333", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11451", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "psf__requests-5414", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-8551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11951", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-7530", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15252", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16256", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15268", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13925", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13135", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16263", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "astropy__astropy-14995", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13568", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pydata__xarray-3677", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pydata__xarray-4687", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14351", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "astropy__astropy-8707", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "astropy__astropy-13398", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-7454", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-13798", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-12481", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16032", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-12155", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-18199", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "psf__requests-1142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13212", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11555", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-24213", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-10614", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pydata__xarray-6992", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-23299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-20590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-25311", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13363", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-14531", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-23314", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-15345", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13012", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15022", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16527", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-11445", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pytest-dev__pytest-6197", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-9296", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-10297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13807", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "astropy__astropy-14365", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-9230", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13124", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-13480", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-10999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-22914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-16766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-24149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15103", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "astropy__astropy-14182", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11299", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-8621", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-26342", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-21930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11603", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pytest-dev__pytest-6202", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-7590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-19637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pylint-dev__pylint-4551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13410", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pylint-dev__pylint-6386", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15278", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13023", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "astropy__astropy-8872", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "astropy__astropy-7166", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pydata__xarray-6938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16485", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "astropy__astropy-12907", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-22080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-20801", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-12304", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pytest-dev__pytest-10051", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "astropy__astropy-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-25931", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-12585", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-20488", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "mwaskom__seaborn-3187", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13033", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-15017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15561", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15161", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-12965", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15277", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-10323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-22714", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-8056", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-10435", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "psf__requests-1766", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pytest-dev__pytest-5262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-9281", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-20859", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13158", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-13877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-8595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13343", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15525", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-17630", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14559", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pytest-dev__pytest-5631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-26466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16642", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15930", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pydata__xarray-7393", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pytest-dev__pytest-5840", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-15875", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-12039", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15382", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-14711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11885", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-25775", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pylint-dev__pylint-4661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-15100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-17655", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-18763", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-26291", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-13031", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-24562", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pydata__xarray-2905", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-26208", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "astropy__astropy-7606", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "astropy__astropy-13453", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pytest-dev__pytest-7236", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-13878", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pylint-dev__pylint-6903", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "astropy__astropy-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14915", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15503", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15037", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16612", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14404", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-12754", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-9229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-8265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14580", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pytest-dev__pytest-7571", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-8269", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13112", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13513", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pytest-dev__pytest-7432", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15731", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-16792", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14034", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "astropy__astropy-14096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14765", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-18698", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14771", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15957", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pydata__xarray-3095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-19954", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-16886", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11239", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13820", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pydata__xarray-4629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16901", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-12262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-24661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11119", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "psf__requests-6028", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-20154", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14752", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15563", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-16597", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15987", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-15976", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-10973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15127", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13439", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-7748", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13516", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-10554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16082", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13786", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-21596", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-26113", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14672", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13109", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11141", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-23412", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15368", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15814", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13933", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-12682", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-20676", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-8593", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-13852", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16819", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-12713", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14855", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-26323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13328", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-11510", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13195", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-24066", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-9591", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pytest-dev__pytest-7982", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14725", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-17029", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-14623", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pydata__xarray-6461", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16662", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-9461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11848", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-22719", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pydata__xarray-4966", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11728", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-24627", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-7440", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-13989", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11163", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "matplotlib__matplotlib-24177", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14011", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-13279", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-23534", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "astropy__astropy-7336", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "mwaskom__seaborn-3069", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "scikit-learn__scikit-learn-13779", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11179", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pydata__xarray-4075", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-14248", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pylint-dev__pylint-4604", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "astropy__astropy-14369", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-20438", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14500", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-9673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "astropy__astropy-14309", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-9698", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-12489", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pydata__xarray-6744", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-17084", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "pydata__xarray-6599", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11138", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11477", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-13091", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11099", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-16429", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-14792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-11999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sphinx-doc__sphinx-8721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "sympy__sympy-21847", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.1-codex (medium reasoning)", "example_id": "django__django-15973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pytest-dev__pytest-10356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11532", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-12050", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pydata__xarray-3151", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13512", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-18189", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13028", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11211", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-25332", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15863", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14376", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pydata__xarray-4695", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-16450", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-25960", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "psf__requests-1921", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-12308", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "astropy__astropy-13579", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-12708", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pylint-dev__pylint-7277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pydata__xarray-3993", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-9258", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pytest-dev__pytest-7324", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pydata__xarray-7233", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-21379", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pylint-dev__pylint-7080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-12096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-9367", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-13615", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-7910", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11400", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11740", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-7462", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-13647", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14140", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-12663", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pylint-dev__pylint-4970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14534", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "astropy__astropy-13236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11292", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13417", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-20916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pydata__xarray-6721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pytest-dev__pytest-7521", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "astropy__astropy-14598", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-21612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-25122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "psf__requests-2317", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-10449", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pytest-dev__pytest-8399", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14434", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-24570", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-12209", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-14087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-12193", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-25479", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-7757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pydata__xarray-7229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16136", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11133", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pydata__xarray-4356", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11206", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15375", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-19040", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-13372", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-24539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-9602", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-10466", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-10880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pytest-dev__pytest-5787", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-25973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-22871", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-23476", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-25287", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "psf__requests-1724", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15280", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13794", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-24443", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-21568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-18211", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-26194", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-17087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-8475", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14170", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-14894", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "astropy__astropy-7671", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-14496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15467", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13810", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15098", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11490", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-9288", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-12774", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15499", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-19495", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-8035", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-14141", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-9320", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-23824", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14608", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13837", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-20826", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-24970", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-14983", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13344", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pytest-dev__pytest-10081", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16116", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-15349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-13757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-7889", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-24637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16502", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-25747", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-11618", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-24870", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pylint-dev__pylint-8898", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-7985", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-12125", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-12858", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-13974", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14373", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15732", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-8459", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15380", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-10673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-8548", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13121", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pytest-dev__pytest-7205", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-17318", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14787", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pydata__xarray-4094", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-14710", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13821", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-14629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15104", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-23950", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-25232", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14053", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-15599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-10914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11749", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-23413", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-24026", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pytest-dev__pytest-5809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16899", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15851", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-10097", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-12741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-12406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11734", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-15809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11790", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-14976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14007", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11433", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13401", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pytest-dev__pytest-7490", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-10908", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-10844", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-12273", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "astropy__astropy-13977", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-12973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-17139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-8638", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16255", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pylint-dev__pylint-6528", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-12276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pydata__xarray-3305", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13670", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-19346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-22456", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-12325", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-9658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16667", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-23262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-25102", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14238", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16560", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16145", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pallets__flask-5014", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-20428", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15572", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-11310", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-12143", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-11578", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "astropy__astropy-14508", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-9711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11149", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-8120", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11815", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-19783", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-22865", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "psf__requests-2931", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11333", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11451", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "psf__requests-5414", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-8551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11951", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-7530", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15252", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16256", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15268", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13925", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-13135", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16263", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "astropy__astropy-14995", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pydata__xarray-3677", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pydata__xarray-4687", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-13496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14351", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "astropy__astropy-8707", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "astropy__astropy-13398", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-7454", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-13798", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-12481", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16032", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-12155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-18199", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "psf__requests-1142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13212", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11555", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-24213", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-10614", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pydata__xarray-6992", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-23299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-20590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-25311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13363", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-14531", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-23314", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-15345", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13012", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15022", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16527", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-11445", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pytest-dev__pytest-6197", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-9296", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-10297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13807", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "astropy__astropy-14365", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-9230", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-13124", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-13480", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-10999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-22914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-16766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-24149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15103", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "astropy__astropy-14182", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11299", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-8621", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-26342", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-21930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11603", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pytest-dev__pytest-6202", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-7590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-19637", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pylint-dev__pylint-4551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13410", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pylint-dev__pylint-6386", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15278", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13023", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "astropy__astropy-8872", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13569", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "astropy__astropy-7166", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pydata__xarray-6938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16485", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "astropy__astropy-12907", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-22080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-20801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-12304", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pytest-dev__pytest-10051", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "astropy__astropy-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-25931", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-12585", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-20488", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "mwaskom__seaborn-3187", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15128", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-15017", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15561", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15161", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-12965", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15277", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-10323", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-22714", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-8056", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-10435", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "psf__requests-1766", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pytest-dev__pytest-5262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-9281", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-20859", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13158", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-13877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-8595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13343", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15525", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-17630", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14559", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pytest-dev__pytest-5631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-26466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16642", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pydata__xarray-7393", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pytest-dev__pytest-5840", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-15875", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-12039", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15382", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-14711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11885", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-25775", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pylint-dev__pylint-4661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-15100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-17655", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-18763", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-26291", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-13031", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-24562", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pydata__xarray-2905", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-26208", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "astropy__astropy-7606", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "astropy__astropy-13453", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pytest-dev__pytest-7236", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-13878", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pylint-dev__pylint-6903", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "astropy__astropy-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14915", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15503", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15037", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16612", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14404", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-12754", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-9229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-8265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14580", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pytest-dev__pytest-7571", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-8269", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13112", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13513", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pytest-dev__pytest-7432", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15731", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-16792", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14034", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "astropy__astropy-14096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14765", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-18698", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14771", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15957", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pydata__xarray-3095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-19954", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-16886", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11239", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13820", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pydata__xarray-4629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16901", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13346", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-12262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-24661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11119", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "psf__requests-6028", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-20154", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-13142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14752", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15563", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-16597", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15987", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-15976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-10973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15127", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-13439", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-7748", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13516", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-10554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16082", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13786", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-21596", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-26113", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14672", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13109", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11141", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-23412", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-13551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15368", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15814", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13933", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-12682", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-20676", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-8593", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-13852", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16819", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-12713", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14855", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-26323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-13328", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-11510", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13195", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15916", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-24066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-9591", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pytest-dev__pytest-7982", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14725", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-17029", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-14623", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pydata__xarray-6461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16662", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-9461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11848", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-22719", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pydata__xarray-4966", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11728", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-24627", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-7440", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-12419", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-13989", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11163", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "matplotlib__matplotlib-24177", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14011", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-13279", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-23534", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "astropy__astropy-7336", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "mwaskom__seaborn-3069", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "scikit-learn__scikit-learn-13779", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11179", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pydata__xarray-4075", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-14248", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pylint-dev__pylint-4604", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "astropy__astropy-14369", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-20438", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14500", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-9673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "astropy__astropy-14309", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-9698", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-12489", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pydata__xarray-6744", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-17084", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "pydata__xarray-6599", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11138", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11477", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-13091", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11099", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-16429", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-14792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-11999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sphinx-doc__sphinx-8721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "sympy__sympy-21847", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Minimax M2", "example_id": "django__django-15973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pytest-dev__pytest-10356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11532", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-12050", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pydata__xarray-3151", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13512", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-18189", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13028", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13449", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11211", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-25332", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15863", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14376", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pydata__xarray-4695", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-16450", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-25960", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "psf__requests-1921", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-12308", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "astropy__astropy-13579", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-12708", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pylint-dev__pylint-7277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pydata__xarray-3993", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-9258", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pytest-dev__pytest-7324", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pydata__xarray-7233", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-21379", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pylint-dev__pylint-7080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-12096", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-9367", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-13615", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-7910", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11400", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11740", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-7462", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-13647", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14140", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-12663", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pylint-dev__pylint-4970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14534", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "astropy__astropy-13236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11292", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13417", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-20916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pydata__xarray-6721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pytest-dev__pytest-7521", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "astropy__astropy-14598", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-21612", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-25122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "psf__requests-2317", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-10449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pytest-dev__pytest-8399", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14434", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-24570", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-12209", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-14087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-12193", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-25479", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-7757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pydata__xarray-7229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16136", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11133", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pydata__xarray-4356", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11206", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15375", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-19040", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-13372", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-24539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-9602", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-10466", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-10880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pytest-dev__pytest-5787", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14311", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-25973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-22871", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-23476", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-25287", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "psf__requests-1724", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15280", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13794", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-24443", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-21568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-18211", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-26194", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-17087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-8475", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14170", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-14894", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "astropy__astropy-7671", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11276", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-14496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15467", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13810", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15098", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11490", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-9288", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-12774", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15499", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-19495", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-8035", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-14141", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16877", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-9320", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-23824", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14608", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13837", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-20826", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-24970", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-14983", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13344", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pytest-dev__pytest-10081", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16116", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-15349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-13757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-7889", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-24637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16502", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-25747", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-11618", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-24870", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pylint-dev__pylint-8898", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-7985", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-12125", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-12858", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-13974", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14373", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15732", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-8459", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15380", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-10673", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-8548", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13121", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pytest-dev__pytest-7205", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-17318", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14787", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pydata__xarray-4094", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-14710", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13821", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-14629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15104", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-23950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-25232", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-15599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-10914", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11749", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-23413", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-24026", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pytest-dev__pytest-5809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16899", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15851", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-10097", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-12741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-12406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11734", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-15809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11790", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-14976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14007", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11433", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13401", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pytest-dev__pytest-7490", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-10908", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-10844", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-12273", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "astropy__astropy-13977", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-12973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-17139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-8638", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16255", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pylint-dev__pylint-6528", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-12276", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pydata__xarray-3305", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13670", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14122", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-19346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-22456", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-12325", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-9658", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16667", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-23262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-25102", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14238", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16560", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16145", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pallets__flask-5014", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-20428", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15572", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-11310", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-12143", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-11578", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "astropy__astropy-14508", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-9711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11149", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13964", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-8120", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11815", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-19783", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-22865", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "psf__requests-2931", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11451", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "psf__requests-5414", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-8551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11951", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-7530", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15252", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16256", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15268", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13925", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-13135", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16263", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "astropy__astropy-14995", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13568", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pydata__xarray-3677", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pydata__xarray-4687", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-13496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14351", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "astropy__astropy-8707", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "astropy__astropy-13398", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-7454", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-13798", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-12481", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16032", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-12155", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-18199", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "psf__requests-1142", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13212", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11555", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-24213", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-10614", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pydata__xarray-6992", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-23299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-20590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-25311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13363", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-14531", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-23314", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-15345", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13012", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15022", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16527", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-11445", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pytest-dev__pytest-6197", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-9296", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-10297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13807", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "astropy__astropy-14365", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-9230", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-13124", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-13480", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-10999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-22914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-16766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-24149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15103", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "astropy__astropy-14182", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11299", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-8621", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-26342", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-21930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11603", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pytest-dev__pytest-6202", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-7590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-19637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pylint-dev__pylint-4551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13410", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pylint-dev__pylint-6386", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15278", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13023", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "astropy__astropy-8872", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "astropy__astropy-7166", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pydata__xarray-6938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16485", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "astropy__astropy-12907", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-22080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-20801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-12304", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pytest-dev__pytest-10051", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "astropy__astropy-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-25931", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-12585", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-20488", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "mwaskom__seaborn-3187", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-15017", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15561", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15161", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-12965", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15277", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-10323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-22714", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-8056", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-10435", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16100", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "psf__requests-1766", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pytest-dev__pytest-5262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-9281", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-20859", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13158", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-13877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-8595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13343", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15525", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-17630", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14559", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pytest-dev__pytest-5631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-26466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16642", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15930", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pydata__xarray-7393", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pytest-dev__pytest-5840", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-15875", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-12039", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15382", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-14711", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11885", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-25775", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pylint-dev__pylint-4661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-15100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-17655", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-18763", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-26291", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-13031", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-24562", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pydata__xarray-2905", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-26208", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "astropy__astropy-7606", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "astropy__astropy-13453", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pytest-dev__pytest-7236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-13878", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pylint-dev__pylint-6903", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "astropy__astropy-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14915", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14539", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15503", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15037", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16612", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14404", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-12754", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-9229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-8265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14580", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pytest-dev__pytest-7571", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-8269", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13112", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13513", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pytest-dev__pytest-7432", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15731", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-16792", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14034", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "astropy__astropy-14096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14765", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-18698", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14771", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15957", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pydata__xarray-3095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-19954", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-16886", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11239", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13820", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pydata__xarray-4629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16901", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13346", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-12262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-24661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11119", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "psf__requests-6028", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-20154", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-13142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14752", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15563", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-16597", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15987", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-15976", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-10973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15127", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-13439", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-7748", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13516", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-10554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16082", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13786", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-21596", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-26113", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14672", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13109", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11141", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-23412", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-13551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15368", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15814", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13933", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-12682", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-20676", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-8593", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-13852", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16819", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-12713", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14855", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-26323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-13328", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-11510", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13195", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-24066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-9591", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pytest-dev__pytest-7982", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14725", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-17029", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-14623", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pydata__xarray-6461", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16662", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-9461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11848", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-22719", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pydata__xarray-4966", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11728", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-24627", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-7440", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-13989", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11163", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "matplotlib__matplotlib-24177", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14011", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-13279", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-23534", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "astropy__astropy-7336", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "mwaskom__seaborn-3069", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "scikit-learn__scikit-learn-13779", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11179", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pydata__xarray-4075", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-14248", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pylint-dev__pylint-4604", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "astropy__astropy-14369", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-20438", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14500", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-9673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "astropy__astropy-14309", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-9698", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-12489", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pydata__xarray-6744", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-17084", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "pydata__xarray-6599", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11138", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11477", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-13091", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11099", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-16429", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-14792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-11999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sphinx-doc__sphinx-8721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "sympy__sympy-21847", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "DeepSeek V3.2 Reasoner", "example_id": "django__django-15973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pytest-dev__pytest-10356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11532", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-12050", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pydata__xarray-3151", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13512", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-18189", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13028", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13449", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11211", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-25332", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15863", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14376", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pydata__xarray-4695", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-16450", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-25960", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "psf__requests-1921", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-12308", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "astropy__astropy-13579", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-12708", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pylint-dev__pylint-7277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pydata__xarray-3993", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-9258", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pytest-dev__pytest-7324", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pydata__xarray-7233", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-21379", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pylint-dev__pylint-7080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-12096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-9367", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-13615", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-7910", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11400", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11740", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-7462", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-13647", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14140", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-12663", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pylint-dev__pylint-4970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14534", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "astropy__astropy-13236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11292", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13417", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-20916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pydata__xarray-6721", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pytest-dev__pytest-7521", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "astropy__astropy-14598", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-21612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-25122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "psf__requests-2317", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-10449", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pytest-dev__pytest-8399", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14434", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-24570", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-12209", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-14087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-12193", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-25479", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-7757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pydata__xarray-7229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16136", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11133", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pydata__xarray-4356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11206", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15375", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-19040", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-13372", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-24539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-9602", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-10466", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16139", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-10880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pytest-dev__pytest-5787", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14311", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-25973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-22871", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-23476", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-25287", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "psf__requests-1724", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15280", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13794", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-24443", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-21568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16454", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-18211", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-26194", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-17087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-8475", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14170", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-14894", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "astropy__astropy-7671", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16938", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-14496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15467", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13810", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15098", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11490", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13297", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-9288", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-12774", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15499", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-19495", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-8035", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-14141", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-9320", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-23824", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14608", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13837", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-20826", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-24970", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-14983", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13344", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pytest-dev__pytest-10081", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16116", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-15349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-13757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-7889", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-24637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16502", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-25747", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-11618", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-24870", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pylint-dev__pylint-8898", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-7985", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-12125", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-12858", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-13974", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14373", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15732", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-8459", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15380", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-10673", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-8548", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13121", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pytest-dev__pytest-7205", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-17318", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14787", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pydata__xarray-4094", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-14710", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13821", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-14629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15104", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-23950", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-25232", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14053", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-15599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-10914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11749", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-23413", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-24026", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pytest-dev__pytest-5809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16899", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15851", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-10097", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-12741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-12406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11734", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-15809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11790", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-14976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14007", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11433", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13401", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pytest-dev__pytest-7490", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-10908", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-10844", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-12273", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "astropy__astropy-13977", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-12973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-17139", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-8638", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16255", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pylint-dev__pylint-6528", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-12276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pydata__xarray-3305", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13670", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-19346", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-22456", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-12325", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-9658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16667", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-23262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-25102", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14238", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16560", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16145", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pallets__flask-5014", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-20428", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15572", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-11310", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-12143", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-11578", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "astropy__astropy-14508", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-9711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11149", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-8120", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11815", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-19783", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-22865", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "psf__requests-2931", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11451", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "psf__requests-5414", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-8551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11951", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-7530", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15252", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16256", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15268", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13925", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-13135", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16263", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "astropy__astropy-14995", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pydata__xarray-3677", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pydata__xarray-4687", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-13496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14351", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "astropy__astropy-8707", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "astropy__astropy-13398", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-7454", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-13798", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-12481", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16032", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-12155", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-18199", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "psf__requests-1142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13212", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11555", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-24213", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-10614", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pydata__xarray-6992", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-23299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-20590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-25311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13363", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-14531", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-23314", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-15345", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13012", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15022", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16527", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-11445", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pytest-dev__pytest-6197", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-9296", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-10297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13807", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "astropy__astropy-14365", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-9230", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-13124", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-13480", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-10999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-22914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-16766", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-24149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15103", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "astropy__astropy-14182", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11299", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-8621", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-26342", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-21930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11603", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pytest-dev__pytest-6202", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-7590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-19637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pylint-dev__pylint-4551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13410", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pylint-dev__pylint-6386", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15278", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13023", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "astropy__astropy-8872", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "astropy__astropy-7166", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pydata__xarray-6938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16485", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "astropy__astropy-12907", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-22080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-20801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-12304", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pytest-dev__pytest-10051", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "astropy__astropy-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-25931", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-12585", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-20488", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "mwaskom__seaborn-3187", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15128", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13033", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-15017", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15561", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15161", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-12965", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15277", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-10323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-22714", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-8056", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-10435", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16100", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "psf__requests-1766", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pytest-dev__pytest-5262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-9281", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-20859", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13158", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-13877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-8595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13343", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15525", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-17630", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14559", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pytest-dev__pytest-5631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-26466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16642", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15930", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pydata__xarray-7393", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pytest-dev__pytest-5840", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-15875", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-12039", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15382", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-14711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11885", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-25775", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pylint-dev__pylint-4661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-15100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-17655", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-18763", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-26291", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-13031", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-24562", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pydata__xarray-2905", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-26208", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "astropy__astropy-7606", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "astropy__astropy-13453", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pytest-dev__pytest-7236", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-13878", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pylint-dev__pylint-6903", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "astropy__astropy-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14915", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14539", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15503", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15037", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16612", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14404", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-12754", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-9229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-8265", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14580", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pytest-dev__pytest-7571", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-8269", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13112", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13513", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pytest-dev__pytest-7432", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15731", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-16792", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14034", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "astropy__astropy-14096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14765", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-18698", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14771", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15957", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pydata__xarray-3095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-19954", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-16886", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11239", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13820", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pydata__xarray-4629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16901", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-12262", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-24661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11119", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "psf__requests-6028", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-20154", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-13142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14752", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15563", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-16597", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15987", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-15976", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-10973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15127", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-13439", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-7748", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13516", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-10554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16082", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13786", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-21596", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-26113", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14672", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13109", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11141", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-23412", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15368", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15814", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13933", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-12682", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-20676", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-8593", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-13852", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16819", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-12713", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14855", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-26323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-13328", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-11510", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13195", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-24066", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-9591", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pytest-dev__pytest-7982", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14725", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-17029", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-14623", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pydata__xarray-6461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16662", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-9461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11848", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-22719", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pydata__xarray-4966", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11728", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-24627", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-7440", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-13989", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11163", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "matplotlib__matplotlib-24177", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14011", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-13279", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-23534", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "astropy__astropy-7336", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "mwaskom__seaborn-3069", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "scikit-learn__scikit-learn-13779", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11179", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pydata__xarray-4075", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-14248", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pylint-dev__pylint-4604", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "astropy__astropy-14369", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-20438", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14500", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-9673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "astropy__astropy-14309", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-9698", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-12489", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pydata__xarray-6744", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-17084", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "pydata__xarray-6599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11138", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11477", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-13091", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11099", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-16429", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-14792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-11999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sphinx-doc__sphinx-8721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "sympy__sympy-21847", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GLM-4.6 (T=1)", "example_id": "django__django-15973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pytest-dev__pytest-10356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11532", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-12050", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pydata__xarray-3151", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13512", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-18189", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13028", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11211", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-25332", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15863", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14376", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pydata__xarray-4695", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-16450", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-25960", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "psf__requests-1921", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-12308", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "astropy__astropy-13579", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-12708", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pylint-dev__pylint-7277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pydata__xarray-3993", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-9258", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pytest-dev__pytest-7324", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pydata__xarray-7233", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-21379", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pylint-dev__pylint-7080", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-12096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16569", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-9367", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-13615", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-7910", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11400", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11740", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-7462", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-13647", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14140", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-12663", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pylint-dev__pylint-4970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14534", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "astropy__astropy-13236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11292", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15569", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13417", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-20916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pydata__xarray-6721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pytest-dev__pytest-7521", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "astropy__astropy-14598", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-21612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-25122", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "psf__requests-2317", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-10449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pytest-dev__pytest-8399", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14434", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-24570", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-12209", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-14087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-12193", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-25479", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-7757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pydata__xarray-7229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16136", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11133", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pydata__xarray-4356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11206", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15375", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-19040", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-13372", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-24539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-9602", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-10466", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-10880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pytest-dev__pytest-5787", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-25973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-22871", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-23476", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-25287", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "psf__requests-1724", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15280", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13794", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-24443", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-21568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-18211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-26194", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-17087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-8475", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14170", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-14894", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "astropy__astropy-7671", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11276", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-14496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15467", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13810", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15098", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11490", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13297", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-9288", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-12774", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15499", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-19495", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-8035", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-14141", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-9320", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-23824", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14608", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13837", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-20826", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-24970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-14983", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13344", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pytest-dev__pytest-10081", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16116", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-15349", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-13757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-7889", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-24637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16502", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-25747", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-11618", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-24870", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pylint-dev__pylint-8898", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-7985", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-12125", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-12858", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-13974", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14373", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15732", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-8459", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15380", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-10673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-8548", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13121", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pytest-dev__pytest-7205", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-17318", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14787", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pydata__xarray-4094", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-14710", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13821", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-14629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15104", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-23950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-25232", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14053", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-15599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-10914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11749", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-23413", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-24026", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pytest-dev__pytest-5809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16899", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15851", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-10097", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-12741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-12406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11734", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-15809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11790", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-14976", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14007", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11433", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13401", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pytest-dev__pytest-7490", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-10908", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-10844", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-12273", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "astropy__astropy-13977", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-12973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-17139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-8638", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16255", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pylint-dev__pylint-6528", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-12276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pydata__xarray-3305", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13670", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14122", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-19346", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-22456", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-12325", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-9658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16667", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-23262", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-25102", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14238", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16560", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16145", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pallets__flask-5014", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-20428", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15572", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-11310", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-12143", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-11578", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "astropy__astropy-14508", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-9711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11149", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-8120", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11815", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-19783", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-22865", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "psf__requests-2931", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11451", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "psf__requests-5414", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-8551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11951", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-7530", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15252", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16256", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15268", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13925", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-13135", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16263", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "astropy__astropy-14995", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pydata__xarray-3677", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pydata__xarray-4687", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-13496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14351", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "astropy__astropy-8707", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "astropy__astropy-13398", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-7454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-13798", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-12481", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16032", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-12155", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-18199", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "psf__requests-1142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13212", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11555", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-24213", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-10614", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pydata__xarray-6992", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-23299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-20590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-25311", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13363", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-14531", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-23314", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15741", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-15345", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13012", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15022", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16527", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-11445", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pytest-dev__pytest-6197", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-9296", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-10297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13807", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "astropy__astropy-14365", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-9230", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-13124", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-13480", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-10999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-22914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-16766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-24149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15103", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "astropy__astropy-14182", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11299", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-8621", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-26342", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-21930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11603", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pytest-dev__pytest-6202", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-7590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-19637", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pylint-dev__pylint-4551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13410", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pylint-dev__pylint-6386", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15278", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13741", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13023", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "astropy__astropy-8872", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "astropy__astropy-7166", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pydata__xarray-6938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16485", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "astropy__astropy-12907", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-22080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-20801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-12304", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pytest-dev__pytest-10051", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "astropy__astropy-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-25931", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-12585", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-20488", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "mwaskom__seaborn-3187", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15128", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13033", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-15017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15561", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15161", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-12965", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15277", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-10323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-22714", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-8056", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-10435", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "psf__requests-1766", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pytest-dev__pytest-5262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-9281", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-20859", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13158", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-13877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-8595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13343", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15525", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-17630", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14559", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pytest-dev__pytest-5631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-26466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16642", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15930", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pydata__xarray-7393", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pytest-dev__pytest-5840", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-15875", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-12039", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15382", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-14711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11885", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-25775", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pylint-dev__pylint-4661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-15100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-17655", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-18763", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-26291", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-13031", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-24562", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pydata__xarray-2905", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-26208", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "astropy__astropy-7606", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "astropy__astropy-13453", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pytest-dev__pytest-7236", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-13878", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pylint-dev__pylint-6903", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "astropy__astropy-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14915", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15503", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15037", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14404", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-12754", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-9229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-8265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14580", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pytest-dev__pytest-7571", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-8269", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13112", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13513", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pytest-dev__pytest-7432", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15731", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-16792", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14034", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "astropy__astropy-14096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14765", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-18698", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14771", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15957", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pydata__xarray-3095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-19954", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-16886", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11239", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13820", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pydata__xarray-4629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16901", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-12262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-24661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11119", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "psf__requests-6028", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-20154", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-13142", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14752", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15563", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-16597", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15987", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-15976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-10973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15127", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-13439", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-7748", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13516", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-10554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16082", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13786", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-21596", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-26113", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14672", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13109", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11141", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-23412", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15368", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15814", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13933", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-12682", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-20676", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-8593", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-13852", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16819", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-12713", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14855", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-26323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-13328", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-11510", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13195", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-24066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-9591", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pytest-dev__pytest-7982", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14725", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-17029", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-14623", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pydata__xarray-6461", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16662", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-9461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11848", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-22719", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pydata__xarray-4966", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11728", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-24627", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-7440", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-13989", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11163", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "matplotlib__matplotlib-24177", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14011", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-13279", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-23534", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "astropy__astropy-7336", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "mwaskom__seaborn-3069", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "scikit-learn__scikit-learn-13779", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11179", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pydata__xarray-4075", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-14248", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pylint-dev__pylint-4604", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "astropy__astropy-14369", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-20438", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14500", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-9673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "astropy__astropy-14309", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-9698", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-12489", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pydata__xarray-6744", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-17084", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "pydata__xarray-6599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11138", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11477", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-13091", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11099", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-16429", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-14792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-11999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sphinx-doc__sphinx-8721", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "sympy__sympy-21847", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral (2512)", "example_id": "django__django-15973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pytest-dev__pytest-10356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11532", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-12050", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pydata__xarray-3151", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13512", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-18189", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13028", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-25332", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15863", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14376", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pydata__xarray-4695", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-16450", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14493", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-25960", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "psf__requests-1921", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-12308", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "astropy__astropy-13579", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-12708", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pylint-dev__pylint-7277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pydata__xarray-3993", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-9258", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pytest-dev__pytest-7324", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pydata__xarray-7233", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-21379", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pylint-dev__pylint-7080", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-12096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-9367", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-13615", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-7910", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11400", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11740", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-7462", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-13647", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14140", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-12663", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pylint-dev__pylint-4970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14534", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "astropy__astropy-13236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11292", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13417", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-20916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pydata__xarray-6721", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pytest-dev__pytest-7521", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "astropy__astropy-14598", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-21612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-25122", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "psf__requests-2317", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-10449", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pytest-dev__pytest-8399", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14434", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13809", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-24570", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-12209", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-14087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-12193", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-25479", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-7757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pydata__xarray-7229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16136", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11133", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pydata__xarray-4356", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11206", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15375", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-19040", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-13372", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-24539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-9602", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-10466", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-10880", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pytest-dev__pytest-5787", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-25973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-22871", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-23476", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-25287", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "psf__requests-1724", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15280", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13794", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-24443", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-21568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13128", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-18211", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-26194", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-17087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-8475", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14170", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-14894", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "astropy__astropy-7671", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-14496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15467", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13810", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15098", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11490", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-9288", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-12774", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15499", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-19495", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-8035", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-14141", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-9320", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-23824", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14608", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13837", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-20826", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-24970", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-14983", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13344", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pytest-dev__pytest-10081", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16116", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-15349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-13757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-7889", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-24637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16502", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-25747", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-11618", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-24870", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pylint-dev__pylint-8898", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-7985", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-12125", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-12858", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-13974", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14373", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15732", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-8459", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15380", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-10673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-8548", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13121", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pytest-dev__pytest-7205", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-17318", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14787", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pydata__xarray-4094", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-14710", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13821", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-14629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15104", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-23950", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-25232", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-15599", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-10914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11749", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-23413", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-24026", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pytest-dev__pytest-5809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16899", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15851", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-10097", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-12741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-12406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-14053", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11734", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-15809", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11790", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-14976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14007", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11433", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13401", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pytest-dev__pytest-7490", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-10908", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-10844", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-12273", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16333", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "astropy__astropy-13977", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-12973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-17139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-8638", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16255", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pylint-dev__pylint-6528", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-12276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pydata__xarray-3305", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13670", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14122", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-19346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-22456", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-12325", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-9658", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16667", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-23262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-25102", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14238", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16560", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16145", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pallets__flask-5014", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-20428", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15572", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14349", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-11310", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-12143", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-11578", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "astropy__astropy-14508", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-9711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11149", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-8120", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11815", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-19783", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-22865", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "psf__requests-2931", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11451", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "psf__requests-5414", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-8551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11951", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-7530", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15252", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16256", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15268", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13925", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-13135", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16263", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "astropy__astropy-14995", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pydata__xarray-3677", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pydata__xarray-4687", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-13496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14351", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "astropy__astropy-8707", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "astropy__astropy-13398", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-7454", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-13798", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-12481", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16032", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-12155", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-18199", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "psf__requests-1142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13212", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11555", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-24213", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-10614", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pydata__xarray-6992", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-23299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-20590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-25311", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13363", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-14531", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-23314", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-15345", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13012", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15022", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16527", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-11445", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pytest-dev__pytest-6197", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-9296", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-10297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13807", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "astropy__astropy-14365", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-9230", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-13124", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-13480", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-10999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-22914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-16766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-24149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15103", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "astropy__astropy-14182", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11299", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-8621", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-26342", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-21930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11603", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pytest-dev__pytest-6202", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-7590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-19637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pylint-dev__pylint-4551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13410", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pylint-dev__pylint-6386", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15278", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13023", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "astropy__astropy-8872", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13569", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "astropy__astropy-7166", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pydata__xarray-6938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16485", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "astropy__astropy-12907", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-22080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-20801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-12304", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pytest-dev__pytest-10051", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "astropy__astropy-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-25931", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-12585", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-20488", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "mwaskom__seaborn-3187", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13033", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-15017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15561", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15161", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-12965", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15277", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-10323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-22714", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-8056", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-10435", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16100", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "psf__requests-1766", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pytest-dev__pytest-5262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-9281", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-20859", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13158", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-13877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-8595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13343", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15525", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-17630", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14559", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pytest-dev__pytest-5631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-26466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16642", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pydata__xarray-7393", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13406", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pytest-dev__pytest-5840", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-15875", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-12039", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15382", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-14711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11885", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-25775", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pylint-dev__pylint-4661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-15100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-17655", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-18763", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-26291", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-13031", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-24562", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pydata__xarray-2905", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-26208", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "astropy__astropy-7606", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "astropy__astropy-13453", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pytest-dev__pytest-7236", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-13878", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pylint-dev__pylint-6903", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "astropy__astropy-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14915", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15503", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15037", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14404", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-12754", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-9229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-8265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14580", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pytest-dev__pytest-7571", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-8269", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13112", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13513", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pytest-dev__pytest-7432", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15731", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-16792", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14034", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "astropy__astropy-14096", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14765", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-18698", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14771", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15957", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pydata__xarray-3095", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-19954", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-16886", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11239", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13820", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pydata__xarray-4629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16901", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13346", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-12262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-24661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11119", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "psf__requests-6028", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-20154", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-13142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14752", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15563", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-16597", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15987", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-15976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-10973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15127", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-13439", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-7748", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13516", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-10554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16082", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13786", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-21596", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-26113", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14672", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13109", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11141", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-23412", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15368", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15814", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13933", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-12682", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-20676", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-8593", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-13852", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16819", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-12713", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14855", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-26323", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-13328", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-11510", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13195", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-24066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-9591", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pytest-dev__pytest-7982", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14725", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-17029", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-14623", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pydata__xarray-6461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16662", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-9461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11848", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-22719", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pydata__xarray-4966", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11728", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-24627", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-7440", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-13989", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11163", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "matplotlib__matplotlib-24177", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14011", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-13279", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-23534", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "astropy__astropy-7336", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "mwaskom__seaborn-3069", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "scikit-learn__scikit-learn-13779", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11179", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pydata__xarray-4075", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16595", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-14248", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pylint-dev__pylint-4604", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "astropy__astropy-14369", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-20438", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14500", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-9673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "astropy__astropy-14309", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-9698", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-12489", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pydata__xarray-6744", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-17084", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "pydata__xarray-6599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11138", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11477", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-13091", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11099", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-16429", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-14792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-11999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sphinx-doc__sphinx-8721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "sympy__sympy-21847", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Devstral small (2512)", "example_id": "django__django-15973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pytest-dev__pytest-10356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11532", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-12050", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pydata__xarray-3151", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13512", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-18189", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13028", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11211", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-25332", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15863", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14376", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pydata__xarray-4695", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-16450", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-25960", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "psf__requests-1921", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-12308", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "astropy__astropy-13579", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-12708", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pylint-dev__pylint-7277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pydata__xarray-3993", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-9258", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pytest-dev__pytest-7324", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pydata__xarray-7233", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-21379", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pylint-dev__pylint-7080", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-12096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-9367", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-13615", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-7910", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11400", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11740", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-7462", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-13647", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14140", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-12663", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pylint-dev__pylint-4970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14534", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "astropy__astropy-13236", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11292", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13417", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-20916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pydata__xarray-6721", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pytest-dev__pytest-7521", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "astropy__astropy-14598", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-21612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-25122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "psf__requests-2317", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-10449", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pytest-dev__pytest-8399", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14434", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-24570", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-12209", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-14087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-12193", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-25479", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-7757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pydata__xarray-7229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16136", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11133", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pydata__xarray-4356", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11206", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15375", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-19040", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-13372", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-24539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-9602", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-10466", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-10880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pytest-dev__pytest-5787", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-25973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-22871", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-23476", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-25287", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "psf__requests-1724", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15280", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13794", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-24443", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-21568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16454", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-18211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-26194", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-17087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-8475", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14170", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-14894", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "astropy__astropy-7671", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-14496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15467", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13810", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15098", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11490", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-9288", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-12774", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15499", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-19495", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-8035", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-14141", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16877", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-9320", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-23824", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14608", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13837", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-20826", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-24970", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-14983", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13344", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pytest-dev__pytest-10081", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16116", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-15349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-13757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-7889", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-24637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16502", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-25747", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-11618", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-24870", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pylint-dev__pylint-8898", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-7985", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-12125", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-12858", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-13974", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14373", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15732", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-8459", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15380", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-10673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-8548", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13121", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pytest-dev__pytest-7205", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-17318", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14787", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pydata__xarray-4094", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-14710", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13821", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-14629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15104", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-23950", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-25232", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-15599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-10914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11749", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-23413", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-24026", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pytest-dev__pytest-5809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16899", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15851", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-10097", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-12741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-12406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11734", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-15809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11790", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-14976", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14007", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11433", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13401", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pytest-dev__pytest-7490", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-10908", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-10844", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-12273", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "astropy__astropy-13977", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-12973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-17139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-8638", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16255", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pylint-dev__pylint-6528", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-12276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pydata__xarray-3305", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13670", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-19346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-22456", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-12325", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-9658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16667", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-23262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-25102", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14238", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16560", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16145", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pallets__flask-5014", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-20428", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15572", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-11310", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-12143", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-11578", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "astropy__astropy-14508", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-9711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11149", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13964", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-8120", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11815", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-19783", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-22865", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "psf__requests-2931", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11333", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11451", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "psf__requests-5414", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-8551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11951", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-7530", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15252", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16256", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15268", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13925", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-13135", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16263", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "astropy__astropy-14995", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pydata__xarray-3677", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pydata__xarray-4687", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-13496", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14351", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "astropy__astropy-8707", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "astropy__astropy-13398", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-7454", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-13798", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-12481", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16032", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-12155", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-18199", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "psf__requests-1142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13212", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11555", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-24213", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-10614", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pydata__xarray-6992", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-23299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-20590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-25311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13363", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-14531", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-23314", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-15345", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13012", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15022", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16527", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-11445", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pytest-dev__pytest-6197", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-9296", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-10297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13807", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "astropy__astropy-14365", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-9230", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-13124", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-13480", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-10999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-22914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-16766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-24149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15103", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "astropy__astropy-14182", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11299", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-8621", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-26342", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-21930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11603", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pytest-dev__pytest-6202", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-7590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-19637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pylint-dev__pylint-4551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13410", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pylint-dev__pylint-6386", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15278", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13741", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13023", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "astropy__astropy-8872", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15554", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "astropy__astropy-7166", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pydata__xarray-6938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16485", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "astropy__astropy-12907", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-22080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-20801", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-12304", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pytest-dev__pytest-10051", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "astropy__astropy-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-25931", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-12585", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-20488", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "mwaskom__seaborn-3187", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15128", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13033", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-15017", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15561", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15161", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-12965", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15277", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-10323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-22714", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-8056", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-10435", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "psf__requests-1766", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pytest-dev__pytest-5262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-9281", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-20859", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13158", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-13877", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-8595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13343", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15525", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-17630", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14559", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pytest-dev__pytest-5631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-26466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16642", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15930", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pydata__xarray-7393", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13406", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pytest-dev__pytest-5840", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-15875", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-12039", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15382", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-14711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11885", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-25775", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pylint-dev__pylint-4661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-15100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-17655", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-18763", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-26291", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-13031", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-24562", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pydata__xarray-2905", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-26208", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "astropy__astropy-7606", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "astropy__astropy-13453", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pytest-dev__pytest-7236", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-13878", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pylint-dev__pylint-6903", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "astropy__astropy-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14915", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15503", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15037", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16612", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14404", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-12754", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-9229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-8265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14580", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pytest-dev__pytest-7571", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-8269", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13112", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13513", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pytest-dev__pytest-7432", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15731", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-16792", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14034", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "astropy__astropy-14096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14765", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-18698", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14771", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15957", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pydata__xarray-3095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-19954", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-16886", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11239", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13820", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pydata__xarray-4629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16901", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-12262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-24661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11119", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "psf__requests-6028", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-20154", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-13142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14752", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15563", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-16597", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15987", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-15976", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-10973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15127", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-13439", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-7748", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13516", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-10554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16082", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13786", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-21596", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-26113", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14672", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13109", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11141", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-23412", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-13551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15368", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15814", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13933", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-12682", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-20676", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-8593", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-13852", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16819", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-12713", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14855", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-26323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-13328", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-11510", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13195", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-24066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-9591", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pytest-dev__pytest-7982", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14725", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-17029", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-14623", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pydata__xarray-6461", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16662", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-9461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11848", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-22719", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pydata__xarray-4966", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11728", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-24627", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-7440", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-13989", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11163", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "matplotlib__matplotlib-24177", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14011", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-13279", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-23534", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "astropy__astropy-7336", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "mwaskom__seaborn-3069", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "scikit-learn__scikit-learn-13779", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11179", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pydata__xarray-4075", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-14248", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pylint-dev__pylint-4604", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "astropy__astropy-14369", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-20438", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14500", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-9673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "astropy__astropy-14309", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-9698", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-12489", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pydata__xarray-6744", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-17084", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "pydata__xarray-6599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11138", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11477", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-13091", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11099", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-16429", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-14792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-11999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sphinx-doc__sphinx-8721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "sympy__sympy-21847", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "Kimi K2 Thinking", "example_id": "django__django-15973", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pytest-dev__pytest-10356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11532", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-12050", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pydata__xarray-3151", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13512", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-18189", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13028", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11211", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-25332", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15863", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14376", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pydata__xarray-4695", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-16450", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-25960", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "psf__requests-1921", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-12308", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "astropy__astropy-13579", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-12708", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pylint-dev__pylint-7277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pydata__xarray-3993", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-9258", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pytest-dev__pytest-7324", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pydata__xarray-7233", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-21379", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pylint-dev__pylint-7080", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-12096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-9367", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-13615", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-7910", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11400", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11740", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-7462", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-13647", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14140", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-12663", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pylint-dev__pylint-4970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14534", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "astropy__astropy-13236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11292", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13417", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-20916", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pydata__xarray-6721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pytest-dev__pytest-7521", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "astropy__astropy-14598", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-21612", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-25122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "psf__requests-2317", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-10449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pytest-dev__pytest-8399", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14434", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-24570", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-12209", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-14087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-12193", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-25479", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-7757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pydata__xarray-7229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16136", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11133", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pydata__xarray-4356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11206", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15375", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-19040", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-13372", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-24539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-9602", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-10466", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16139", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-10880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pytest-dev__pytest-5787", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-25973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-22871", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-23476", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-25287", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "psf__requests-1724", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15280", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13794", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-24443", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-21568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16454", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-18211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-26194", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-17087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-8475", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14170", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-14894", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "astropy__astropy-7671", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16938", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-14496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15467", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13810", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15098", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11490", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-9288", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-12774", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15499", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-19495", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-8035", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-14141", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16877", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-9320", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-23824", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14608", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13837", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-20826", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-24970", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-14983", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13344", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pytest-dev__pytest-10081", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16116", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-15349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-13757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-7889", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-24637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16502", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-25747", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-11618", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-24870", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pylint-dev__pylint-8898", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-7985", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-12125", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-12858", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-13974", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14373", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15732", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-8459", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15380", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-10673", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-8548", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13121", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pytest-dev__pytest-7205", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-17318", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14787", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pydata__xarray-4094", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-14710", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13821", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-14629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15104", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-23950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-25232", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-15599", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-10914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11749", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-23413", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-24026", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pytest-dev__pytest-5809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16899", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15851", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-10097", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-12741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-12406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11734", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-15809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11790", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-14976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14007", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11433", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13401", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pytest-dev__pytest-7490", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-10908", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-10844", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-12273", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "astropy__astropy-13977", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-12973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-17139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-8638", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16255", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pylint-dev__pylint-6528", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-12276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pydata__xarray-3305", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13670", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-19346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-22456", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-12325", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-9658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16667", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-23262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-25102", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14238", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16560", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16145", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pallets__flask-5014", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-20428", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15572", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11964", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-11310", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-12143", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-11578", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "astropy__astropy-14508", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-9711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11149", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13964", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-8120", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11815", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-19783", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-22865", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "psf__requests-2931", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11451", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "psf__requests-5414", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-8551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11951", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-7530", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15252", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16256", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15268", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13925", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-13135", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16263", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "astropy__astropy-14995", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13568", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pydata__xarray-3677", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pydata__xarray-4687", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-13496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14351", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "astropy__astropy-8707", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "astropy__astropy-13398", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-7454", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-13798", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-12481", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16032", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-12155", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-18199", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "psf__requests-1142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13212", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11555", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-24213", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-10614", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pydata__xarray-6992", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-23299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-20590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-25311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13363", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-14531", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-23314", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-15345", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13012", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15022", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16527", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-11445", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pytest-dev__pytest-6197", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-9296", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-10297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13807", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "astropy__astropy-14365", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-9230", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-13124", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11066", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-13480", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-10999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-22914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-16766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-24149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15103", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "astropy__astropy-14182", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11299", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-8621", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-26342", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-21930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11603", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pytest-dev__pytest-6202", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-7590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-19637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pylint-dev__pylint-4551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13410", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pylint-dev__pylint-6386", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15278", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13023", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "astropy__astropy-8872", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "astropy__astropy-7166", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pydata__xarray-6938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16485", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "astropy__astropy-12907", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-22080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-20801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-12304", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pytest-dev__pytest-10051", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "astropy__astropy-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-25931", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-12585", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-20488", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "mwaskom__seaborn-3187", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15128", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-15017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15561", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15161", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-12965", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15277", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-10323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-22714", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-8056", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-10435", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "psf__requests-1766", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pytest-dev__pytest-5262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-9281", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-20859", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13158", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-13877", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-8595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13343", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15525", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11095", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-17630", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14559", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pytest-dev__pytest-5631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-26466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16642", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15930", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pydata__xarray-7393", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pytest-dev__pytest-5840", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-15875", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-12039", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15382", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-14711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11885", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-25775", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pylint-dev__pylint-4661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-15100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-17655", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-18763", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-26291", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-13031", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-24562", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pydata__xarray-2905", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-26208", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "astropy__astropy-7606", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "astropy__astropy-13453", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pytest-dev__pytest-7236", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-13878", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pylint-dev__pylint-6903", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "astropy__astropy-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14915", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15503", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15037", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16612", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14404", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-12754", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-9229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-8265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14580", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pytest-dev__pytest-7571", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-8269", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13112", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13513", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pytest-dev__pytest-7432", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15731", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-16792", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14034", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "astropy__astropy-14096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14765", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-18698", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14771", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15957", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pydata__xarray-3095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-19954", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-16886", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11239", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13820", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pydata__xarray-4629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16901", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-12262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-24661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11119", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "psf__requests-6028", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-20154", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-13142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14752", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15563", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-16597", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15987", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-15976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-10973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15127", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-13439", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-7748", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13516", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-10554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16082", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13786", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-21596", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-26113", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14672", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13109", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11141", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-23412", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-13551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15368", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15814", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13933", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-12682", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-20676", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-8593", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-13852", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16819", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-12713", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14855", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-26323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-13328", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-11510", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13195", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15916", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-24066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-9591", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pytest-dev__pytest-7982", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14725", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-17029", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-14623", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pydata__xarray-6461", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16662", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-9461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11848", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-22719", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pydata__xarray-4966", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11728", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-24627", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-7440", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-13989", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11163", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "matplotlib__matplotlib-24177", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14011", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-13279", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-23534", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "astropy__astropy-7336", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "mwaskom__seaborn-3069", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "scikit-learn__scikit-learn-13779", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11179", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pydata__xarray-4075", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-14248", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pylint-dev__pylint-4604", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "astropy__astropy-14369", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-20438", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14500", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-9673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "astropy__astropy-14309", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-9698", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-12489", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pydata__xarray-6744", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-17084", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "pydata__xarray-6599", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11138", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11477", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-13091", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11099", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-16429", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-14792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-11999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sphinx-doc__sphinx-8721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "sympy__sympy-21847", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11)", "example_id": "django__django-15973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pytest-dev__pytest-10356", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11532", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-12050", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pydata__xarray-3151", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13512", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-18189", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13028", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11211", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-25332", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11087", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15863", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14376", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pydata__xarray-4695", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-16450", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14493", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-25960", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "psf__requests-1921", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-12308", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "astropy__astropy-13579", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-12708", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pylint-dev__pylint-7277", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pydata__xarray-3993", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-9258", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pytest-dev__pytest-7324", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pydata__xarray-7233", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-21379", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pylint-dev__pylint-7080", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-12096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-9367", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-13615", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-7910", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11400", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11740", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-7462", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-13647", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14140", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-12663", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pylint-dev__pylint-4970", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14534", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "astropy__astropy-13236", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11292", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13417", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-20916", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pydata__xarray-6721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pytest-dev__pytest-7521", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "astropy__astropy-14598", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-21612", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-25122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "psf__requests-2317", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-10449", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pytest-dev__pytest-8399", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14434", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-24570", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-12209", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-14087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-12193", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-25479", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-7757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pydata__xarray-7229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16136", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11133", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pydata__xarray-4356", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11206", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16493", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15375", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-19040", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-13372", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-24539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-9602", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-10466", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16139", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-10880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pytest-dev__pytest-5787", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11265", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-25973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-22871", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-23476", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-25287", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "psf__requests-1724", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15280", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13794", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-24443", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-21568", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16454", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-18211", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-26194", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-17087", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-8475", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14170", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-14894", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "astropy__astropy-7671", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16938", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-14496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15467", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13810", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15098", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11490", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-9288", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-12774", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15499", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-19495", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-8035", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-14141", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16877", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-9320", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-23824", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14608", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13837", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-20826", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-24970", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-14983", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13344", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pytest-dev__pytest-10081", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16116", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-15349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-13757", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-7889", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-24637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16502", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-25747", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-11618", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-24870", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pylint-dev__pylint-8898", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-7985", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-12125", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-12858", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-13974", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14373", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15732", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-8459", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15380", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-10673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-8548", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13121", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pytest-dev__pytest-7205", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-17318", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13089", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14787", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pydata__xarray-4094", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-14710", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13821", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-14629", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15104", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-23950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-25232", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-15599", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-10914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11749", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-23413", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-24026", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pytest-dev__pytest-5809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16899", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15851", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-10097", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-12741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-12406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-14053", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11734", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-15809", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11790", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-14976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14007", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11433", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13401", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pytest-dev__pytest-7490", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-10908", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-10844", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-12273", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "astropy__astropy-13977", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-12973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-17139", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-8638", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16255", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pylint-dev__pylint-6528", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-12276", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pydata__xarray-3305", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13670", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14122", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-19346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-22456", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-12325", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-9658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16667", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-23262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-25102", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14238", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16631", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16560", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16145", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pallets__flask-5014", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-20428", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15572", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11964", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14349", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-11310", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-12143", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-11578", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "astropy__astropy-14508", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-9711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11149", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13964", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-8120", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11815", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-19783", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-22865", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "psf__requests-2931", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11333", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11451", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "psf__requests-5414", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-8551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11951", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-7530", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15252", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16256", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15268", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13925", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-13135", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16263", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "astropy__astropy-14995", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13568", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pydata__xarray-3677", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pydata__xarray-4687", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-13496", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14351", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "astropy__astropy-8707", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "astropy__astropy-13398", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-7454", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-13798", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-12481", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16032", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-12155", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-18199", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "psf__requests-1142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13212", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11555", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-24213", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-10614", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pydata__xarray-6992", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-23299", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-20590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-25311", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13363", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-14531", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-23314", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-15345", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13012", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15022", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16527", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-11445", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pytest-dev__pytest-6197", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-9296", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-10297", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13807", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "astropy__astropy-14365", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-9230", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-13124", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-13480", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15695", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-10999", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-22914", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-16766", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-24149", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15103", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "astropy__astropy-14182", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11299", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-8621", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-26342", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-21930", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16950", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11880", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11603", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pytest-dev__pytest-6202", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-7590", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-19637", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pylint-dev__pylint-4551", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13410", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pylint-dev__pylint-6386", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15278", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13741", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13023", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "astropy__astropy-8872", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13569", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15554", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "astropy__astropy-7166", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pydata__xarray-6938", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16485", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "astropy__astropy-12907", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-22080", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-20801", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-12304", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pytest-dev__pytest-10051", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "astropy__astropy-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-25931", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-12585", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-20488", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "mwaskom__seaborn-3187", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15128", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13033", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-15017", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15561", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15161", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-12965", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15277", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-10323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-22714", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-8056", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-10435", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "psf__requests-1766", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pytest-dev__pytest-5262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-9281", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-20859", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13158", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-13877", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-8595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13343", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15525", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-17630", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14559", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pytest-dev__pytest-5631", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-26466", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16642", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15930", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pydata__xarray-7393", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13406", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pytest-dev__pytest-5840", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13658", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-15875", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-12039", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15382", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-14711", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11885", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-25775", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pylint-dev__pylint-4661", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-15100", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-17655", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-18763", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-26291", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-13031", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-24562", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pydata__xarray-2905", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-26208", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14315", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "astropy__astropy-7606", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "astropy__astropy-13453", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pytest-dev__pytest-7236", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-13878", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pylint-dev__pylint-6903", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "astropy__astropy-13033", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14915", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14539", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15503", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15037", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16612", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14404", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-12754", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-9229", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-8265", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13590", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14580", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pytest-dev__pytest-7571", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-8269", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13112", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13513", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pytest-dev__pytest-7432", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15731", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-16792", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14034", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "astropy__astropy-14096", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14765", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-18698", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14771", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15957", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pydata__xarray-3095", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-19954", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-16886", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11239", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13820", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pydata__xarray-4629", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13315", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16901", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13346", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-12262", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-24661", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11119", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "psf__requests-6028", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-20154", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-13142", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14752", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15563", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-16597", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15987", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-15976", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-10973", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15127", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-13439", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-7748", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13516", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-10554", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16082", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13786", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-21596", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-26113", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14672", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13109", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11141", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-23412", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-13551", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15368", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15814", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13933", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-12682", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-20676", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-8593", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-13852", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16819", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-12713", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14855", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14155", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-26323", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-13328", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-11510", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13195", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15916", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11820", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-24066", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-9591", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pytest-dev__pytest-7982", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14725", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-17029", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-14623", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pydata__xarray-6461", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16662", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-9461", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11848", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-22719", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pydata__xarray-4966", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11728", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-24627", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-7440", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-12419", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-13989", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11163", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "matplotlib__matplotlib-24177", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14011", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-13279", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-23534", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "astropy__astropy-7336", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "mwaskom__seaborn-3069", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "scikit-learn__scikit-learn-13779", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11179", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pydata__xarray-4075", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16595", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-14248", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pylint-dev__pylint-4604", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "astropy__astropy-14369", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-20438", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14500", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-9673", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "astropy__astropy-14309", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-9698", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-12489", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pydata__xarray-6744", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-17084", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "pydata__xarray-6599", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11138", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11477", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-13091", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11099", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-16429", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-14792", "pass1": 0, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-11999", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sphinx-doc__sphinx-8721", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "sympy__sympy-21847", "pass1": 1, "count": 1} +{"benchmark_id": "swebench-bash-only", "model": "GPT-5.2 (2025-12-11) (high reasoning)", "example_id": "django__django-15973", "pass1": 0, "count": 1} diff --git a/swebench-multimodal.jsonl b/swebench-multimodal.jsonl new file mode 100644 index 0000000..be48953 --- /dev/null +++ b/swebench-multimodal.jsonl @@ -0,0 +1,6120 @@ +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-7722","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-9083","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1677","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-15484","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-10176","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-9903","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-12329","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-6442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-11489","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-5011","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1143","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-4680","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-8022","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-15787","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-13155","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1928","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-9389","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-7353","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-14932","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"quarto-dev__quarto-cli-5091","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-4862","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-2841","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"grommet__grommet-6722","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-12402","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-3438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-717","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-4286","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-15365","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-10982","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-14239","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-12172","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-12683","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-9727","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-2553","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-15114","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-2348","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-4055","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-1786","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"quarto-dev__quarto-cli-4025","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-2740","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-11401","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-6989","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1578","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-6691","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-15271","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-7478","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1203","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-4991","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-2295","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-6949","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-14051","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1083","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-1895","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"grommet__grommet-6293","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-3198","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-2182","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-2024","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-12373","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"scratchfoundation__scratch-gui-5039","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1119","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-3372","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-7046","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-12410","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"quarto-dev__quarto-cli-4732","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-13860","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-3316","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"grommet__grommet-2695","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-3559","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"grommet__grommet-6282","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"grommet__grommet-6296","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"grommet__grommet-2124","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"scratchfoundation__scratch-gui-2778","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-8945","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"quarto-dev__quarto-cli-4064","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-14015","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1623","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-2686","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-3207","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-5330","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-3212","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-13119","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"scratchfoundation__scratch-gui-3342","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-3070","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-14479","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-9402","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-8912","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"quarto-dev__quarto-cli-896","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-2861","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"quarto-dev__quarto-cli-2756","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"grommet__grommet-7025","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"quarto-dev__quarto-cli-5010","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"grommet__grommet-2131","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-4952","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1610","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-15092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-12430","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-1563","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-11068","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-7288","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-9136","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-14627","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-12194","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-7210","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1438","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-13654","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-9451","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-2946","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-12445","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-2678","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-2610","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-3000","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-4354","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-14483","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-3347","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-3442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-870","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-6566","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-13421","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"eslint__eslint-14242","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-4347","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-6197","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-12141","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-14659","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-12962","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"grommet__grommet-6749","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"prettier__prettier-4202","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1192","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-13226","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"prettier__prettier-14961","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-14719","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-15054","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-966","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-15168","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-7356","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1719","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1330","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-101","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"quarto-dev__quarto-cli-6659","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-13198","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-3301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-4301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-1617","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1198","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-3034","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-4273","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-9932","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1847","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-15683","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-2703","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-12695","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-3253","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"grommet__grommet-6600","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-4820","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-16237","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"eslint__eslint-15243","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-2960","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-5173","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-3141","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-1067","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-4226","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"quarto-dev__quarto-cli-475","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-2972","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-12302","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-15466","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-3947","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-3312","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"quarto-dev__quarto-cli-1373","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-1755","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-8515","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-1895","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-94","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-2726","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-2029","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-13851","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-6976","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-3626","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-1602","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"eslint__eslint-12652","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-13218","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-3001","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1638","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"scratchfoundation__scratch-gui-8492","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"prettier__prettier-11884","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1679","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-10694","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1011","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-2684","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1179","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"grommet__grommet-2061","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"prettier__prettier-14688","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-3367","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1238","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-2958","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1584","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-6675","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-2622","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-3516","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"quarto-dev__quarto-cli-6388","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-10214","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-4806","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-12420","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1085","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-4801","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-7212","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-14066","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1174","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-9700","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-6964","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1337","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1542","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-877","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-4028","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"quarto-dev__quarto-cli-4184","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-13893","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-8452","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-1446","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-13069","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1236","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-1747","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-1916","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-2984","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"grommet__grommet-6490","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1640","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-8130","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-7554","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-3237","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-5871","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-12965","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-2765","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"grommet__grommet-6239","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-4816","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-4834","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-10545","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-2811","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-3583","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1221","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-3174","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-2919","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-8469","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-4999","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-7063","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-3118","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-11226","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-13020","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-4021","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1365","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-12262","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"eslint__eslint-8850","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-15229","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1080","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-13974","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-1509","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"quarto-dev__quarto-cli-1029","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"grommet__grommet-6350","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"grommet__grommet-6227","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-10723","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-2705","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-9307","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-2932","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-5925","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-5791","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-4182","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-6520","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-1500","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-1708","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-4036","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-5688","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-12970","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-4741","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-10295","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"prettier__prettier-4115","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1607","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-3610","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1196","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-2195","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-3824","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-10301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1168","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-6922","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-8720","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-3454","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-9189","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-4678","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-12384","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1644","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1299","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-8092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-2927","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1720","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1570","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1802","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-13981","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-2355","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-5156","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-2131","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-15614","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-3362","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-14414","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"eslint__eslint-11407","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-3457","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-1500","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-8940","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-1941","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"quarto-dev__quarto-cli-3853","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"prettier__prettier-14262","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-7270","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-9994","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-1897","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-13185","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"grommet__grommet-5243","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-2713","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1659","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-15197","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-13196","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"grommet__grommet-6438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"eslint__eslint-9348","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-4307","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-10225","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-1788","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-6906","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-14515","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-7619","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-3859","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-3355","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-1585","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-9291","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-6694","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"scratchfoundation__scratch-gui-4568","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-3139","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-13150","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"eslint__eslint-9436","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"prettier__prettier-9866","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1655","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"prettier__prettier-16347","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"quarto-dev__quarto-cli-4708","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-14599","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"quarto-dev__quarto-cli-2583","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-3438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-13364","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-14800","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"grommet__grommet-6494","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-3606","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-14587","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-12332","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-2782","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-12393","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-13212","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-3442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-10340","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-11649","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-2164","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"eslint__eslint-14033","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-3411","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"quarto-dev__quarto-cli-2689","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-11579","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-5084","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-11664","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-3351","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-14476","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-4430","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1636","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-8222","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-13333","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-2704","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-13527","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-9074","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1200","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-4431","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"prettier__prettier-9514","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-1586","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-10283","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"eslint__eslint-8120","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-10505","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"eslint__eslint-17618","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-6410","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"quarto-dev__quarto-cli-4695","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-11088","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-2923","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-5035","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-3644","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-6960","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"quarto-dev__quarto-cli-4539","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-7687","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-13547","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-11545","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1434","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-1064","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-16332","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-13975","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-5485","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-4891","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-9333","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-14619","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-15234","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"quarto-dev__quarto-cli-5064","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-9334","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"quarto-dev__quarto-cli-5547","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-3249","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-7350","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-12435","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-1742","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-12027","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"quarto-dev__quarto-cli-1650","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-11416","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-4167","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-2897","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-1807","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-14945","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-3050","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-3287","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-2703","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1442","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-1063","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-10599","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"prettier__prettier-12177","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-4859","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-13669","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-1549","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-11352","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-11743","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-2969","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"scratchfoundation__scratch-gui-8891","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1557","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-2899","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-10478","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-3154","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-13269","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-12467","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-1887","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-4260","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-11761","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-13013","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-3692","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1348","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-895","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-15796","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-8279","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-8477","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-13823","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-1853","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-6726","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-3278","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-4754","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-10262","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1567","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-2649","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-2792","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"grommet__grommet-6307","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-1720","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-3218","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"prettier__prettier-6319","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-1572","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-2785","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-114","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"quarto-dev__quarto-cli-6902","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-3724","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-9151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"eslint__eslint-12472","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-2754","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-14672","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-12398","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-7012","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-9812","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-665","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-3918","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-12151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-13317","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"grommet__grommet-6246","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-3928","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-3381","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-3345","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-11377","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-2680","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"prettier__prettier-8536","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1206","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1256","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1172","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-2750","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"prettier__prettier-4153","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"PrismJS__prism-1573","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-11047","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-14100","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-11621","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-8296","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-3445","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-3203","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-13224","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-7908","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-15685","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"bpmn-io__bpmn-js-1382","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-12442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-3283","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-13648","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"alibaba-fusion__next-2860","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-11613","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-10188","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"quarto-dev__quarto-cli-5292","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-2727","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-13509","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-12067","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-12412","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-2885","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-11738","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"GoogleChrome__lighthouse-2016","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"carbon-design-system__carbon-7768","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"highlightjs__highlight.js-3018","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-14332","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-03-10)","example_id":"openlayers__openlayers-15825","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-7722","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-9083","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1677","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-15484","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-10176","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-9903","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-12329","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-6442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-11489","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-5011","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1143","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-4680","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-8022","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-15787","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-13155","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1928","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-9389","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-7353","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-14932","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"quarto-dev__quarto-cli-5091","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-4862","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-2841","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"grommet__grommet-6722","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-12402","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-3438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-717","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-4286","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-15365","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-10982","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-14239","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-12172","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-12683","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-9727","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-2553","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-15114","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-2348","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-4055","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-1786","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"quarto-dev__quarto-cli-4025","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-2740","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-11401","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-6989","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1578","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-6691","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-15271","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-7478","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1203","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-4991","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-2295","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-6949","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-14051","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1083","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-1895","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"grommet__grommet-6293","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-3198","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-2182","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-2024","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-12373","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"scratchfoundation__scratch-gui-5039","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1119","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-3372","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-7046","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-12410","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"quarto-dev__quarto-cli-4732","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-13860","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-3316","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"grommet__grommet-2695","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-3559","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"grommet__grommet-6282","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"grommet__grommet-6296","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"grommet__grommet-2124","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"scratchfoundation__scratch-gui-2778","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-8945","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"quarto-dev__quarto-cli-4064","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-14015","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1623","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-2686","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-3207","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-5330","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-3212","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-13119","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"scratchfoundation__scratch-gui-3342","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-3070","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-14479","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-9402","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-8912","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"quarto-dev__quarto-cli-896","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-2861","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"quarto-dev__quarto-cli-2756","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"grommet__grommet-7025","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"quarto-dev__quarto-cli-5010","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"grommet__grommet-2131","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-4952","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1610","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-15092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-12430","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-1563","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-11068","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-7288","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-9136","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-14627","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-12194","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-7210","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1438","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-13654","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-9451","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-2946","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-12445","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-2678","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-2610","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-3000","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-4354","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-14483","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-3347","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-3442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-870","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-6566","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-13421","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"eslint__eslint-14242","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-4347","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-6197","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-12141","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-14659","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-12962","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"grommet__grommet-6749","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"prettier__prettier-4202","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1192","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-13226","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"prettier__prettier-14961","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-14719","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-15054","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-966","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-15168","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-7356","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1719","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1330","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-101","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"quarto-dev__quarto-cli-6659","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-13198","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-3301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-4301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-1617","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1198","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-3034","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-4273","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-9932","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1847","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-15683","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-2703","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-12695","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-3253","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"grommet__grommet-6600","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-4820","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-16237","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"eslint__eslint-15243","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-2960","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-5173","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-3141","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-1067","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-4226","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"quarto-dev__quarto-cli-475","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-2972","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-12302","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-15466","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-3947","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-3312","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"quarto-dev__quarto-cli-1373","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-1755","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-8515","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-1895","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-94","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-2726","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-2029","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-13851","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-6976","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-3626","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-1602","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"eslint__eslint-12652","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-13218","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-3001","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1638","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"scratchfoundation__scratch-gui-8492","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"prettier__prettier-11884","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1679","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-10694","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1011","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-2684","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1179","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"grommet__grommet-2061","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"prettier__prettier-14688","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-3367","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1238","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-2958","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1584","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-6675","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-2622","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-3516","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"quarto-dev__quarto-cli-6388","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-10214","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-4806","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-12420","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1085","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-4801","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-7212","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-14066","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1174","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-9700","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-6964","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1337","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1542","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-877","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-4028","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"quarto-dev__quarto-cli-4184","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-13893","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-8452","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-1446","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-13069","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1236","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-1747","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-1916","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-2984","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"grommet__grommet-6490","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1640","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-8130","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-7554","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-3237","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-5871","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-12965","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-2765","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"grommet__grommet-6239","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-4816","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-4834","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-10545","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-2811","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-3583","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1221","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-3174","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-2919","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-8469","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-4999","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-7063","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-3118","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-11226","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-13020","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-4021","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1365","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-12262","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"eslint__eslint-8850","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-15229","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1080","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-13974","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-1509","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"quarto-dev__quarto-cli-1029","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"grommet__grommet-6350","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"grommet__grommet-6227","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-10723","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-2705","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-9307","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-2932","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-5925","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-5791","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-4182","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-6520","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-1500","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-1708","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-4036","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-5688","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-12970","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-4741","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-10295","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"prettier__prettier-4115","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1607","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-3610","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1196","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-2195","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-3824","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-10301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1168","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-6922","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-8720","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-3454","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-9189","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-4678","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-12384","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1644","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1299","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-8092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-2927","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1720","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1570","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1802","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-13981","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-2355","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-5156","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-2131","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-15614","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-3362","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-14414","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"eslint__eslint-11407","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-3457","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-1500","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-8940","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-1941","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"quarto-dev__quarto-cli-3853","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"prettier__prettier-14262","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-7270","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-9994","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-1897","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-13185","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"grommet__grommet-5243","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-2713","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1659","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-15197","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-13196","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"grommet__grommet-6438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"eslint__eslint-9348","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-4307","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-10225","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-1788","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-6906","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-14515","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-7619","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-3859","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-3355","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-1585","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-9291","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-6694","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"scratchfoundation__scratch-gui-4568","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-3139","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-13150","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"eslint__eslint-9436","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"prettier__prettier-9866","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1655","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"prettier__prettier-16347","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"quarto-dev__quarto-cli-4708","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-14599","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"quarto-dev__quarto-cli-2583","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-3438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-13364","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-14800","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"grommet__grommet-6494","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-3606","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-14587","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-12332","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-2782","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-12393","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-13212","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-3442","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-10340","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-11649","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-2164","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"eslint__eslint-14033","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-3411","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"quarto-dev__quarto-cli-2689","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-11579","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-5084","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-11664","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-3351","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-14476","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-4430","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1636","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-8222","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-13333","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-2704","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-13527","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-9074","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1200","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-4431","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"prettier__prettier-9514","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-1586","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-10283","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"eslint__eslint-8120","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-10505","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"eslint__eslint-17618","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-6410","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"quarto-dev__quarto-cli-4695","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-11088","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-2923","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-5035","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-3644","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-6960","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"quarto-dev__quarto-cli-4539","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-7687","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-13547","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-11545","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1434","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-1064","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-16332","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-13975","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-5485","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-4891","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-9333","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-14619","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-15234","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"quarto-dev__quarto-cli-5064","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-9334","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"quarto-dev__quarto-cli-5547","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-3249","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-7350","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-12435","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-1742","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-12027","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"quarto-dev__quarto-cli-1650","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-11416","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-4167","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-2897","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-1807","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-14945","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-3050","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-3287","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-2703","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1442","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-1063","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-10599","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"prettier__prettier-12177","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-4859","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-13669","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-1549","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-11352","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-11743","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-2969","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"scratchfoundation__scratch-gui-8891","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1557","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-2899","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-10478","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-3154","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-13269","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-12467","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-1887","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-4260","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-11761","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-13013","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-3692","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1348","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-895","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-15796","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-8279","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-8477","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-13823","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-1853","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-6726","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-3278","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-4754","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-10262","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1567","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-2649","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-2792","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"grommet__grommet-6307","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-1720","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-3218","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"prettier__prettier-6319","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-1572","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-2785","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-114","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"quarto-dev__quarto-cli-6902","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-3724","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-9151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"eslint__eslint-12472","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-2754","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-14672","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-12398","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-7012","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-9812","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-665","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-3918","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-12151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-13317","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"grommet__grommet-6246","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-3928","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-3381","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-3345","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-11377","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-2680","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"prettier__prettier-8536","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1206","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1256","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1172","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-2750","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"prettier__prettier-4153","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"PrismJS__prism-1573","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-11047","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-14100","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-11621","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-8296","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-3445","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-3203","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-13224","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-7908","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-15685","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"bpmn-io__bpmn-js-1382","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-12442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-3283","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-13648","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"alibaba-fusion__next-2860","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-11613","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-10188","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"quarto-dev__quarto-cli-5292","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-2727","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-13509","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-12067","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-12412","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-2885","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-11738","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"GoogleChrome__lighthouse-2016","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"carbon-design-system__carbon-7768","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"highlightjs__highlight.js-3018","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-14332","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o3 (2025-04-16)","example_id":"openlayers__openlayers-15825","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-7722","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-9083","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1677","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-15484","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-10176","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-9903","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-12329","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-6442","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-11489","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-5011","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1143","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-4680","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-8022","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-15787","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-13155","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1928","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-9389","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-7353","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-14932","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"quarto-dev__quarto-cli-5091","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-4862","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-2841","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"grommet__grommet-6722","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-12402","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-3438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-717","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-4286","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-15365","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-10982","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-14239","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-12172","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-12683","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-9727","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-2553","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-15114","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-2348","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-4055","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-1786","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"quarto-dev__quarto-cli-4025","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-2740","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-11401","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-6989","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1578","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-6691","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-15271","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-7478","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1203","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-4991","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-2295","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-6949","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-14051","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1083","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-1895","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"grommet__grommet-6293","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-3198","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-2182","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-2024","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-12373","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"scratchfoundation__scratch-gui-5039","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1119","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-3372","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-7046","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-12410","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"quarto-dev__quarto-cli-4732","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-13860","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-3316","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"grommet__grommet-2695","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-3559","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"grommet__grommet-6282","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"grommet__grommet-6296","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"grommet__grommet-2124","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"scratchfoundation__scratch-gui-2778","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-8945","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"quarto-dev__quarto-cli-4064","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-14015","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1623","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-2686","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-3207","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-5330","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-3212","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-13119","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"scratchfoundation__scratch-gui-3342","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-3070","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-14479","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-9402","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-8912","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"quarto-dev__quarto-cli-896","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-2861","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"quarto-dev__quarto-cli-2756","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"grommet__grommet-7025","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"quarto-dev__quarto-cli-5010","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"grommet__grommet-2131","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-4952","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1610","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-15092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-12430","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-1563","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-11068","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-7288","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-9136","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-14627","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-12194","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-7210","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1438","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-13654","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-9451","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-2946","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-12445","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-2678","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-2610","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-3000","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-4354","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-14483","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-3347","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-3442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-870","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-6566","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-13421","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"eslint__eslint-14242","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-4347","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-6197","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-12141","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-14659","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-12962","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"grommet__grommet-6749","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"prettier__prettier-4202","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1192","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-13226","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"prettier__prettier-14961","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-14719","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-15054","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-966","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-15168","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-7356","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1719","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1330","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-101","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"quarto-dev__quarto-cli-6659","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-13198","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-3301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-4301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-1617","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1198","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-3034","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-4273","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-9932","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1847","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-15683","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-2703","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-12695","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-3253","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"grommet__grommet-6600","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-4820","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-16237","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"eslint__eslint-15243","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-2960","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-5173","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-3141","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-1067","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-4226","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"quarto-dev__quarto-cli-475","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-2972","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-12302","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-15466","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-3947","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-3312","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"quarto-dev__quarto-cli-1373","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-1755","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-8515","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-1895","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-94","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-2726","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-2029","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-13851","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-6976","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-3626","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-1602","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"eslint__eslint-12652","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-13218","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-3001","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1638","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"scratchfoundation__scratch-gui-8492","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"prettier__prettier-11884","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1679","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-10694","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1011","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-2684","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1179","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"grommet__grommet-2061","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"prettier__prettier-14688","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-3367","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1238","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-2958","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1584","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-6675","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-2622","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-3516","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"quarto-dev__quarto-cli-6388","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-10214","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-4806","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-12420","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1085","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-4801","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-7212","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-14066","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1174","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-9700","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-6964","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1337","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1542","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-877","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-4028","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"quarto-dev__quarto-cli-4184","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-13893","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-8452","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-1446","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-13069","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1236","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-1747","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-1916","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-2984","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"grommet__grommet-6490","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1640","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-8130","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-7554","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-3237","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-5871","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-12965","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-2765","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"grommet__grommet-6239","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-4816","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-4834","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-10545","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-2811","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-3583","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1221","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-3174","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-2919","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-8469","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-4999","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-7063","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-3118","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-11226","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-13020","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-4021","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1365","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-12262","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"eslint__eslint-8850","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-15229","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1080","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-13974","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-1509","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"quarto-dev__quarto-cli-1029","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"grommet__grommet-6350","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"grommet__grommet-6227","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-10723","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-2705","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-9307","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-2932","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-5925","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-5791","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-4182","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-6520","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-1500","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-1708","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-4036","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-5688","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-12970","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-4741","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-10295","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"prettier__prettier-4115","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1607","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-3610","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1196","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-2195","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-3824","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-10301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1168","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-6922","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-8720","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-3454","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-9189","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-4678","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-12384","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1644","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1299","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-8092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-2927","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1720","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1570","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1802","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-13981","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-2355","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-5156","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-2131","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-15614","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-3362","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-14414","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"eslint__eslint-11407","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-3457","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-1500","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-8940","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-1941","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"quarto-dev__quarto-cli-3853","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"prettier__prettier-14262","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-7270","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-9994","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-1897","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-13185","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"grommet__grommet-5243","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-2713","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1659","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-15197","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-13196","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"grommet__grommet-6438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"eslint__eslint-9348","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-4307","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-10225","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-1788","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-6906","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-14515","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-7619","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-3859","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-3355","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-1585","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-9291","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-6694","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"scratchfoundation__scratch-gui-4568","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-3139","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-13150","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"eslint__eslint-9436","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"prettier__prettier-9866","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1655","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"prettier__prettier-16347","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"quarto-dev__quarto-cli-4708","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-14599","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"quarto-dev__quarto-cli-2583","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-3438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-13364","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-14800","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"grommet__grommet-6494","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-3606","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-14587","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-12332","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-2782","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-12393","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-13212","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-3442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-10340","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-11649","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-2164","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"eslint__eslint-14033","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-3411","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"quarto-dev__quarto-cli-2689","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-11579","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-5084","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-11664","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-3351","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-14476","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-4430","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1636","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-8222","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-13333","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-2704","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-13527","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-9074","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1200","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-4431","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"prettier__prettier-9514","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-1586","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-10283","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"eslint__eslint-8120","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-10505","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"eslint__eslint-17618","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-6410","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"quarto-dev__quarto-cli-4695","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-11088","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-2923","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-5035","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-3644","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-6960","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"quarto-dev__quarto-cli-4539","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-7687","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-13547","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-11545","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1434","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-1064","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-16332","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-13975","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-5485","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-4891","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-9333","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-14619","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-15234","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"quarto-dev__quarto-cli-5064","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-9334","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"quarto-dev__quarto-cli-5547","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-3249","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-7350","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-12435","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-1742","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-12027","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"quarto-dev__quarto-cli-1650","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-11416","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-4167","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-2897","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-1807","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-14945","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-3050","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-3287","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-2703","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1442","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-1063","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-10599","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"prettier__prettier-12177","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-4859","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-13669","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-1549","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-11352","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-11743","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-2969","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"scratchfoundation__scratch-gui-8891","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1557","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-2899","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-10478","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-3154","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-13269","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-12467","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-1887","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-4260","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-11761","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-13013","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-3692","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1348","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-895","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-15796","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-8279","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-8477","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-13823","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-1853","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-6726","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-3278","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-4754","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-10262","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1567","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-2649","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-2792","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"grommet__grommet-6307","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-1720","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-3218","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"prettier__prettier-6319","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-1572","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-2785","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-114","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"quarto-dev__quarto-cli-6902","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-3724","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-9151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"eslint__eslint-12472","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-2754","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-14672","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-12398","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-7012","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-9812","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-665","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-3918","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-12151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-13317","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"grommet__grommet-6246","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-3928","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-3381","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-3345","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-11377","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-2680","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"prettier__prettier-8536","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1206","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1256","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1172","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-2750","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"prettier__prettier-4153","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"PrismJS__prism-1573","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-11047","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-14100","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-11621","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-8296","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-3445","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-3203","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-13224","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-7908","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-15685","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"bpmn-io__bpmn-js-1382","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-12442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-3283","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-13648","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"alibaba-fusion__next-2860","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-11613","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-10188","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"quarto-dev__quarto-cli-5292","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-2727","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-13509","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-12067","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-12412","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-2885","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-11738","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"GoogleChrome__lighthouse-2016","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"carbon-design-system__carbon-7768","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"highlightjs__highlight.js-3018","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-14332","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Agentless Lite + Claude-3.5 Sonnet","example_id":"openlayers__openlayers-15825","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-7722","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-9083","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1677","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-15484","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-10176","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-9903","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-12329","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-6442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-11489","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-5011","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1143","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-4680","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-8022","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-15787","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-13155","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1928","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-9389","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-7353","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-14932","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"quarto-dev__quarto-cli-5091","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-4862","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-2841","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"grommet__grommet-6722","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-12402","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-3438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-717","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-4286","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-15365","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-10982","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-14239","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-12172","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-12683","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-9727","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-2553","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-15114","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-2348","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-4055","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-1786","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"quarto-dev__quarto-cli-4025","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-2740","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-11401","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-6989","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1578","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-6691","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-15271","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-7478","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1203","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-4991","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-2295","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-6949","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-14051","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1083","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-1895","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"grommet__grommet-6293","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-3198","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-2182","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-2024","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-12373","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"scratchfoundation__scratch-gui-5039","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1119","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-3372","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-7046","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-12410","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"quarto-dev__quarto-cli-4732","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-13860","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-3316","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"grommet__grommet-2695","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-3559","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"grommet__grommet-6282","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"grommet__grommet-6296","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"grommet__grommet-2124","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"scratchfoundation__scratch-gui-2778","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-8945","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"quarto-dev__quarto-cli-4064","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-14015","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1623","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-2686","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-3207","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-5330","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-3212","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-13119","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"scratchfoundation__scratch-gui-3342","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-3070","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-14479","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-9402","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-8912","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"quarto-dev__quarto-cli-896","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-2861","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"quarto-dev__quarto-cli-2756","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"grommet__grommet-7025","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"quarto-dev__quarto-cli-5010","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"grommet__grommet-2131","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-4952","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1610","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-15092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-12430","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-1563","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-11068","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-7288","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-9136","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-14627","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-12194","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-7210","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1438","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-13654","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-9451","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-2946","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-12445","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-2678","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-2610","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-3000","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-4354","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-14483","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-3347","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-3442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-870","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-6566","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-13421","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"eslint__eslint-14242","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-4347","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-6197","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-12141","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-14659","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-12962","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"grommet__grommet-6749","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"prettier__prettier-4202","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1192","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-13226","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"prettier__prettier-14961","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-14719","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-15054","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-966","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-15168","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-7356","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1719","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1330","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-101","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"quarto-dev__quarto-cli-6659","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-13198","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-3301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-4301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-1617","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1198","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-3034","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-4273","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-9932","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1847","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-15683","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-2703","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-12695","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-3253","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"grommet__grommet-6600","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-4820","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-16237","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"eslint__eslint-15243","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-2960","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-5173","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-3141","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-1067","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-4226","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"quarto-dev__quarto-cli-475","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-2972","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-12302","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-15466","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-3947","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-3312","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"quarto-dev__quarto-cli-1373","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-1755","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-8515","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-1895","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-94","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-2726","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-2029","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-13851","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-6976","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-3626","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-1602","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"eslint__eslint-12652","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-13218","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-3001","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1638","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"scratchfoundation__scratch-gui-8492","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"prettier__prettier-11884","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1679","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-10694","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1011","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-2684","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1179","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"grommet__grommet-2061","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"prettier__prettier-14688","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-3367","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1238","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-2958","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1584","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-6675","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-2622","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-3516","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"quarto-dev__quarto-cli-6388","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-10214","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-4806","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-12420","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1085","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-4801","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-7212","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-14066","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1174","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-9700","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-6964","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1337","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1542","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-877","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-4028","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"quarto-dev__quarto-cli-4184","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-13893","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-8452","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-1446","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-13069","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1236","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-1747","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-1916","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-2984","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"grommet__grommet-6490","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1640","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-8130","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-7554","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-3237","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-5871","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-12965","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-2765","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"grommet__grommet-6239","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-4816","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-4834","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-10545","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-2811","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-3583","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1221","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-3174","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-2919","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-8469","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-4999","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-7063","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-3118","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-11226","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-13020","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-4021","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1365","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-12262","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"eslint__eslint-8850","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-15229","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1080","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-13974","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-1509","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"quarto-dev__quarto-cli-1029","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"grommet__grommet-6350","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"grommet__grommet-6227","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-10723","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-2705","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-9307","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-2932","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-5925","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-5791","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-4182","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-6520","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-1500","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-1708","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-4036","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-5688","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-12970","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-4741","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-10295","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"prettier__prettier-4115","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1607","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-3610","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1196","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-2195","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-3824","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-10301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1168","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-6922","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-8720","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-3454","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-9189","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-4678","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-12384","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1644","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1299","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-8092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-2927","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1720","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1570","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1802","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-13981","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-2355","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-5156","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-2131","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-15614","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-3362","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-14414","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"eslint__eslint-11407","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-3457","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-1500","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-8940","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-1941","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"quarto-dev__quarto-cli-3853","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"prettier__prettier-14262","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-7270","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-9994","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-1897","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-13185","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"grommet__grommet-5243","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-2713","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1659","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-15197","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-13196","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"grommet__grommet-6438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"eslint__eslint-9348","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-4307","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-10225","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-1788","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-6906","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-14515","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-7619","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-3859","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-3355","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-1585","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-9291","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-6694","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"scratchfoundation__scratch-gui-4568","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-3139","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-13150","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"eslint__eslint-9436","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"prettier__prettier-9866","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1655","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"prettier__prettier-16347","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"quarto-dev__quarto-cli-4708","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-14599","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"quarto-dev__quarto-cli-2583","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-3438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-13364","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-14800","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"grommet__grommet-6494","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-3606","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-14587","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-12332","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-2782","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-12393","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-13212","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-3442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-10340","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-11649","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-2164","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"eslint__eslint-14033","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-3411","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"quarto-dev__quarto-cli-2689","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-11579","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-5084","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-11664","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-3351","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-14476","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-4430","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1636","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-8222","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-13333","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-2704","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-13527","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-9074","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1200","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-4431","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"prettier__prettier-9514","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-1586","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-10283","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"eslint__eslint-8120","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-10505","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"eslint__eslint-17618","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-6410","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"quarto-dev__quarto-cli-4695","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-11088","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-2923","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-5035","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-3644","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-6960","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"quarto-dev__quarto-cli-4539","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-7687","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-13547","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-11545","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1434","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-1064","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-16332","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-13975","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-5485","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-4891","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-9333","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-14619","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-15234","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"quarto-dev__quarto-cli-5064","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-9334","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"quarto-dev__quarto-cli-5547","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-3249","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-7350","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-12435","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-1742","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-12027","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"quarto-dev__quarto-cli-1650","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-11416","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-4167","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-2897","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-1807","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-14945","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-3050","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-3287","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-2703","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1442","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-1063","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-10599","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"prettier__prettier-12177","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-4859","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-13669","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-1549","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-11352","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-11743","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-2969","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"scratchfoundation__scratch-gui-8891","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1557","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-2899","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-10478","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-3154","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-13269","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-12467","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-1887","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-4260","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-11761","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-13013","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-3692","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1348","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-895","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-15796","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-8279","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-8477","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-13823","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-1853","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-6726","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-3278","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-4754","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-10262","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1567","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-2649","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-2792","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"grommet__grommet-6307","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-1720","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-3218","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"prettier__prettier-6319","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-1572","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-2785","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-114","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"quarto-dev__quarto-cli-6902","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-3724","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-9151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"eslint__eslint-12472","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-2754","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-14672","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-12398","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-7012","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-9812","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-665","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-3918","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-12151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-13317","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"grommet__grommet-6246","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-3928","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-3381","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-3345","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-11377","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-2680","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"prettier__prettier-8536","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1206","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1256","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1172","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-2750","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"prettier__prettier-4153","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"PrismJS__prism-1573","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-11047","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-14100","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-11621","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-8296","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-3445","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-3203","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-13224","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-7908","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-15685","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"bpmn-io__bpmn-js-1382","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-12442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-3283","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-13648","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"alibaba-fusion__next-2860","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-11613","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-10188","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"quarto-dev__quarto-cli-5292","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-2727","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-13509","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-12067","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-12412","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-2885","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-11738","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"GoogleChrome__lighthouse-2016","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"carbon-design-system__carbon-7768","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"highlightjs__highlight.js-3018","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-14332","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Zencoder (2025-04-01)","example_id":"openlayers__openlayers-15825","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-7722","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-9083","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1677","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-15484","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-10176","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-9903","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-12329","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-6442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-11489","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-5011","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1143","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-4680","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-8022","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-15787","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-13155","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1928","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-9389","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-7353","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-14932","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"quarto-dev__quarto-cli-5091","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-4862","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-2841","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"grommet__grommet-6722","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-12402","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-3438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-717","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-4286","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-15365","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-10982","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-14239","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-12172","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-12683","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-9727","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-2553","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-15114","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-2348","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-4055","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-1786","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"quarto-dev__quarto-cli-4025","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-2740","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-11401","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-6989","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1578","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-6691","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-15271","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-7478","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1203","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-4991","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-2295","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-6949","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-14051","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1083","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-1895","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"grommet__grommet-6293","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-3198","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-2182","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-2024","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-12373","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"scratchfoundation__scratch-gui-5039","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1119","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-3372","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-7046","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-12410","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"quarto-dev__quarto-cli-4732","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-13860","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-3316","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"grommet__grommet-2695","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-3559","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"grommet__grommet-6282","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"grommet__grommet-6296","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"grommet__grommet-2124","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"scratchfoundation__scratch-gui-2778","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-8945","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"quarto-dev__quarto-cli-4064","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-14015","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1623","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-2686","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-3207","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-5330","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-3212","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-13119","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"scratchfoundation__scratch-gui-3342","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-3070","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-14479","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-9402","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-8912","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"quarto-dev__quarto-cli-896","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-2861","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"quarto-dev__quarto-cli-2756","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"grommet__grommet-7025","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"quarto-dev__quarto-cli-5010","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"grommet__grommet-2131","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-4952","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1610","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-15092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-12430","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-1563","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-11068","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-7288","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-9136","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-14627","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-12194","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-7210","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1438","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-13654","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-9451","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-2946","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-12445","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-2678","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-2610","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-3000","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-4354","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-14483","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-3347","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-3442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-870","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-6566","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-13421","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"eslint__eslint-14242","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-4347","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-6197","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-12141","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-14659","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-12962","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"grommet__grommet-6749","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"prettier__prettier-4202","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1192","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-13226","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"prettier__prettier-14961","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-14719","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-15054","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-966","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-15168","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-7356","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1719","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1330","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-101","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"quarto-dev__quarto-cli-6659","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-13198","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-3301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-4301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-1617","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1198","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-3034","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-4273","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-9932","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1847","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-15683","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-2703","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-12695","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-3253","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"grommet__grommet-6600","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-4820","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-16237","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"eslint__eslint-15243","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-2960","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-5173","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-3141","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-1067","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-4226","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"quarto-dev__quarto-cli-475","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-2972","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-12302","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-15466","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-3947","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-3312","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"quarto-dev__quarto-cli-1373","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-1755","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-8515","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-1895","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-94","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-2726","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-2029","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-13851","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-6976","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-3626","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-1602","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"eslint__eslint-12652","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-13218","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-3001","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1638","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"scratchfoundation__scratch-gui-8492","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"prettier__prettier-11884","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1679","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-10694","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1011","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-2684","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1179","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"grommet__grommet-2061","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"prettier__prettier-14688","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-3367","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1238","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-2958","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1584","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-6675","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-2622","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-3516","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"quarto-dev__quarto-cli-6388","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-10214","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-4806","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-12420","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1085","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-4801","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-7212","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-14066","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1174","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-9700","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-6964","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1337","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1542","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-877","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-4028","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"quarto-dev__quarto-cli-4184","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-13893","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-8452","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-1446","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-13069","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1236","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-1747","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-1916","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-2984","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"grommet__grommet-6490","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1640","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-8130","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-7554","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-3237","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-5871","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-12965","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-2765","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"grommet__grommet-6239","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-4816","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-4834","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-10545","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-2811","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-3583","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1221","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-3174","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-2919","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-8469","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-4999","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-7063","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-3118","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-11226","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-13020","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-4021","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1365","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-12262","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"eslint__eslint-8850","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-15229","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1080","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-13974","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-1509","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"quarto-dev__quarto-cli-1029","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"grommet__grommet-6350","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"grommet__grommet-6227","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-10723","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-2705","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-9307","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-2932","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-5925","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-5791","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-4182","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-6520","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-1500","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-1708","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-4036","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-5688","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-12970","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-4741","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-10295","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"prettier__prettier-4115","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1607","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-3610","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1196","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-2195","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-3824","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-10301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1168","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-6922","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-8720","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-3454","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-9189","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-4678","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-12384","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1644","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1299","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-8092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-2927","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1720","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1570","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1802","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-13981","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-2355","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-5156","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-2131","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-15614","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-3362","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-14414","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"eslint__eslint-11407","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-3457","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-1500","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-8940","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-1941","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"quarto-dev__quarto-cli-3853","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"prettier__prettier-14262","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-7270","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-9994","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-1897","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-13185","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"grommet__grommet-5243","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-2713","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1659","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-15197","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-13196","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"grommet__grommet-6438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"eslint__eslint-9348","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-4307","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-10225","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-1788","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-6906","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-14515","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-7619","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-3859","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-3355","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-1585","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-9291","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-6694","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"scratchfoundation__scratch-gui-4568","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-3139","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-13150","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"eslint__eslint-9436","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"prettier__prettier-9866","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1655","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"prettier__prettier-16347","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"quarto-dev__quarto-cli-4708","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-14599","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"quarto-dev__quarto-cli-2583","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-3438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-13364","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-14800","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"grommet__grommet-6494","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-3606","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-14587","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-12332","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-2782","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-12393","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-13212","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-3442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-10340","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-11649","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-2164","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"eslint__eslint-14033","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-3411","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"quarto-dev__quarto-cli-2689","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-11579","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-5084","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-11664","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-3351","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-14476","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-4430","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1636","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-8222","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-13333","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-2704","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-13527","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-9074","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1200","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-4431","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"prettier__prettier-9514","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-1586","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-10283","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"eslint__eslint-8120","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-10505","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"eslint__eslint-17618","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-6410","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"quarto-dev__quarto-cli-4695","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-11088","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-2923","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-5035","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-3644","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-6960","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"quarto-dev__quarto-cli-4539","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-7687","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-13547","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-11545","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1434","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-1064","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-16332","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-13975","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-5485","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-4891","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-9333","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-14619","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-15234","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"quarto-dev__quarto-cli-5064","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-9334","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"quarto-dev__quarto-cli-5547","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-3249","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-7350","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-12435","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-1742","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-12027","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"quarto-dev__quarto-cli-1650","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-11416","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-4167","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-2897","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-1807","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-14945","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-3050","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-3287","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-2703","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1442","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-1063","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-10599","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"prettier__prettier-12177","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-4859","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-13669","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-1549","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-11352","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-11743","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-2969","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"scratchfoundation__scratch-gui-8891","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1557","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-2899","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-10478","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-3154","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-13269","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-12467","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-1887","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-4260","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-11761","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-13013","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-3692","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1348","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-895","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-15796","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-8279","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-8477","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-13823","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-1853","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-6726","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-3278","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-4754","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-10262","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1567","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-2649","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-2792","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"grommet__grommet-6307","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-1720","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-3218","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"prettier__prettier-6319","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-1572","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-2785","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-114","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"quarto-dev__quarto-cli-6902","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-3724","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-9151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"eslint__eslint-12472","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-2754","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-14672","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-12398","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-7012","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-9812","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-665","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-3918","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-12151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-13317","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"grommet__grommet-6246","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-3928","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-3381","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-3345","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-11377","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-2680","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"prettier__prettier-8536","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1206","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1256","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1172","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-2750","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"prettier__prettier-4153","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"PrismJS__prism-1573","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-11047","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-14100","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-11621","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-8296","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-3445","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-3203","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-13224","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-7908","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-15685","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"bpmn-io__bpmn-js-1382","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-12442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-3283","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-13648","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"alibaba-fusion__next-2860","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-11613","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-10188","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"quarto-dev__quarto-cli-5292","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-2727","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-13509","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-12067","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-12412","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-2885","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-11738","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"GoogleChrome__lighthouse-2016","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"carbon-design-system__carbon-7768","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"highlightjs__highlight.js-3018","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-14332","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4o (2024-08-06)","example_id":"openlayers__openlayers-15825","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-7722","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-9083","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1677","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-15484","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-10176","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-9903","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-12329","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-6442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-11489","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-5011","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1143","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-4680","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-8022","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-15787","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-13155","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1928","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-9389","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-7353","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-14932","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"quarto-dev__quarto-cli-5091","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-4862","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-2841","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"grommet__grommet-6722","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-12402","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-3438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-717","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-4286","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-15365","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-10982","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-14239","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-12172","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-12683","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-9727","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-2553","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-15114","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-2348","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-4055","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-1786","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"quarto-dev__quarto-cli-4025","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-2740","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-11401","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-6989","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1578","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-6691","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-15271","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-7478","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1203","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-4991","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-2295","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-6949","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-14051","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1083","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-1895","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"grommet__grommet-6293","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-3198","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-2182","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-2024","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-12373","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"scratchfoundation__scratch-gui-5039","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1119","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-3372","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-7046","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-12410","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"quarto-dev__quarto-cli-4732","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-13860","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-3316","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"grommet__grommet-2695","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-3559","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"grommet__grommet-6282","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"grommet__grommet-6296","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"grommet__grommet-2124","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"scratchfoundation__scratch-gui-2778","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-8945","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"quarto-dev__quarto-cli-4064","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-14015","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1623","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-2686","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-3207","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-5330","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-3212","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-13119","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"scratchfoundation__scratch-gui-3342","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-3070","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-14479","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-9402","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-8912","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"quarto-dev__quarto-cli-896","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-2861","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"quarto-dev__quarto-cli-2756","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"grommet__grommet-7025","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"quarto-dev__quarto-cli-5010","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"grommet__grommet-2131","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-4952","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1610","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-15092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-12430","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-1563","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-11068","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-7288","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-9136","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-14627","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-12194","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-7210","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1438","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-13654","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-9451","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-2946","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-12445","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-2678","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-2610","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-3000","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-4354","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-14483","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-3347","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-3442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-870","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-6566","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-13421","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"eslint__eslint-14242","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-4347","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-6197","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-12141","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-14659","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-12962","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"grommet__grommet-6749","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"prettier__prettier-4202","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1192","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-13226","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"prettier__prettier-14961","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-14719","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-15054","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-966","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-15168","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-7356","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1719","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1330","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-101","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"quarto-dev__quarto-cli-6659","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-13198","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-3301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-4301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-1617","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1198","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-3034","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-4273","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-9932","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1847","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-15683","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-2703","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-12695","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-3253","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"grommet__grommet-6600","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-4820","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-16237","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"eslint__eslint-15243","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-2960","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-5173","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-3141","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-1067","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-4226","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"quarto-dev__quarto-cli-475","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-2972","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-12302","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-15466","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-3947","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-3312","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"quarto-dev__quarto-cli-1373","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-1755","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-8515","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-1895","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-94","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-2726","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-2029","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-13851","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-6976","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-3626","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-1602","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"eslint__eslint-12652","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-13218","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-3001","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1638","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"scratchfoundation__scratch-gui-8492","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"prettier__prettier-11884","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1679","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-10694","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1011","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-2684","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1179","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"grommet__grommet-2061","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"prettier__prettier-14688","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-3367","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1238","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-2958","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1584","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-6675","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-2622","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-3516","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"quarto-dev__quarto-cli-6388","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-10214","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-4806","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-12420","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1085","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-4801","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-7212","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-14066","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1174","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-9700","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-6964","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1337","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1542","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-877","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-4028","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"quarto-dev__quarto-cli-4184","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-13893","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-8452","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-1446","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-13069","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1236","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-1747","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-1916","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-2984","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"grommet__grommet-6490","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1640","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-8130","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-7554","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-3237","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-5871","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-12965","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-2765","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"grommet__grommet-6239","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-4816","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-4834","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-10545","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-2811","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-3583","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1221","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-3174","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-2919","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-8469","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-4999","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-7063","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-3118","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-11226","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-13020","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-4021","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1365","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-12262","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"eslint__eslint-8850","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-15229","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1080","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-13974","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-1509","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"quarto-dev__quarto-cli-1029","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"grommet__grommet-6350","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"grommet__grommet-6227","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-10723","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-2705","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-9307","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-2932","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-5925","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-5791","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-4182","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-6520","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-1500","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-1708","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-4036","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-5688","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-12970","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-4741","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-10295","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"prettier__prettier-4115","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1607","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-3610","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1196","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-2195","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-3824","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-10301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1168","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1151","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-6922","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-8720","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-3454","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-9189","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-4678","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-12384","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1644","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1299","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-8092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-2927","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1720","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1570","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1802","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-13981","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-2355","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-5156","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-2131","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-15614","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-3362","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-14414","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"eslint__eslint-11407","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-3457","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-1500","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-8940","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-1941","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"quarto-dev__quarto-cli-3853","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"prettier__prettier-14262","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-7270","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-9994","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-1897","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-13185","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"grommet__grommet-5243","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-2713","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1659","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-15197","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-13196","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"grommet__grommet-6438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"eslint__eslint-9348","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-4307","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-10225","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-1788","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-6906","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-14515","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-7619","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-3859","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-3355","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-1585","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-9291","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-6694","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"scratchfoundation__scratch-gui-4568","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-3139","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-13150","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"eslint__eslint-9436","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"prettier__prettier-9866","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1655","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"prettier__prettier-16347","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"quarto-dev__quarto-cli-4708","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-14599","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"quarto-dev__quarto-cli-2583","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-3438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-13364","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-14800","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"grommet__grommet-6494","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-3606","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-14587","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-12332","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-2782","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-12393","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-13212","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-3442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-10340","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-11649","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-2164","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"eslint__eslint-14033","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-3411","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"quarto-dev__quarto-cli-2689","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-11579","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-5084","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-11664","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-3351","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-14476","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-4430","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1636","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-8222","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-13333","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-2704","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-13527","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-9074","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1200","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-4431","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"prettier__prettier-9514","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-1586","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-10283","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"eslint__eslint-8120","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-10505","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"eslint__eslint-17618","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-6410","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"quarto-dev__quarto-cli-4695","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-11088","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-2923","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-5035","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-3644","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-6960","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"quarto-dev__quarto-cli-4539","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-7687","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-13547","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-11545","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1434","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-1064","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-16332","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-13975","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-5485","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-4891","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-9333","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-14619","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-15234","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"quarto-dev__quarto-cli-5064","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-9334","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"quarto-dev__quarto-cli-5547","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-3249","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-7350","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-12435","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-1742","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-12027","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"quarto-dev__quarto-cli-1650","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-11416","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-4167","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-2897","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-1807","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-14945","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-3050","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-3287","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-2703","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1442","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-1063","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-10599","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"prettier__prettier-12177","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-4859","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-13669","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-1549","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-11352","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-11743","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-2969","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"scratchfoundation__scratch-gui-8891","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1557","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-2899","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-10478","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-3154","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-13269","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-12467","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-1887","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-4260","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-11761","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-13013","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-3692","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1348","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-895","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-15796","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-8279","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-8477","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-13823","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-1853","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-6726","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-3278","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-4754","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-10262","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1567","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-2649","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-2792","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"grommet__grommet-6307","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-1720","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-3218","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"prettier__prettier-6319","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-1572","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-2785","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-114","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"quarto-dev__quarto-cli-6902","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-3724","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-9151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"eslint__eslint-12472","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-2754","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-14672","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-12398","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-7012","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-9812","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-665","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-3918","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-12151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-13317","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"grommet__grommet-6246","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-3928","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-3381","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-3345","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-11377","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-2680","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"prettier__prettier-8536","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1206","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1256","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1172","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-2750","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"prettier__prettier-4153","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"PrismJS__prism-1573","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-11047","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-14100","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-11621","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-8296","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-3445","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-3203","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-13224","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-7908","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-15685","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"bpmn-io__bpmn-js-1382","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-12442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-3283","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-13648","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"alibaba-fusion__next-2860","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-11613","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-10188","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"quarto-dev__quarto-cli-5292","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-2727","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-13509","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-12067","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-12412","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-2885","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-11738","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"GoogleChrome__lighthouse-2016","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"carbon-design-system__carbon-7768","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"highlightjs__highlight.js-3018","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-14332","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + GPT 4.1 (2025-04-14)","example_id":"openlayers__openlayers-15825","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-7722","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-9083","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1677","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-15484","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-10176","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-9903","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-12329","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-6442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-11489","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-5011","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1143","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-4680","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-8022","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-15787","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-13155","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1928","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-9389","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-7353","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-14932","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"quarto-dev__quarto-cli-5091","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-4862","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-2841","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"grommet__grommet-6722","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-12402","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-3438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-717","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-4286","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-15365","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-10982","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-14239","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-12172","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-12683","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-9727","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-2553","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-15114","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-2348","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-4055","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-1786","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"quarto-dev__quarto-cli-4025","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-2740","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-11401","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-6989","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1578","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-6691","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-15271","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-7478","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1203","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-4991","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-2295","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-6949","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-14051","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1083","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-1895","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"grommet__grommet-6293","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-3198","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-2182","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-2024","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-12373","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"scratchfoundation__scratch-gui-5039","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1119","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-3372","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-7046","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-12410","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"quarto-dev__quarto-cli-4732","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-13860","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-3316","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"grommet__grommet-2695","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-3559","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"grommet__grommet-6282","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"grommet__grommet-6296","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"grommet__grommet-2124","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"scratchfoundation__scratch-gui-2778","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-8945","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"quarto-dev__quarto-cli-4064","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-14015","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1623","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-2686","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-3207","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-5330","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-3212","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-13119","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"scratchfoundation__scratch-gui-3342","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-3070","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-14479","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-9402","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-8912","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"quarto-dev__quarto-cli-896","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-2861","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"quarto-dev__quarto-cli-2756","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"grommet__grommet-7025","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"quarto-dev__quarto-cli-5010","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"grommet__grommet-2131","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-4952","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1610","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-15092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-12430","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-1563","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-11068","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-7288","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-9136","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-14627","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-12194","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-7210","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1438","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-13654","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-9451","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-2946","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-12445","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-2678","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-2610","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-3000","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-4354","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-14483","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-3347","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-3442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-870","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-6566","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-13421","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"eslint__eslint-14242","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-4347","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-6197","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-12141","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-14659","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-12962","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"grommet__grommet-6749","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"prettier__prettier-4202","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1192","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-13226","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"prettier__prettier-14961","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-14719","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-15054","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-966","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-15168","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-7356","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1719","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1330","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-101","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"quarto-dev__quarto-cli-6659","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-13198","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-3301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-4301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-1617","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1198","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-3034","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-4273","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-9932","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1847","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-15683","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-2703","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-12695","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-3253","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"grommet__grommet-6600","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-4820","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-16237","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"eslint__eslint-15243","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-2960","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-5173","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-3141","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-1067","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-4226","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"quarto-dev__quarto-cli-475","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-2972","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-12302","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-15466","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-3947","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-3312","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"quarto-dev__quarto-cli-1373","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-1755","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-8515","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-1895","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-94","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-2726","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-2029","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-13851","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-6976","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-3626","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-1602","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"eslint__eslint-12652","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-13218","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-3001","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1638","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"scratchfoundation__scratch-gui-8492","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"prettier__prettier-11884","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1679","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-10694","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1011","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-2684","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1179","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"grommet__grommet-2061","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"prettier__prettier-14688","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-3367","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1238","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-2958","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1584","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-6675","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-2622","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-3516","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"quarto-dev__quarto-cli-6388","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-10214","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-4806","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-12420","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1085","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-4801","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-7212","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-14066","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1174","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-9700","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-6964","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1337","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1542","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-877","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-4028","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"quarto-dev__quarto-cli-4184","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-13893","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-8452","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-1446","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-13069","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1236","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-1747","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-1916","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-2984","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"grommet__grommet-6490","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1640","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-8130","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-7554","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-3237","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-5871","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-12965","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-2765","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"grommet__grommet-6239","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-4816","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-4834","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-10545","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-2811","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-3583","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1221","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-3174","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-2919","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-8469","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-4999","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-7063","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-3118","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-11226","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-13020","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-4021","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1365","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-12262","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"eslint__eslint-8850","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-15229","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1080","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-13974","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-1509","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"quarto-dev__quarto-cli-1029","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"grommet__grommet-6350","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"grommet__grommet-6227","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-10723","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-2705","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-9307","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-2932","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-5925","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-5791","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-4182","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-6520","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-1500","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-1708","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-4036","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-5688","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-12970","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-4741","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-10295","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"prettier__prettier-4115","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1607","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-3610","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1196","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-2195","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-3824","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-10301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1168","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1151","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-6922","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-8720","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-3454","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-9189","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-4678","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-12384","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1644","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1299","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-8092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-2927","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1720","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1570","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1802","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-13981","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-2355","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-5156","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-2131","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-15614","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-3362","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-14414","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"eslint__eslint-11407","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-3457","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-1500","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-8940","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-1941","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"quarto-dev__quarto-cli-3853","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"prettier__prettier-14262","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-7270","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-9994","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-1897","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-13185","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"grommet__grommet-5243","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-2713","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1659","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-15197","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-13196","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"grommet__grommet-6438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"eslint__eslint-9348","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-4307","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-10225","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-1788","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-6906","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-14515","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-7619","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-3859","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-3355","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-1585","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-9291","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-6694","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"scratchfoundation__scratch-gui-4568","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-3139","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-13150","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"eslint__eslint-9436","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"prettier__prettier-9866","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1655","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"prettier__prettier-16347","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"quarto-dev__quarto-cli-4708","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-14599","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"quarto-dev__quarto-cli-2583","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-3438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-13364","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-14800","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"grommet__grommet-6494","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-3606","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-14587","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-12332","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-2782","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-12393","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-13212","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-3442","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-10340","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-11649","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-2164","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"eslint__eslint-14033","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-3411","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"quarto-dev__quarto-cli-2689","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-11579","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-5084","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-11664","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-3351","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-14476","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-4430","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1636","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-8222","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-13333","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-2704","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-13527","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-9074","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1200","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-4431","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"prettier__prettier-9514","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-1586","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-10283","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"eslint__eslint-8120","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-10505","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"eslint__eslint-17618","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-6410","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"quarto-dev__quarto-cli-4695","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-11088","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-2923","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-5035","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-3644","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-6960","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"quarto-dev__quarto-cli-4539","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-7687","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-13547","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-11545","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1434","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-1064","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-16332","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-13975","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-5485","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-4891","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-9333","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-14619","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-15234","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"quarto-dev__quarto-cli-5064","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-9334","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"quarto-dev__quarto-cli-5547","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-3249","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-7350","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-12435","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-1742","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-12027","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"quarto-dev__quarto-cli-1650","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-11416","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-4167","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-2897","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-1807","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-14945","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-3050","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-3287","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-2703","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1442","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-1063","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-10599","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"prettier__prettier-12177","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-4859","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-13669","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-1549","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-11352","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-11743","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-2969","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"scratchfoundation__scratch-gui-8891","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1557","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-2899","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-10478","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-3154","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-13269","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-12467","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-1887","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-4260","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-11761","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-13013","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-3692","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1348","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-895","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-15796","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-8279","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-8477","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-13823","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-1853","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-6726","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-3278","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-4754","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-10262","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1567","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-2649","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-2792","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"grommet__grommet-6307","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-1720","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-3218","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"prettier__prettier-6319","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-1572","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-2785","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-114","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"quarto-dev__quarto-cli-6902","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-3724","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-9151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"eslint__eslint-12472","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-2754","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-14672","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-12398","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-7012","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-9812","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-665","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-3918","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-12151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-13317","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"grommet__grommet-6246","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-3928","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-3381","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-3345","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-11377","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-2680","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"prettier__prettier-8536","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1206","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1256","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1172","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-2750","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"prettier__prettier-4153","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"PrismJS__prism-1573","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-11047","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-14100","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-11621","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-8296","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-3445","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-3203","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-13224","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-7908","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-15685","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"bpmn-io__bpmn-js-1382","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-12442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-3283","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-13648","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"alibaba-fusion__next-2860","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-11613","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-10188","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"quarto-dev__quarto-cli-5292","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-2727","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-13509","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-12067","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-12412","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-2885","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-11738","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"GoogleChrome__lighthouse-2016","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"carbon-design-system__carbon-7768","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"highlightjs__highlight.js-3018","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-14332","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Globant Code Fixer Agent","example_id":"openlayers__openlayers-15825","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-7722","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-9083","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1677","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-15484","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-10176","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-9903","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-12329","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-6442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-11489","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-5011","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1143","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-4680","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-8022","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-15787","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-13155","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1928","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-9389","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-7353","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-14932","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"quarto-dev__quarto-cli-5091","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-4862","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-2841","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"grommet__grommet-6722","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-12402","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-3438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-717","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-4286","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-15365","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-10982","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-14239","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-12172","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-12683","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-9727","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-2553","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-15114","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-2348","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-4055","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-1786","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"quarto-dev__quarto-cli-4025","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-2740","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-11401","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-6989","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1578","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-6691","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-15271","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-7478","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1203","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-4991","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-2295","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-6949","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-14051","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1083","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-1895","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"grommet__grommet-6293","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-3198","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-2182","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-2024","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-12373","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"scratchfoundation__scratch-gui-5039","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1119","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-3372","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-7046","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-12410","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"quarto-dev__quarto-cli-4732","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-13860","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-3316","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"grommet__grommet-2695","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-3559","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"grommet__grommet-6282","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"grommet__grommet-6296","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"grommet__grommet-2124","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"scratchfoundation__scratch-gui-2778","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-8945","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"quarto-dev__quarto-cli-4064","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-14015","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1623","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-2686","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-3207","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-5330","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-3212","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-13119","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"scratchfoundation__scratch-gui-3342","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-3070","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-14479","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-9402","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-8912","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"quarto-dev__quarto-cli-896","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-2861","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"quarto-dev__quarto-cli-2756","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"grommet__grommet-7025","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"quarto-dev__quarto-cli-5010","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"grommet__grommet-2131","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-4952","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1610","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-15092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-12430","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-1563","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-11068","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-7288","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-9136","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-14627","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-12194","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-7210","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1438","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-13654","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-9451","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-2946","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-12445","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-2678","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-2610","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-3000","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-4354","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-14483","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-3347","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-3442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-870","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-6566","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-13421","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"eslint__eslint-14242","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-4347","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-6197","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-12141","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-14659","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-12962","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"grommet__grommet-6749","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"prettier__prettier-4202","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1192","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-13226","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"prettier__prettier-14961","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-14719","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-15054","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-966","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-15168","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-7356","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1719","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1330","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-101","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"quarto-dev__quarto-cli-6659","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-13198","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-3301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-4301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-1617","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1198","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-3034","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-4273","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-9932","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1847","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-15683","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-2703","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-12695","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-3253","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"grommet__grommet-6600","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-4820","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-16237","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"eslint__eslint-15243","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-2960","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-5173","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-3141","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-1067","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-4226","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"quarto-dev__quarto-cli-475","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-2972","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-12302","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-15466","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-3947","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-3312","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"quarto-dev__quarto-cli-1373","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-1755","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-8515","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-1895","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-94","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-2726","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-2029","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-13851","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-6976","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-3626","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-1602","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"eslint__eslint-12652","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-13218","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-3001","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1638","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"scratchfoundation__scratch-gui-8492","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"prettier__prettier-11884","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1679","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-10694","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1011","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-2684","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1179","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"grommet__grommet-2061","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"prettier__prettier-14688","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-3367","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1238","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-2958","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1584","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-6675","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-2622","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-3516","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"quarto-dev__quarto-cli-6388","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-10214","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-4806","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-12420","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1085","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-4801","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-7212","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-14066","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1174","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-9700","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-6964","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1337","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1542","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-877","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-4028","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"quarto-dev__quarto-cli-4184","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-13893","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-8452","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-1446","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-13069","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1236","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-1747","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-1916","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-2984","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"grommet__grommet-6490","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1640","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-8130","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-7554","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-3237","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-5871","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-12965","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-2765","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"grommet__grommet-6239","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-4816","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-4834","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-10545","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-2811","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-3583","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1221","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-3174","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-2919","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-8469","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-4999","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-7063","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-3118","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-11226","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-13020","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-4021","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1365","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-12262","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"eslint__eslint-8850","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-15229","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1080","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-13974","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-1509","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"quarto-dev__quarto-cli-1029","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"grommet__grommet-6350","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"grommet__grommet-6227","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-10723","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-2705","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-9307","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-2932","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-5925","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-5791","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-4182","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-6520","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-1500","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-1708","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-4036","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-5688","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-12970","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-4741","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-10295","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"prettier__prettier-4115","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1607","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-3610","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1196","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-2195","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-3824","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-10301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1168","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-6922","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-8720","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-3454","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-9189","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-4678","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-12384","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1644","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1299","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-8092","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-2927","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1720","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1570","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1802","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-13981","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-2355","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-5156","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-2131","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-15614","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-3362","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-14414","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"eslint__eslint-11407","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-3457","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-1500","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-8940","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-1941","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"quarto-dev__quarto-cli-3853","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"prettier__prettier-14262","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-7270","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-9994","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-1897","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-13185","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"grommet__grommet-5243","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-2713","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1659","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-15197","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-13196","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"grommet__grommet-6438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"eslint__eslint-9348","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-4307","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-10225","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-1788","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-6906","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-14515","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-7619","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-3859","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-3355","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-1585","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-9291","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-6694","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"scratchfoundation__scratch-gui-4568","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-3139","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-13150","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"eslint__eslint-9436","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"prettier__prettier-9866","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1655","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"prettier__prettier-16347","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"quarto-dev__quarto-cli-4708","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-14599","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"quarto-dev__quarto-cli-2583","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-3438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-13364","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-14800","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"grommet__grommet-6494","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-3606","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-14587","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-12332","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-2782","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-12393","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-13212","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-3442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-10340","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-11649","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-2164","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"eslint__eslint-14033","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-3411","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"quarto-dev__quarto-cli-2689","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-11579","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-5084","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-11664","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-3351","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-14476","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-4430","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1636","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-8222","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-13333","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-2704","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-13527","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-9074","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1200","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-4431","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"prettier__prettier-9514","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-1586","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-10283","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"eslint__eslint-8120","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-10505","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"eslint__eslint-17618","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-6410","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"quarto-dev__quarto-cli-4695","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-11088","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-2923","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-5035","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-3644","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-6960","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"quarto-dev__quarto-cli-4539","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-7687","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-13547","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-11545","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1434","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-1064","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-16332","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-13975","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-5485","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-4891","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-9333","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-14619","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-15234","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"quarto-dev__quarto-cli-5064","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-9334","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"quarto-dev__quarto-cli-5547","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-3249","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-7350","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-12435","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-1742","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-12027","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"quarto-dev__quarto-cli-1650","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-11416","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-4167","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-2897","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-1807","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-14945","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-3050","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-3287","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-2703","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1442","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-1063","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-10599","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"prettier__prettier-12177","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-4859","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-13669","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-1549","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-11352","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-11743","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-2969","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"scratchfoundation__scratch-gui-8891","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1557","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-2899","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-10478","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-3154","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-13269","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-12467","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-1887","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-4260","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-11761","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-13013","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-3692","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1348","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-895","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-15796","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-8279","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-8477","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-13823","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-1853","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-6726","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-3278","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-4754","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-10262","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1567","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-2649","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-2792","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"grommet__grommet-6307","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-1720","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-3218","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"prettier__prettier-6319","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-1572","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-2785","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-114","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"quarto-dev__quarto-cli-6902","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-3724","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-9151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"eslint__eslint-12472","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-2754","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-14672","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-12398","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-7012","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-9812","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-665","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-3918","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-12151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-13317","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"grommet__grommet-6246","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-3928","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-3381","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-3345","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-11377","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-2680","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"prettier__prettier-8536","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1206","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1256","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1172","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-2750","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"prettier__prettier-4153","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"PrismJS__prism-1573","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-11047","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-14100","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-11621","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-8296","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-3445","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-3203","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-13224","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-7908","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-15685","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"bpmn-io__bpmn-js-1382","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-12442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-3283","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-13648","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"alibaba-fusion__next-2860","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-11613","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-10188","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"quarto-dev__quarto-cli-5292","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-2727","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-13509","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-12067","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-12412","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-2885","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-11738","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"GoogleChrome__lighthouse-2016","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"carbon-design-system__carbon-7768","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"highlightjs__highlight.js-3018","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-14332","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Refact.ai Agent","example_id":"openlayers__openlayers-15825","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-7722","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-9083","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1677","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-15484","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-10176","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-9903","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-12329","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-6442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-11489","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-5011","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1143","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-4680","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-8022","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-15787","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-13155","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1928","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-9389","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-7353","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-14932","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"quarto-dev__quarto-cli-5091","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-4862","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-2841","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"grommet__grommet-6722","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-12402","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-3438","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-717","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-4286","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-15365","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-10982","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-14239","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-12172","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-12683","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-9727","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-2553","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-15114","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-2348","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-4055","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-1786","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"quarto-dev__quarto-cli-4025","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-2740","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-11401","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-6989","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1578","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-6691","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-15271","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-7478","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1203","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-4991","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-2295","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-6949","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-14051","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1083","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-1895","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"grommet__grommet-6293","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-3198","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-2182","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-2024","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-12373","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"scratchfoundation__scratch-gui-5039","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1119","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-3372","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-7046","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-12410","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"quarto-dev__quarto-cli-4732","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-13860","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-3316","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"grommet__grommet-2695","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-3559","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"grommet__grommet-6282","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"grommet__grommet-6296","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"grommet__grommet-2124","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"scratchfoundation__scratch-gui-2778","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-8945","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"quarto-dev__quarto-cli-4064","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-14015","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1623","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-2686","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-3207","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-5330","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-3212","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-13119","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"scratchfoundation__scratch-gui-3342","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-3070","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-14479","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-9402","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-8912","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"quarto-dev__quarto-cli-896","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-2861","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"quarto-dev__quarto-cli-2756","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"grommet__grommet-7025","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"quarto-dev__quarto-cli-5010","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"grommet__grommet-2131","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-4952","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1610","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-15092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-12430","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-1563","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-11068","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-7288","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-9136","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-14627","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-12194","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-7210","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1438","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-13654","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-9451","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-2946","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-12445","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-2678","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-2610","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-3000","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-4354","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-14483","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-3347","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-3442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-870","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-6566","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-13421","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"eslint__eslint-14242","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-4347","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-6197","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-12141","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-14659","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-12962","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"grommet__grommet-6749","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"prettier__prettier-4202","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1192","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-13226","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"prettier__prettier-14961","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-14719","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-15054","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-966","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-15168","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-7356","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1719","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1330","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-101","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"quarto-dev__quarto-cli-6659","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-13198","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-3301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-4301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-1617","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1198","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-3034","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-4273","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-9932","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1847","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-15683","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-2703","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-12695","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-3253","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"grommet__grommet-6600","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-4820","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-16237","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"eslint__eslint-15243","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-2960","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-5173","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-3141","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-1067","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-4226","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"quarto-dev__quarto-cli-475","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-2972","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-12302","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-15466","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-3947","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-3312","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"quarto-dev__quarto-cli-1373","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-1755","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-8515","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-1895","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-94","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-2726","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-2029","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-13851","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-6976","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-3626","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-1602","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"eslint__eslint-12652","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-13218","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-3001","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1638","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"scratchfoundation__scratch-gui-8492","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"prettier__prettier-11884","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1679","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-10694","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1011","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-2684","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1179","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"grommet__grommet-2061","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"prettier__prettier-14688","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-3367","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1238","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-2958","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1584","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-6675","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-2622","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-3516","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"quarto-dev__quarto-cli-6388","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-10214","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-4806","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-12420","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1085","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-4801","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-7212","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-14066","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1174","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-9700","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-6964","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1337","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1542","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-877","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-4028","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"quarto-dev__quarto-cli-4184","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-13893","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-8452","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-1446","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-13069","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1236","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-1747","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-1916","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-2984","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"grommet__grommet-6490","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1640","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-8130","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-7554","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-3237","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-5871","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-12965","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-2765","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"grommet__grommet-6239","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-4816","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-4834","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-10545","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-2811","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-3583","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1221","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-3174","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-2919","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-8469","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-4999","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-7063","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-3118","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-11226","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-13020","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-4021","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1365","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-12262","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"eslint__eslint-8850","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-15229","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1080","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-13974","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-1509","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"quarto-dev__quarto-cli-1029","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"grommet__grommet-6350","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"grommet__grommet-6227","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-10723","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-2705","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-9307","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-2932","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-5925","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-5791","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-4182","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-6520","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-1500","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-1708","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-4036","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-5688","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-12970","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-4741","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-10295","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"prettier__prettier-4115","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1607","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-3610","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1196","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-2195","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-3824","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-10301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1168","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-6922","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-8720","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-3454","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-9189","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-4678","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-12384","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1644","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1299","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-8092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-2927","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1720","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1570","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1802","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-13981","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-2355","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-5156","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-2131","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-15614","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-3362","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-14414","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"eslint__eslint-11407","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-3457","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-1500","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-8940","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-1941","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"quarto-dev__quarto-cli-3853","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"prettier__prettier-14262","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-7270","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-9994","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-1897","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-13185","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"grommet__grommet-5243","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-2713","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1659","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-15197","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-13196","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"grommet__grommet-6438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"eslint__eslint-9348","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-4307","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-10225","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-1788","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-6906","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-14515","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-7619","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-3859","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-3355","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-1585","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-9291","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-6694","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"scratchfoundation__scratch-gui-4568","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-3139","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-13150","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"eslint__eslint-9436","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"prettier__prettier-9866","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1655","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"prettier__prettier-16347","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"quarto-dev__quarto-cli-4708","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-14599","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"quarto-dev__quarto-cli-2583","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-3438","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-13364","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-14800","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"grommet__grommet-6494","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-3606","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-14587","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-12332","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-2782","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-12393","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-13212","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-3442","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-10340","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-11649","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-2164","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"eslint__eslint-14033","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-3411","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"quarto-dev__quarto-cli-2689","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-11579","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-5084","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-11664","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-3351","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-14476","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-4430","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1636","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-8222","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-13333","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-2704","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-13527","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-9074","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1200","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-4431","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"prettier__prettier-9514","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-1586","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-10283","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"eslint__eslint-8120","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-10505","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"eslint__eslint-17618","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-6410","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"quarto-dev__quarto-cli-4695","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-11088","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-2923","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-5035","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-3644","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-6960","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"quarto-dev__quarto-cli-4539","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-7687","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-13547","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-11545","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1434","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-1064","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-16332","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-13975","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-5485","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-4891","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-9333","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-14619","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-15234","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"quarto-dev__quarto-cli-5064","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-9334","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"quarto-dev__quarto-cli-5547","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-3249","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-7350","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-12435","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-1742","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-12027","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"quarto-dev__quarto-cli-1650","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-11416","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-4167","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-2897","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-1807","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-14945","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-3050","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-3287","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-2703","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1442","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-1063","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-10599","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"prettier__prettier-12177","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-4859","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-13669","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-1549","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-11352","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-11743","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-2969","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"scratchfoundation__scratch-gui-8891","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1557","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-2899","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-10478","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-3154","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-13269","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-12467","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-1887","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-4260","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-11761","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-13013","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-3692","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1348","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-895","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-15796","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-8279","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-8477","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-13823","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-1853","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-6726","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-3278","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-4754","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-10262","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1567","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-2649","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-2792","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"grommet__grommet-6307","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-1720","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-3218","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"prettier__prettier-6319","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-1572","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-2785","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-114","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"quarto-dev__quarto-cli-6902","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-3724","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-9151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"eslint__eslint-12472","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-2754","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-14672","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-12398","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-7012","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-9812","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-665","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-3918","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-12151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-13317","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"grommet__grommet-6246","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-3928","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-3381","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-3345","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-11377","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-2680","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"prettier__prettier-8536","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1206","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1256","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1172","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-2750","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"prettier__prettier-4153","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"PrismJS__prism-1573","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-11047","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-14100","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-11621","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-8296","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-3445","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-3203","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-13224","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-7908","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-15685","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"bpmn-io__bpmn-js-1382","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-12442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-3283","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-13648","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"alibaba-fusion__next-2860","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-11613","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-10188","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"quarto-dev__quarto-cli-5292","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-2727","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-13509","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-12067","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-12412","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-2885","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-11738","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"GoogleChrome__lighthouse-2016","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"carbon-design-system__carbon-7768","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"highlightjs__highlight.js-3018","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-14332","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"Codefuse_Pycfuse_SVR","example_id":"openlayers__openlayers-15825","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-7722","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-9083","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1677","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-15484","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-10176","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-9903","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-12329","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-6442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-11489","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-5011","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1143","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-4680","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-8022","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-15787","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-13155","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1928","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-9389","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-7353","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-14932","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"quarto-dev__quarto-cli-5091","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-4862","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-2841","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"grommet__grommet-6722","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-12402","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-3438","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-717","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-4286","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-15365","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-10982","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-14239","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-12172","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-12683","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-9727","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-2553","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-15114","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-2348","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-4055","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-1786","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"quarto-dev__quarto-cli-4025","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-2740","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-11401","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-6989","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1578","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-6691","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-15271","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-7478","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1203","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-4991","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-2295","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-6949","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-14051","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1083","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-1895","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"grommet__grommet-6293","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-3198","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-2182","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-2024","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-12373","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"scratchfoundation__scratch-gui-5039","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1119","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-3372","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-7046","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-12410","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"quarto-dev__quarto-cli-4732","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-13860","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-3316","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"grommet__grommet-2695","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-3559","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"grommet__grommet-6282","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"grommet__grommet-6296","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"grommet__grommet-2124","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"scratchfoundation__scratch-gui-2778","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-8945","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"quarto-dev__quarto-cli-4064","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-14015","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1623","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-2686","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-3207","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-5330","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-3212","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-13119","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"scratchfoundation__scratch-gui-3342","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-3070","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-14479","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-9402","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-8912","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"quarto-dev__quarto-cli-896","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-2861","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"quarto-dev__quarto-cli-2756","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"grommet__grommet-7025","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"quarto-dev__quarto-cli-5010","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"grommet__grommet-2131","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-4952","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1610","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-15092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-12430","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-1563","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-11068","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-7288","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-9136","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-14627","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-12194","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-7210","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1438","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-13654","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-9451","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-2946","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-12445","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-2678","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-2610","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-3000","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-4354","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-14483","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-3347","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-3442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-870","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-6566","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-13421","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"eslint__eslint-14242","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-4347","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-6197","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-12141","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-14659","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-12962","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"grommet__grommet-6749","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"prettier__prettier-4202","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1192","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-13226","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"prettier__prettier-14961","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-14719","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-15054","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-966","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-15168","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-7356","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1719","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1330","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-101","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"quarto-dev__quarto-cli-6659","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-13198","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-3301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-4301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-1617","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1198","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-3034","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-4273","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-9932","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1847","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-15683","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-2703","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-12695","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-3253","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"grommet__grommet-6600","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-4820","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-16237","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"eslint__eslint-15243","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-2960","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-5173","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-3141","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-1067","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-4226","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"quarto-dev__quarto-cli-475","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-2972","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-12302","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-15466","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-3947","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-3312","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"quarto-dev__quarto-cli-1373","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-1755","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-8515","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-1895","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-94","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-2726","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-2029","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-13851","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-6976","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-3626","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-1602","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"eslint__eslint-12652","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-13218","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-3001","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1638","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"scratchfoundation__scratch-gui-8492","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"prettier__prettier-11884","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1679","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-10694","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1011","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-2684","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1179","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"grommet__grommet-2061","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"prettier__prettier-14688","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-3367","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1238","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-2958","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1584","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-6675","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-2622","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-3516","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"quarto-dev__quarto-cli-6388","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-10214","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-4806","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-12420","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1085","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-4801","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-7212","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-14066","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1174","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-9700","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-6964","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1337","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1542","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-877","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-4028","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"quarto-dev__quarto-cli-4184","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-13893","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-8452","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-1446","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-13069","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1236","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-1747","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-1916","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-2984","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"grommet__grommet-6490","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1640","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-8130","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-7554","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-3237","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-5871","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-12965","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-2765","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"grommet__grommet-6239","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-4816","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-4834","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-10545","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-2811","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-3583","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1221","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-3174","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-2919","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-8469","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-4999","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-7063","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-3118","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-11226","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-13020","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-4021","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1365","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-12262","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"eslint__eslint-8850","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-15229","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1080","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-13974","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-1509","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"quarto-dev__quarto-cli-1029","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"grommet__grommet-6350","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"grommet__grommet-6227","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-10723","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-2705","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-9307","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-2932","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-5925","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-5791","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-4182","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-6520","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-1500","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-1708","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-4036","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-5688","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-12970","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-4741","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-10295","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"prettier__prettier-4115","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1607","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-3610","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1196","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-2195","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-3824","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-10301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1168","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-6922","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-8720","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-3454","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-9189","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-4678","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-12384","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1644","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1299","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-8092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-2927","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1720","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1570","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1802","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-13981","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-2355","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-5156","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-2131","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-15614","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-3362","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-14414","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"eslint__eslint-11407","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-3457","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-1500","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-8940","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-1941","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"quarto-dev__quarto-cli-3853","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"prettier__prettier-14262","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-7270","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-9994","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-1897","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-13185","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"grommet__grommet-5243","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-2713","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1659","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-15197","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-13196","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"grommet__grommet-6438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"eslint__eslint-9348","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-4307","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-10225","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-1788","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-6906","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-14515","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-7619","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-3859","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-3355","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-1585","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-9291","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-6694","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"scratchfoundation__scratch-gui-4568","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-3139","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-13150","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"eslint__eslint-9436","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"prettier__prettier-9866","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1655","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"prettier__prettier-16347","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"quarto-dev__quarto-cli-4708","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-14599","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"quarto-dev__quarto-cli-2583","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-3438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-13364","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-14800","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"grommet__grommet-6494","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-3606","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-14587","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-12332","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-2782","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-12393","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-13212","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-3442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-10340","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-11649","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-2164","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"eslint__eslint-14033","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-3411","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"quarto-dev__quarto-cli-2689","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-11579","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-5084","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-11664","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-3351","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-14476","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-4430","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1636","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-8222","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-13333","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-2704","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-13527","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-9074","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1200","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-4431","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"prettier__prettier-9514","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-1586","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-10283","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"eslint__eslint-8120","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-10505","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"eslint__eslint-17618","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-6410","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"quarto-dev__quarto-cli-4695","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-11088","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-2923","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-5035","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-3644","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-6960","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"quarto-dev__quarto-cli-4539","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-7687","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-13547","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-11545","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1434","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-1064","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-16332","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-13975","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-5485","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-4891","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-9333","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-14619","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-15234","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"quarto-dev__quarto-cli-5064","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-9334","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"quarto-dev__quarto-cli-5547","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-3249","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-7350","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-12435","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-1742","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-12027","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"quarto-dev__quarto-cli-1650","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-11416","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-4167","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-2897","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-1807","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-14945","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-3050","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-3287","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-2703","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1442","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-1063","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-10599","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"prettier__prettier-12177","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-4859","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-13669","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-1549","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-11352","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-11743","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-2969","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"scratchfoundation__scratch-gui-8891","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1557","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-2899","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-10478","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-3154","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-13269","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-12467","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-1887","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-4260","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-11761","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-13013","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-3692","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1348","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-895","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-15796","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-8279","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-8477","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-13823","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-1853","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-6726","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-3278","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-4754","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-10262","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1567","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-2649","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-2792","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"grommet__grommet-6307","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-1720","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-3218","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"prettier__prettier-6319","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-1572","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-2785","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-114","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"quarto-dev__quarto-cli-6902","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-3724","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-9151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"eslint__eslint-12472","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-2754","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-14672","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-12398","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-7012","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-9812","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-665","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-3918","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-12151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-13317","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"grommet__grommet-6246","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-3928","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-3381","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-3345","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-11377","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-2680","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"prettier__prettier-8536","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1206","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1256","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1172","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-2750","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"prettier__prettier-4153","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"PrismJS__prism-1573","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-11047","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-14100","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-11621","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-8296","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-3445","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-3203","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-13224","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-7908","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-15685","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"bpmn-io__bpmn-js-1382","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-12442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-3283","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-13648","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"alibaba-fusion__next-2860","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-11613","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-10188","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"quarto-dev__quarto-cli-5292","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-2727","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-13509","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-12067","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-12412","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-2885","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-11738","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"GoogleChrome__lighthouse-2016","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"carbon-design-system__carbon-7768","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"highlightjs__highlight.js-3018","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-14332","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-Sonnet 4)","example_id":"openlayers__openlayers-15825","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-7722","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-9083","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1677","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-15484","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-10176","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-9903","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-12329","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-6442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-11489","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-5011","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1143","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-4680","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-8022","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-15787","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-13155","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1928","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-9389","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-7353","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-14932","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"quarto-dev__quarto-cli-5091","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-4862","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-2841","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"grommet__grommet-6722","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-12402","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-3438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-717","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-4286","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-15365","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-10982","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-14239","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-12172","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-12683","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-9727","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-2553","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-15114","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-2348","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-4055","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-1786","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"quarto-dev__quarto-cli-4025","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-2740","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-11401","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-6989","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1578","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-6691","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-15271","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-7478","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1203","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-4991","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-2295","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-6949","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-14051","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1083","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-1895","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"grommet__grommet-6293","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-3198","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-2182","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-2024","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-12373","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"scratchfoundation__scratch-gui-5039","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1119","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-3372","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-7046","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-12410","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"quarto-dev__quarto-cli-4732","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-13860","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-3316","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"grommet__grommet-2695","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-3559","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"grommet__grommet-6282","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"grommet__grommet-6296","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"grommet__grommet-2124","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"scratchfoundation__scratch-gui-2778","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-8945","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"quarto-dev__quarto-cli-4064","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-14015","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1623","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-2686","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-3207","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-5330","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-3212","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-13119","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"scratchfoundation__scratch-gui-3342","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-3070","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-14479","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-9402","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-8912","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"quarto-dev__quarto-cli-896","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-2861","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"quarto-dev__quarto-cli-2756","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"grommet__grommet-7025","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"quarto-dev__quarto-cli-5010","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"grommet__grommet-2131","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-4952","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1610","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-15092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-12430","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-1563","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-11068","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-7288","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-9136","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-14627","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-12194","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-7210","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1438","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-13654","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-9451","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-2946","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-12445","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-2678","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-2610","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-3000","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-4354","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-14483","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-3347","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-3442","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-870","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-6566","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-13421","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"eslint__eslint-14242","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-4347","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-6197","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-12141","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-14659","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-12962","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"grommet__grommet-6749","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"prettier__prettier-4202","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1192","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-13226","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"prettier__prettier-14961","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-14719","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-15054","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-966","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-15168","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-7356","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1719","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1330","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-101","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"quarto-dev__quarto-cli-6659","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-13198","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-3301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-4301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-1617","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1198","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-3034","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-4273","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-9932","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1847","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-15683","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-2703","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-12695","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-3253","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"grommet__grommet-6600","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-4820","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-16237","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"eslint__eslint-15243","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-2960","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-5173","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-3141","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-1067","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-4226","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"quarto-dev__quarto-cli-475","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-2972","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-12302","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-15466","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-3947","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-3312","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"quarto-dev__quarto-cli-1373","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-1755","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-8515","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-1895","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-94","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-2726","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-2029","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-13851","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-6976","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-3626","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-1602","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"eslint__eslint-12652","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-13218","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-3001","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1638","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"scratchfoundation__scratch-gui-8492","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"prettier__prettier-11884","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1679","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-10694","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1011","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-2684","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1179","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"grommet__grommet-2061","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"prettier__prettier-14688","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-3367","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1238","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-2958","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1584","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-6675","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-2622","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-3516","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"quarto-dev__quarto-cli-6388","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-10214","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-4806","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-12420","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1085","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-4801","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-7212","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-14066","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1174","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-9700","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-6964","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1337","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1542","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-877","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-4028","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"quarto-dev__quarto-cli-4184","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-13893","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-8452","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-1446","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-13069","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1236","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-1747","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-1916","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-2984","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"grommet__grommet-6490","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1640","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-8130","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-7554","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-3237","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-5871","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-12965","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-2765","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"grommet__grommet-6239","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-4816","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-4834","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-10545","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-2811","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-3583","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1221","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-3174","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-2919","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-8469","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-4999","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-7063","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-3118","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-11226","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-13020","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-4021","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1365","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-12262","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"eslint__eslint-8850","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-15229","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1080","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-13974","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-1509","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"quarto-dev__quarto-cli-1029","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"grommet__grommet-6350","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"grommet__grommet-6227","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-10723","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-2705","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-9307","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-2932","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-5925","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-5791","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-4182","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-6520","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-1500","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-1708","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-4036","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-5688","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-12970","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-4741","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-10295","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"prettier__prettier-4115","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1607","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-3610","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1196","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-2195","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-3824","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-10301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1168","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1151","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-6922","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-8720","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-3454","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-9189","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-4678","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-12384","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1644","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1299","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-8092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-2927","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1720","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1570","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1802","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-13981","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-2355","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-5156","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-2131","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-15614","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-3362","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-14414","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"eslint__eslint-11407","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-3457","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-1500","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-8940","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-1941","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"quarto-dev__quarto-cli-3853","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"prettier__prettier-14262","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-7270","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-9994","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-1897","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-13185","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"grommet__grommet-5243","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-2713","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1659","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-15197","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-13196","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"grommet__grommet-6438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"eslint__eslint-9348","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-4307","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-10225","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-1788","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-6906","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-14515","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-7619","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-3859","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-3355","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-1585","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-9291","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-6694","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"scratchfoundation__scratch-gui-4568","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-3139","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-13150","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"eslint__eslint-9436","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"prettier__prettier-9866","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1655","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"prettier__prettier-16347","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"quarto-dev__quarto-cli-4708","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-14599","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"quarto-dev__quarto-cli-2583","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-3438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-13364","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-14800","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"grommet__grommet-6494","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-3606","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-14587","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-12332","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-2782","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-12393","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-13212","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-3442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-10340","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-11649","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-2164","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"eslint__eslint-14033","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-3411","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"quarto-dev__quarto-cli-2689","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-11579","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-5084","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-11664","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-3351","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-14476","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-4430","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1636","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-8222","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-13333","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-2704","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-13527","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-9074","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1200","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-4431","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"prettier__prettier-9514","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-1586","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-10283","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"eslint__eslint-8120","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-10505","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"eslint__eslint-17618","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-6410","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"quarto-dev__quarto-cli-4695","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-11088","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-2923","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-5035","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-3644","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-6960","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"quarto-dev__quarto-cli-4539","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-7687","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-13547","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-11545","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1434","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-1064","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-16332","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-13975","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-5485","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-4891","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-9333","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-14619","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-15234","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"quarto-dev__quarto-cli-5064","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-9334","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"quarto-dev__quarto-cli-5547","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-3249","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-7350","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-12435","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-1742","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-12027","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"quarto-dev__quarto-cli-1650","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-11416","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-4167","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-2897","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-1807","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-14945","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-3050","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-3287","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-2703","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1442","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-1063","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-10599","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"prettier__prettier-12177","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-4859","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-13669","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-1549","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-11352","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-11743","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-2969","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"scratchfoundation__scratch-gui-8891","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1557","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-2899","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-10478","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-3154","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-13269","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-12467","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-1887","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-4260","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-11761","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-13013","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-3692","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1348","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-895","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-15796","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-8279","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-8477","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-13823","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-1853","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-6726","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-3278","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-4754","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-10262","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1567","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-2649","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-2792","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"grommet__grommet-6307","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-1720","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-3218","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"prettier__prettier-6319","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-1572","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-2785","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-114","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"quarto-dev__quarto-cli-6902","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-3724","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-9151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"eslint__eslint-12472","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-2754","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-14672","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-12398","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-7012","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-9812","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-665","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-3918","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-12151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-13317","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"grommet__grommet-6246","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-3928","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-3381","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-3345","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-11377","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-2680","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"prettier__prettier-8536","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1206","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1256","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1172","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-2750","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"prettier__prettier-4153","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"PrismJS__prism-1573","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-11047","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-14100","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-11621","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-8296","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-3445","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-3203","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-13224","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-7908","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-15685","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"bpmn-io__bpmn-js-1382","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-12442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-3283","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-13648","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"alibaba-fusion__next-2860","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-11613","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-10188","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"quarto-dev__quarto-cli-5292","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-2727","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-13509","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-12067","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-12412","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-2885","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-11738","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"GoogleChrome__lighthouse-2016","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"carbon-design-system__carbon-7768","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"highlightjs__highlight.js-3018","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-14332","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"OpenHands-Versa (Claude-3.7 Sonnet)","example_id":"openlayers__openlayers-15825","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-7722","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-9083","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1677","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-15484","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-10176","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-9903","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-12329","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-6442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-11489","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-5011","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1143","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-4680","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-8022","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-15787","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-13155","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1928","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-9389","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-7353","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-14932","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"quarto-dev__quarto-cli-5091","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-4862","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-2841","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"grommet__grommet-6722","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-12402","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-3438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-717","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-4286","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-15365","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-10982","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-14239","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-12172","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-12683","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-9727","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-2553","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-15114","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-2348","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-4055","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-1786","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"quarto-dev__quarto-cli-4025","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-2740","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-11401","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-6989","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1578","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-6691","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-15271","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-7478","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1203","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-4991","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-2295","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-6949","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-14051","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1083","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-1895","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"grommet__grommet-6293","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-3198","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-2182","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-2024","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-12373","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"scratchfoundation__scratch-gui-5039","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1119","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-3372","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-7046","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-12410","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"quarto-dev__quarto-cli-4732","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-13860","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-3316","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"grommet__grommet-2695","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-3559","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"grommet__grommet-6282","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"grommet__grommet-6296","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"grommet__grommet-2124","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"scratchfoundation__scratch-gui-2778","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-8945","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"quarto-dev__quarto-cli-4064","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-14015","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1623","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-2686","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-3207","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-5330","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-3212","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-13119","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"scratchfoundation__scratch-gui-3342","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-3070","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-14479","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-9402","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-8912","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"quarto-dev__quarto-cli-896","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-2861","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"quarto-dev__quarto-cli-2756","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"grommet__grommet-7025","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"quarto-dev__quarto-cli-5010","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"grommet__grommet-2131","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-4952","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1610","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-15092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-12430","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-1563","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-11068","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-7288","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-9136","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-14627","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-12194","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-7210","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1438","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-13654","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-9451","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-2946","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-12445","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-2678","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-2610","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-3000","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-4354","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-14483","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-3347","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-3442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-870","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-6566","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-13421","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"eslint__eslint-14242","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-4347","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-6197","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-12141","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-14659","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-12962","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"grommet__grommet-6749","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"prettier__prettier-4202","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1192","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-13226","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"prettier__prettier-14961","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-14719","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-15054","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-966","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-15168","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-7356","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1719","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1330","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-101","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"quarto-dev__quarto-cli-6659","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-13198","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-3301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-4301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-1617","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1198","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-3034","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-4273","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-9932","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1847","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-15683","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-2703","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-12695","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-3253","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"grommet__grommet-6600","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-4820","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-16237","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"eslint__eslint-15243","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-2960","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-5173","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-3141","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-1067","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-4226","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"quarto-dev__quarto-cli-475","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-2972","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-12302","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-15466","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-3947","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-3312","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"quarto-dev__quarto-cli-1373","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-1755","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-8515","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-1895","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-94","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-2726","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-2029","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-13851","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-6976","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-3626","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-1602","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"eslint__eslint-12652","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-13218","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-3001","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1638","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"scratchfoundation__scratch-gui-8492","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"prettier__prettier-11884","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1679","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-10694","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1011","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-2684","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1179","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"grommet__grommet-2061","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"prettier__prettier-14688","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-3367","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1238","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-2958","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1584","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-6675","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-2622","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-3516","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"quarto-dev__quarto-cli-6388","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-10214","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-4806","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-12420","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1085","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-4801","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-7212","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-14066","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1174","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-9700","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-6964","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1337","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1542","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-877","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-4028","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"quarto-dev__quarto-cli-4184","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-13893","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-8452","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-1446","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-13069","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1236","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-1747","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-1916","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-2984","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"grommet__grommet-6490","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1640","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-8130","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-7554","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-3237","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-5871","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-12965","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-2765","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"grommet__grommet-6239","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-4816","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-4834","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-10545","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-2811","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-3583","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1221","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-3174","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-2919","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-8469","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-4999","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-7063","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-3118","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-11226","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-13020","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-4021","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1365","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-12262","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"eslint__eslint-8850","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-15229","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1080","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-13974","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-1509","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"quarto-dev__quarto-cli-1029","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"grommet__grommet-6350","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"grommet__grommet-6227","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-10723","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-2705","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-9307","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-2932","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-5925","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-5791","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-4182","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-6520","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-1500","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-1708","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-4036","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-5688","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-12970","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-4741","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-10295","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"prettier__prettier-4115","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1607","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-3610","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1196","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-2195","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-3824","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-10301","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1168","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-6922","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-8720","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-3454","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-9189","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-4678","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-12384","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1644","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1299","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-8092","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-2927","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1720","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1570","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1802","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-13981","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-2355","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-5156","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-2131","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-15614","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-3362","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-14414","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"eslint__eslint-11407","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-3457","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-1500","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-8940","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-1941","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"quarto-dev__quarto-cli-3853","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"prettier__prettier-14262","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-7270","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-9994","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-1897","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-13185","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"grommet__grommet-5243","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-2713","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1659","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1092","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-15197","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-13196","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"grommet__grommet-6438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"eslint__eslint-9348","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-4307","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-10225","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-1788","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-6906","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-14515","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-7619","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-3859","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-3355","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-1585","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-9291","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-6694","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"scratchfoundation__scratch-gui-4568","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-3139","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-13150","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"eslint__eslint-9436","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"prettier__prettier-9866","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1655","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"prettier__prettier-16347","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"quarto-dev__quarto-cli-4708","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-14599","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"quarto-dev__quarto-cli-2583","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-3438","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-13364","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-14800","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"grommet__grommet-6494","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-3606","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-14587","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-12332","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-2782","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-12393","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-13212","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-3442","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-10340","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-11649","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-2164","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"eslint__eslint-14033","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-3411","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"quarto-dev__quarto-cli-2689","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-11579","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-5084","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-11664","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-3351","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-14476","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-4430","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1636","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-8222","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-13333","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-2704","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-13527","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-9074","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1200","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-4431","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"prettier__prettier-9514","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-1586","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-10283","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"eslint__eslint-8120","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-10505","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"eslint__eslint-17618","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-6410","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"quarto-dev__quarto-cli-4695","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-11088","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-2923","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-5035","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-3644","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-6960","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"quarto-dev__quarto-cli-4539","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-7687","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-13547","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-11545","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1434","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-1064","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-16332","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-13975","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-5485","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-4891","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-9333","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-14619","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-15234","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"quarto-dev__quarto-cli-5064","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-9334","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"quarto-dev__quarto-cli-5547","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-3249","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-7350","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-12435","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-1742","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-12027","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"quarto-dev__quarto-cli-1650","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-11416","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-4167","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-2897","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-1807","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-14945","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-3050","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-3287","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-2703","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1442","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-1063","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-10599","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"prettier__prettier-12177","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-4859","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-13669","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-1549","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-11352","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-11743","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-2969","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"scratchfoundation__scratch-gui-8891","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1557","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-2899","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-10478","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-3154","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-13269","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-12467","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-1887","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-4260","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-11761","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-13013","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-3692","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1348","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-895","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-15796","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-8279","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-8477","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-13823","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-1853","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-6726","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-3278","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-4754","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-10262","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1567","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-2649","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-2792","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"grommet__grommet-6307","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-1720","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-3218","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"prettier__prettier-6319","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-1572","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-2785","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-114","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"quarto-dev__quarto-cli-6902","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-3724","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-9151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"eslint__eslint-12472","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-2754","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-14672","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-12398","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-7012","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-9812","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-665","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-3918","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-12151","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-13317","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"grommet__grommet-6246","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-3928","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-3381","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-3345","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-11377","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-2680","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"prettier__prettier-8536","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1206","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1256","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1172","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-2750","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"prettier__prettier-4153","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"PrismJS__prism-1573","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-11047","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-14100","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-11621","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-8296","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-3445","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-3203","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-13224","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-7908","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-15685","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"bpmn-io__bpmn-js-1382","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-12442","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-3283","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-13648","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"alibaba-fusion__next-2860","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-11613","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-10188","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"quarto-dev__quarto-cli-5292","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-2727","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-13509","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-12067","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-12412","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-2885","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-11738","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"GoogleChrome__lighthouse-2016","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"carbon-design-system__carbon-7768","pass1":0,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"highlightjs__highlight.js-3018","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-14332","pass1":1,"count":1} +{"benchmark_id":"swebench-multimodal","model":"GUIRepair + o4-mini (2025-04-16)","example_id":"openlayers__openlayers-15825","pass1":1,"count":1} diff --git a/terminal-bench-1.0.jsonl b/terminal-bench-1.0.jsonl new file mode 100644 index 0000000..eaaaf21 --- /dev/null +++ b/terminal-bench-1.0.jsonl @@ -0,0 +1,2000 @@ +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"super-benchmark-upet","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"build-linux-kernel-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"play-zork","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"eval-mteb","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"eval-mteb.hard","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"swe-bench-astropy-2","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"conda-env-conflict-resolution","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"run-pdp11-code","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"swe-bench-fsspec","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"swe-bench-astropy-1","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"blind-maze-explorer-5x5","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"blind-maze-explorer-algorithm.hard","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"blind-maze-explorer-algorithm","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"blind-maze-explorer-algorithm.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"incompatible-python-fasttext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"incompatible-python-fasttext.base_with_hint","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"grid-pattern-transform","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"train-fasttext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"qemu-alpine-ssh","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"count-dataset-tokens","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"download-youtube","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"git-multibranch","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"cron-broken-network","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"qemu-startup","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"pytorch-model-cli.easy","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"pytorch-model-cli","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"pytorch-model-cli.hard","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"nginx-request-logging","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"simple-sheets-put","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"jupyter-notebook-server","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"raman-fitting","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"path-tracing","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"oom","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"decommissioning-service-with-sensitive-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"fix-git","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"reshard-c4-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"build-initramfs-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"swe-bench-langcodes","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"simple-web-scraper","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"path-tracing-reverse","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"crack-7z-hash.hard","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"tmux-advanced-workflow","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"sanitize-git-repo.hard","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"polyglot-c-py","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"cartpole-rl-training","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"crack-7z-hash.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"get-bitcoin-nodes","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"sqlite-with-gcov","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"sqlite-db-truncate","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"create-bucket","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"modernize-fortran-build","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"build-tcc-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"polyglot-rust-c","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"sanitize-git-repo","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"fix-permissions","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"extract-moves-from-video","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"organization-json-generator","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"git-workflow-hack","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"configure-git-webserver","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"openssl-selfsigned-cert","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"security-vulhub-minio","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"password-recovery","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"write-compressor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"gpt2-codegolf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"crack-7z-hash","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"csv-to-parquet","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"fix-pandas-version","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"raman-fitting.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"hello-world","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"heterogeneous-dates","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"solana-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"prove-plus-comm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"hf-model-inference","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"chess-best-move","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"processing-pipeline","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"fibonacci-server","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"new-encrypt-command","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"extract-safely","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"intrusion-detection","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250711_openhands_claude-4-sonnet","example_id":"vim-terminal-task","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"build-linux-kernel-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"run-pdp11-code","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"conda-env-conflict-resolution","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"swe-bench-astropy-1","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"blind-maze-explorer-algorithm","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"blind-maze-explorer-algorithm.easy","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"swe-bench-fsspec","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"super-benchmark-upet","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"swe-bench-astropy-2","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"blind-maze-explorer-5x5","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"eval-mteb.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"qemu-startup","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"grid-pattern-transform","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"qemu-alpine-ssh","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"download-youtube","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"blind-maze-explorer-algorithm.hard","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"pytorch-model-cli.easy","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"eval-mteb","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"cron-broken-network","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"incompatible-python-fasttext.base_with_hint","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"play-zork","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"incompatible-python-fasttext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"train-fasttext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"pytorch-model-cli.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"count-dataset-tokens","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"fix-git","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"chess-best-move","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"git-multibranch","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"pytorch-model-cli","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"simple-web-scraper","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"polyglot-c-py","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"processing-pipeline","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"password-recovery","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"security-vulhub-minio","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"hello-world","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"organization-json-generator","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"openssl-selfsigned-cert","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"extract-moves-from-video","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"crack-7z-hash","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"create-bucket","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"fix-permissions","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"write-compressor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"git-workflow-hack","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"swe-bench-langcodes","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"path-tracing","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"crack-7z-hash.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"crack-7z-hash.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"raman-fitting","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"hf-model-inference","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"solana-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"prove-plus-comm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"sqlite-db-truncate","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"raman-fitting.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"sanitize-git-repo.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"new-encrypt-command","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"csv-to-parquet","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"jupyter-notebook-server","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"path-tracing-reverse","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"tmux-advanced-workflow","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"heterogeneous-dates","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"fix-pandas-version","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"intrusion-detection","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"sqlite-with-gcov","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"get-bitcoin-nodes","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"build-tcc-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"polyglot-rust-c","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"modernize-fortran-build","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"build-initramfs-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"decommissioning-service-with-sensitive-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"sanitize-git-repo","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"oom","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"extract-safely","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"simple-sheets-put","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"vim-terminal-task","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"cartpole-rl-training","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"configure-git-webserver","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"reshard-c4-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"gpt2-codegolf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"fibonacci-server","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250811_cursor-cli_claude-4-sonnet","example_id":"nginx-request-logging","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"super-benchmark-upet","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"eval-mteb","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"eval-mteb.hard","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"play-zork","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"build-linux-kernel-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"swe-bench-fsspec","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"swe-bench-astropy-2","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"swe-bench-astropy-1","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"conda-env-conflict-resolution","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"run-pdp11-code","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"blind-maze-explorer-5x5","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"incompatible-python-fasttext.base_with_hint","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"blind-maze-explorer-algorithm.hard","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"blind-maze-explorer-algorithm","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"blind-maze-explorer-algorithm.easy","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"incompatible-python-fasttext","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"grid-pattern-transform","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"train-fasttext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"download-youtube","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"count-dataset-tokens","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"git-multibranch","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"cron-broken-network","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"pytorch-model-cli.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"qemu-startup","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"qemu-alpine-ssh","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"pytorch-model-cli","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"pytorch-model-cli.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"nginx-request-logging","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"simple-web-scraper","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"path-tracing","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"fix-permissions","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"jupyter-notebook-server","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"password-recovery","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"cartpole-rl-training","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"intrusion-detection","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"fix-pandas-version","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"reshard-c4-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"write-compressor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"tmux-advanced-workflow","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"raman-fitting","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"extract-moves-from-video","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"git-workflow-hack","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"heterogeneous-dates","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"simple-sheets-put","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"raman-fitting.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"configure-git-webserver","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"processing-pipeline","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"fibonacci-server","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"create-bucket","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"fix-git","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"crack-7z-hash","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"chess-best-move","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"decommissioning-service-with-sensitive-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"get-bitcoin-nodes","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"swe-bench-langcodes","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"modernize-fortran-build","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"sqlite-db-truncate","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"sanitize-git-repo.hard","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"path-tracing-reverse","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"organization-json-generator","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"crack-7z-hash.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"crack-7z-hash.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"gpt2-codegolf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"polyglot-c-py","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"hello-world","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"security-vulhub-minio","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"prove-plus-comm","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"openssl-selfsigned-cert","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"csv-to-parquet","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"new-encrypt-command","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"solana-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"sqlite-with-gcov","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"polyglot-rust-c","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"build-initramfs-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"build-tcc-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"hf-model-inference","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"extract-safely","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"sanitize-git-repo","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"vim-terminal-task","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250825_swe-agent-mini_claude-4-sonnet","example_id":"oom","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"eval-mteb","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"eval-mteb.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"conda-env-conflict-resolution","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"swe-bench-fsspec","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"swe-bench-astropy-2","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"swe-bench-astropy-1","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"super-benchmark-upet","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"incompatible-python-fasttext","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"incompatible-python-fasttext.base_with_hint","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"blind-maze-explorer-5x5","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"run-pdp11-code","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"grid-pattern-transform","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"blind-maze-explorer-algorithm","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"blind-maze-explorer-algorithm.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"blind-maze-explorer-algorithm.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"count-dataset-tokens","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"download-youtube","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"build-linux-kernel-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"train-fasttext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"git-multibranch","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"cron-broken-network","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"play-zork","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"pytorch-model-cli.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"csv-to-parquet","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"pytorch-model-cli","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"qemu-alpine-ssh","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"pytorch-model-cli.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"qemu-startup","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"heterogeneous-dates","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"crack-7z-hash.hard","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"intrusion-detection","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"sqlite-db-truncate","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"hello-world","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"processing-pipeline","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"fibonacci-server","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"create-bucket","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"crack-7z-hash.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"hf-model-inference","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"build-initramfs-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"fix-git","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"sanitize-git-repo.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"configure-git-webserver","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"raman-fitting.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"simple-sheets-put","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"extract-moves-from-video","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"prove-plus-comm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"oom","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"polyglot-c-py","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"cartpole-rl-training","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"simple-web-scraper","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"reshard-c4-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"decommissioning-service-with-sensitive-data","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"path-tracing-reverse","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"fix-pandas-version","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"gpt2-codegolf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"sqlite-with-gcov","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"crack-7z-hash","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"modernize-fortran-build","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"raman-fitting","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"organization-json-generator","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"tmux-advanced-workflow","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"chess-best-move","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"sanitize-git-repo","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"swe-bench-langcodes","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"nginx-request-logging","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"fix-permissions","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"openssl-selfsigned-cert","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"git-workflow-hack","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"jupyter-notebook-server","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"path-tracing","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"solana-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"extract-safely","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"build-tcc-qemu","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"new-encrypt-command","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"vim-terminal-task","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"password-recovery","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"security-vulhub-minio","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"polyglot-rust-c","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"get-bitcoin-nodes","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-opus","example_id":"write-compressor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"grid-pattern-transform","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"incompatible-python-fasttext.base_with_hint","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"blind-maze-explorer-algorithm","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"incompatible-python-fasttext","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"download-youtube","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"eval-mteb","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"fix-pandas-version","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"simple-web-scraper","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"count-dataset-tokens","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"pytorch-model-cli.easy","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"conda-env-conflict-resolution","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"nginx-request-logging","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"fix-permissions","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"pytorch-model-cli","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"swe-bench-astropy-2","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"swe-bench-fsspec","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"openssl-selfsigned-cert","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"swe-bench-astropy-1","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"sqlite-with-gcov","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"pytorch-model-cli.hard","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"raman-fitting.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"raman-fitting","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"new-encrypt-command","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"tmux-advanced-workflow","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"eval-mteb.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"hello-world","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"cron-broken-network","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"crack-7z-hash","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"git-multibranch","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"modernize-fortran-build","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"crack-7z-hash.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"sanitize-git-repo.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"heterogeneous-dates","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"qemu-alpine-ssh","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"qemu-startup","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"train-fasttext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"gpt2-codegolf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"reshard-c4-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"sqlite-db-truncate","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"chess-best-move","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"organization-json-generator","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"intrusion-detection","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"decommissioning-service-with-sensitive-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"security-vulhub-minio","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"path-tracing-reverse","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"simple-sheets-put","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"extract-moves-from-video","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"fibonacci-server","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"swe-bench-langcodes","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"path-tracing","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"blind-maze-explorer-5x5","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"blind-maze-explorer-algorithm.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"blind-maze-explorer-algorithm.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"processing-pipeline","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"prove-plus-comm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"create-bucket","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"oom","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"fix-git","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"extract-safely","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"run-pdp11-code","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"crack-7z-hash.hard","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"configure-git-webserver","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"csv-to-parquet","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"polyglot-c-py","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"super-benchmark-upet","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"password-recovery","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"write-compressor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"build-tcc-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"sanitize-git-repo","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"get-bitcoin-nodes","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"hf-model-inference","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"git-workflow-hack","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"solana-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"vim-terminal-task","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"polyglot-rust-c","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"build-initramfs-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"cartpole-rl-training","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"jupyter-notebook-server","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"build-linux-kernel-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250829_goose_claude-4-sonnet","example_id":"play-zork","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"nginx-request-logging","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"build-initramfs-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"sanitize-git-repo.hard","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"swe-bench-astropy-2","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"sqlite-db-truncate","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"super-benchmark-upet","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"download-youtube","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"vim-terminal-task","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"blind-maze-explorer-5x5","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"csv-to-parquet","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"qemu-startup","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"pytorch-model-cli.easy","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"fix-pandas-version","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"blind-maze-explorer-algorithm.easy","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"blind-maze-explorer-algorithm.hard","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"count-dataset-tokens","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"create-bucket","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"build-linux-kernel-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"path-tracing-reverse","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"decommissioning-service-with-sensitive-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"modernize-fortran-build","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"jupyter-notebook-server","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"extract-safely","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"cron-broken-network","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"pytorch-model-cli.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"incompatible-python-fasttext.base_with_hint","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"reshard-c4-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"eval-mteb","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"intrusion-detection","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"build-tcc-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"simple-web-scraper","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"fix-permissions","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"fibonacci-server","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"blind-maze-explorer-algorithm","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"simple-sheets-put","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"openssl-selfsigned-cert","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"path-tracing","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"swe-bench-langcodes","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"sqlite-with-gcov","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"train-fasttext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"new-encrypt-command","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"get-bitcoin-nodes","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"password-recovery","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"git-multibranch","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"tmux-advanced-workflow","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"prove-plus-comm","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"swe-bench-fsspec","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"qemu-alpine-ssh","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"raman-fitting.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"configure-git-webserver","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"security-vulhub-minio","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"cartpole-rl-training","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"crack-7z-hash.hard","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"eval-mteb.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"incompatible-python-fasttext","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"chess-best-move","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"polyglot-rust-c","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"git-workflow-hack","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"swe-bench-astropy-1","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"fix-git","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"hello-world","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"grid-pattern-transform","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"solana-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"oom","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"crack-7z-hash","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"extract-moves-from-video","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"conda-env-conflict-resolution","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"polyglot-c-py","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"processing-pipeline","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"organization-json-generator","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"heterogeneous-dates","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"gpt2-codegolf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"hf-model-inference","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"play-zork","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"sanitize-git-repo","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"run-pdp11-code","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"pytorch-model-cli","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"crack-7z-hash.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"raman-fitting","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_claude-4-sonnet","example_id":"write-compressor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"grid-pattern-transform","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"incompatible-python-fasttext","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"sanitize-git-repo.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"vim-terminal-task","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"extract-moves-from-video","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"train-fasttext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"write-compressor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"organization-json-generator","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"swe-bench-langcodes","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"modernize-fortran-build","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"crack-7z-hash","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"processing-pipeline","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"swe-bench-astropy-2","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"prove-plus-comm","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"sanitize-git-repo","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"reshard-c4-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"hello-world","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"super-benchmark-upet","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"raman-fitting","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"fix-git","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"nginx-request-logging","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"play-zork","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"simple-web-scraper","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"intrusion-detection","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"new-encrypt-command","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"cartpole-rl-training","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"pytorch-model-cli.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"swe-bench-fsspec","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"blind-maze-explorer-algorithm.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"hf-model-inference","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"password-recovery","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"crack-7z-hash.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"fibonacci-server","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"openssl-selfsigned-cert","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"eval-mteb.hard","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"create-bucket","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"extract-safely","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"csv-to-parquet","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"sqlite-db-truncate","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"heterogeneous-dates","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"fix-permissions","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"blind-maze-explorer-5x5","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"count-dataset-tokens","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"conda-env-conflict-resolution","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"build-initramfs-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"blind-maze-explorer-algorithm.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"solana-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"configure-git-webserver","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"eval-mteb","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"pytorch-model-cli","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"tmux-advanced-workflow","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"git-multibranch","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"download-youtube","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"fix-pandas-version","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"security-vulhub-minio","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"qemu-alpine-ssh","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"swe-bench-astropy-1","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"build-linux-kernel-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"jupyter-notebook-server","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"build-tcc-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"run-pdp11-code","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"blind-maze-explorer-algorithm","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"git-workflow-hack","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"gpt2-codegolf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"polyglot-rust-c","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"pytorch-model-cli.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"path-tracing","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"decommissioning-service-with-sensitive-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"raman-fitting.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"sqlite-with-gcov","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"get-bitcoin-nodes","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"path-tracing-reverse","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"cron-broken-network","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"polyglot-c-py","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"incompatible-python-fasttext.base_with_hint","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"simple-sheets-put","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"crack-7z-hash.easy","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"oom","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"chess-best-move","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250902_orchestrator_qwen-3-coder-480B","example_id":"qemu-startup","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"eval-mteb.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"super-benchmark-upet","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"eval-mteb","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"swe-bench-astropy-2","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"build-linux-kernel-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"play-zork","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"swe-bench-fsspec","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"swe-bench-astropy-1","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"conda-env-conflict-resolution","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"run-pdp11-code","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"blind-maze-explorer-5x5","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"blind-maze-explorer-algorithm.hard","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"incompatible-python-fasttext","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"blind-maze-explorer-algorithm.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"incompatible-python-fasttext.base_with_hint","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"blind-maze-explorer-algorithm","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"grid-pattern-transform","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"qemu-startup","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"download-youtube","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"train-fasttext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"git-multibranch","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"count-dataset-tokens","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"qemu-alpine-ssh","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"cron-broken-network","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"pytorch-model-cli.easy","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"pytorch-model-cli.hard","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"pytorch-model-cli","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"processing-pipeline","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"chess-best-move","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"fix-permissions","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"modernize-fortran-build","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"heterogeneous-dates","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"configure-git-webserver","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"extract-moves-from-video","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"raman-fitting.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"build-initramfs-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"nginx-request-logging","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"fix-pandas-version","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"prove-plus-comm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"sqlite-db-truncate","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"swe-bench-langcodes","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"path-tracing-reverse","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"new-encrypt-command","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"hello-world","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"fix-git","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"create-bucket","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"simple-sheets-put","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"get-bitcoin-nodes","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"solana-data","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"security-vulhub-minio","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"decommissioning-service-with-sensitive-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"jupyter-notebook-server","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"fibonacci-server","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"cartpole-rl-training","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"gpt2-codegolf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"crack-7z-hash","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"crack-7z-hash.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"organization-json-generator","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"git-workflow-hack","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"csv-to-parquet","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"intrusion-detection","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"password-recovery","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"sqlite-with-gcov","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"sanitize-git-repo.hard","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"polyglot-rust-c","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"raman-fitting","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"openssl-selfsigned-cert","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"path-tracing","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"build-tcc-qemu","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"reshard-c4-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"polyglot-c-py","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"sanitize-git-repo","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"write-compressor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"crack-7z-hash.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"oom","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"tmux-advanced-workflow","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"extract-safely","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"vim-terminal-task","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"hf-model-inference","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250911_chaterm_claude-4-sonnet","example_id":"simple-web-scraper","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"super-benchmark-upet","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"build-linux-kernel-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"play-zork","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"eval-mteb","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"eval-mteb.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"conda-env-conflict-resolution","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"swe-bench-fsspec","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"swe-bench-astropy-2","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"swe-bench-astropy-1","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"blind-maze-explorer-5x5","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"blind-maze-explorer-algorithm.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"blind-maze-explorer-algorithm","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"run-pdp11-code","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"blind-maze-explorer-algorithm.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"incompatible-python-fasttext","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"incompatible-python-fasttext.base_with_hint","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"grid-pattern-transform","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"qemu-startup","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"train-fasttext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"count-dataset-tokens","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"qemu-alpine-ssh","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"download-youtube","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"pytorch-model-cli.hard","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"git-multibranch","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"cron-broken-network","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"pytorch-model-cli.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"pytorch-model-cli","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"security-vulhub-minio","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"simple-web-scraper","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"prove-plus-comm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"extract-moves-from-video","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"oom","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"password-recovery","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"simple-sheets-put","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"crack-7z-hash.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"crack-7z-hash.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"swe-bench-langcodes","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"cartpole-rl-training","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"write-compressor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"sanitize-git-repo","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"heterogeneous-dates","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"solana-data","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"crack-7z-hash","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"chess-best-move","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"fix-git","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"openssl-selfsigned-cert","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"path-tracing","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"raman-fitting","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"jupyter-notebook-server","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"intrusion-detection","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"hf-model-inference","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"sanitize-git-repo.hard","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"fix-pandas-version","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"reshard-c4-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"nginx-request-logging","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"csv-to-parquet","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"build-tcc-qemu","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"build-initramfs-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"new-encrypt-command","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"polyglot-c-py","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"tmux-advanced-workflow","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"decommissioning-service-with-sensitive-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"get-bitcoin-nodes","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"git-workflow-hack","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"processing-pipeline","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"fibonacci-server","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"modernize-fortran-build","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"create-bucket","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"hello-world","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"gpt2-codegolf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"path-tracing-reverse","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"organization-json-generator","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"configure-git-webserver","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"sqlite-with-gcov","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"polyglot-rust-c","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"raman-fitting.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"sqlite-db-truncate","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"fix-permissions","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"vim-terminal-task","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-1-opus","example_id":"extract-safely","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"build-linux-kernel-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"super-benchmark-upet","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"eval-mteb","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"eval-mteb.hard","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"play-zork","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"swe-bench-astropy-2","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"conda-env-conflict-resolution","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"swe-bench-fsspec","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"swe-bench-astropy-1","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"blind-maze-explorer-algorithm","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"blind-maze-explorer-5x5","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"run-pdp11-code","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"blind-maze-explorer-algorithm.hard","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"incompatible-python-fasttext","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"incompatible-python-fasttext.base_with_hint","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"grid-pattern-transform","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"blind-maze-explorer-algorithm.easy","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"count-dataset-tokens","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"qemu-startup","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"download-youtube","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"train-fasttext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"qemu-alpine-ssh","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"git-multibranch","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"pytorch-model-cli","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"pytorch-model-cli.easy","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"pytorch-model-cli.hard","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"cron-broken-network","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"sanitize-git-repo.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"get-bitcoin-nodes","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"crack-7z-hash.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"tmux-advanced-workflow","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"simple-web-scraper","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"sqlite-db-truncate","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"chess-best-move","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"jupyter-notebook-server","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"processing-pipeline","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"oom","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"raman-fitting","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"extract-moves-from-video","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"openssl-selfsigned-cert","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"hf-model-inference","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"sanitize-git-repo","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"modernize-fortran-build","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"csv-to-parquet","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"reshard-c4-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"hello-world","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"solana-data","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"organization-json-generator","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"new-encrypt-command","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"fix-pandas-version","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"intrusion-detection","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"build-tcc-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"prove-plus-comm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"raman-fitting.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"cartpole-rl-training","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"password-recovery","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"fix-git","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"git-workflow-hack","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"create-bucket","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"crack-7z-hash","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"simple-sheets-put","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"sqlite-with-gcov","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"configure-git-webserver","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"swe-bench-langcodes","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"decommissioning-service-with-sensitive-data","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"security-vulhub-minio","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"path-tracing-reverse","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"path-tracing","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"crack-7z-hash.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"heterogeneous-dates","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"polyglot-c-py","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"nginx-request-logging","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"build-initramfs-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"gpt2-codegolf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"polyglot-rust-c","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"fibonacci-server","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"write-compressor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"fix-permissions","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"extract-safely","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250923_droid_claude-4-sonnet","example_id":"vim-terminal-task","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"build-linux-kernel-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"play-zork","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"eval-mteb.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"super-benchmark-upet","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"conda-env-conflict-resolution","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"swe-bench-astropy-2","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"eval-mteb","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"swe-bench-fsspec","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"swe-bench-astropy-1","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"blind-maze-explorer-algorithm.easy","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"blind-maze-explorer-algorithm.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"blind-maze-explorer-5x5","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"run-pdp11-code","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"blind-maze-explorer-algorithm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"incompatible-python-fasttext","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"incompatible-python-fasttext.base_with_hint","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"grid-pattern-transform","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"qemu-startup","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"qemu-alpine-ssh","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"download-youtube","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"train-fasttext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"count-dataset-tokens","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"cron-broken-network","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"pytorch-model-cli.easy","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"git-multibranch","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"chess-best-move","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"pytorch-model-cli.hard","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"pytorch-model-cli","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"new-encrypt-command","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"fix-permissions","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"get-bitcoin-nodes","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"nginx-request-logging","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"build-initramfs-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"hello-world","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"sanitize-git-repo.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"polyglot-rust-c","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"openssl-selfsigned-cert","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"fix-pandas-version","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"raman-fitting.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"write-compressor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"password-recovery","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"gpt2-codegolf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"oom","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"extract-moves-from-video","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"reshard-c4-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"simple-web-scraper","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"decommissioning-service-with-sensitive-data","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"polyglot-c-py","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"fix-git","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"path-tracing","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"hf-model-inference","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"path-tracing-reverse","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"modernize-fortran-build","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"intrusion-detection","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"sqlite-with-gcov","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"sqlite-db-truncate","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"fibonacci-server","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"solana-data","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"security-vulhub-minio","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"tmux-advanced-workflow","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"create-bucket","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"organization-json-generator","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"crack-7z-hash.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"crack-7z-hash.hard","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"sanitize-git-repo","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"jupyter-notebook-server","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"git-workflow-hack","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"raman-fitting","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"crack-7z-hash","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"prove-plus-comm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"build-tcc-qemu","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"simple-sheets-put","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"processing-pipeline","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"csv-to-parquet","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"cartpole-rl-training","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"configure-git-webserver","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"swe-bench-langcodes","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"heterogeneous-dates","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"extract-safely","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250924_droid_gpt-5","example_id":"vim-terminal-task","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"super-benchmark-upet","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"play-zork","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"eval-mteb.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"build-linux-kernel-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"eval-mteb","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"conda-env-conflict-resolution","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"swe-bench-astropy-2","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"swe-bench-fsspec","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"run-pdp11-code","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"swe-bench-astropy-1","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"blind-maze-explorer-algorithm.hard","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"blind-maze-explorer-5x5","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"blind-maze-explorer-algorithm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"incompatible-python-fasttext.base_with_hint","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"incompatible-python-fasttext","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"blind-maze-explorer-algorithm.easy","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"grid-pattern-transform","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"count-dataset-tokens","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"train-fasttext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"qemu-startup","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"download-youtube","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"git-multibranch","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"qemu-alpine-ssh","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"cron-broken-network","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"pytorch-model-cli","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"pytorch-model-cli.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"pytorch-model-cli.easy","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"simple-web-scraper","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"hf-model-inference","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"sqlite-db-truncate","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"extract-moves-from-video","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"openssl-selfsigned-cert","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"nginx-request-logging","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"password-recovery","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"cartpole-rl-training","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"organization-json-generator","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"chess-best-move","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"raman-fitting","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"new-encrypt-command","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"oom","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"simple-sheets-put","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"create-bucket","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"fix-permissions","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"git-workflow-hack","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"crack-7z-hash.hard","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"write-compressor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"sanitize-git-repo","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"solana-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"build-tcc-qemu","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"build-initramfs-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"gpt2-codegolf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"sqlite-with-gcov","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"tmux-advanced-workflow","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"reshard-c4-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"intrusion-detection","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"configure-git-webserver","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"modernize-fortran-build","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"security-vulhub-minio","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"swe-bench-langcodes","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"crack-7z-hash.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"jupyter-notebook-server","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"heterogeneous-dates","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"fix-pandas-version","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"polyglot-rust-c","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"path-tracing-reverse","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"crack-7z-hash","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"processing-pipeline","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"path-tracing","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"raman-fitting.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"fibonacci-server","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"csv-to-parquet","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"fix-git","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"hello-world","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"decommissioning-service-with-sensitive-data","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"prove-plus-comm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"sanitize-git-repo.hard","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"polyglot-c-py","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"get-bitcoin-nodes","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"extract-safely","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20250926_ante_claude-4-sonnet","example_id":"vim-terminal-task","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"build-linux-kernel-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"super-benchmark-upet","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"eval-mteb","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"eval-mteb.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"play-zork","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"conda-env-conflict-resolution","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"swe-bench-fsspec","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"swe-bench-astropy-2","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"run-pdp11-code","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"swe-bench-astropy-1","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"blind-maze-explorer-algorithm.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"blind-maze-explorer-5x5","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"blind-maze-explorer-algorithm.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"incompatible-python-fasttext.base_with_hint","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"incompatible-python-fasttext","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"blind-maze-explorer-algorithm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"grid-pattern-transform","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"train-fasttext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"qemu-alpine-ssh","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"count-dataset-tokens","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"qemu-startup","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"download-youtube","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"git-multibranch","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"pytorch-model-cli.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"cron-broken-network","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"pytorch-model-cli","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"fibonacci-server","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"pytorch-model-cli.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"simple-sheets-put","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"processing-pipeline","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"heterogeneous-dates","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"crack-7z-hash.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"csv-to-parquet","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"organization-json-generator","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"polyglot-c-py","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"oom","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"fix-pandas-version","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"jupyter-notebook-server","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"path-tracing","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"openssl-selfsigned-cert","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"reshard-c4-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"modernize-fortran-build","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"crack-7z-hash","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"gpt2-codegolf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"nginx-request-logging","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"hello-world","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"sqlite-db-truncate","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"build-tcc-qemu","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"get-bitcoin-nodes","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"simple-web-scraper","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"build-initramfs-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"prove-plus-comm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"polyglot-rust-c","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"fix-permissions","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"fix-git","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"create-bucket","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"sanitize-git-repo.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"raman-fitting.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"extract-moves-from-video","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"decommissioning-service-with-sensitive-data","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"new-encrypt-command","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"swe-bench-langcodes","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"security-vulhub-minio","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"git-workflow-hack","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"raman-fitting","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"sanitize-git-repo","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"solana-data","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"path-tracing-reverse","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"write-compressor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"chess-best-move","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"tmux-advanced-workflow","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"crack-7z-hash.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"cartpole-rl-training","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"configure-git-webserver","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"intrusion-detection","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"password-recovery","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"sqlite-with-gcov","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"hf-model-inference","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"extract-safely","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251001_droid_claude-4-5-sonnet","example_id":"vim-terminal-task","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"play-zork","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"build-linux-kernel-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"conda-env-conflict-resolution","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"eval-mteb","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"swe-bench-astropy-2","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"eval-mteb.hard","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"swe-bench-astropy-1","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"super-benchmark-upet","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"swe-bench-fsspec","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"run-pdp11-code","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"blind-maze-explorer-5x5","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"blind-maze-explorer-algorithm.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"blind-maze-explorer-algorithm","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"blind-maze-explorer-algorithm.hard","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"incompatible-python-fasttext.base_with_hint","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"grid-pattern-transform","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"incompatible-python-fasttext","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"qemu-startup","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"qemu-alpine-ssh","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"count-dataset-tokens","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"train-fasttext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"download-youtube","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"git-multibranch","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"pytorch-model-cli.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"pytorch-model-cli.easy","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"pytorch-model-cli","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"fix-pandas-version","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"organization-json-generator","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"cron-broken-network","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"configure-git-webserver","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"simple-sheets-put","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"sqlite-db-truncate","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"tmux-advanced-workflow","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"sanitize-git-repo.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"processing-pipeline","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"raman-fitting","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"polyglot-rust-c","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"csv-to-parquet","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"write-compressor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"security-vulhub-minio","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"fibonacci-server","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"get-bitcoin-nodes","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"path-tracing","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"crack-7z-hash.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"intrusion-detection","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"path-tracing-reverse","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"jupyter-notebook-server","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"fix-git","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"decommissioning-service-with-sensitive-data","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"fix-permissions","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"crack-7z-hash","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"git-workflow-hack","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"nginx-request-logging","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"sqlite-with-gcov","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"new-encrypt-command","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"cartpole-rl-training","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"build-tcc-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"password-recovery","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"create-bucket","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"hf-model-inference","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"sanitize-git-repo","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"gpt2-codegolf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"polyglot-c-py","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"raman-fitting.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"build-initramfs-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"oom","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"solana-data","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"prove-plus-comm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"swe-bench-langcodes","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"extract-moves-from-video","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"heterogeneous-dates","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"modernize-fortran-build","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"crack-7z-hash.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"hello-world","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"openssl-selfsigned-cert","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"chess-best-move","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"simple-web-scraper","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"reshard-c4-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"extract-safely","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251007_camel-agent_gpt-4-1","example_id":"vim-terminal-task","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"super-benchmark-upet","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"build-linux-kernel-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"eval-mteb.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"eval-mteb","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"play-zork","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"swe-bench-astropy-2","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"conda-env-conflict-resolution","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"swe-bench-astropy-1","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"swe-bench-fsspec","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"blind-maze-explorer-5x5","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"run-pdp11-code","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"blind-maze-explorer-algorithm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"blind-maze-explorer-algorithm.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"blind-maze-explorer-algorithm.easy","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"incompatible-python-fasttext.base_with_hint","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"grid-pattern-transform","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"train-fasttext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"incompatible-python-fasttext","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"qemu-startup","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"count-dataset-tokens","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"download-youtube","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"git-multibranch","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"qemu-alpine-ssh","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"cron-broken-network","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"pytorch-model-cli.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"organization-json-generator","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"pytorch-model-cli","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"hello-world","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"pytorch-model-cli.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"reshard-c4-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"openssl-selfsigned-cert","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"create-bucket","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"csv-to-parquet","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"get-bitcoin-nodes","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"extract-moves-from-video","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"build-tcc-qemu","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"modernize-fortran-build","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"fibonacci-server","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"crack-7z-hash.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"new-encrypt-command","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"oom","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"sqlite-with-gcov","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"build-initramfs-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"gpt2-codegolf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"jupyter-notebook-server","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"raman-fitting.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"hf-model-inference","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"intrusion-detection","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"fix-git","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"chess-best-move","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"security-vulhub-minio","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"simple-web-scraper","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"sanitize-git-repo.hard","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"solana-data","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"fix-permissions","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"write-compressor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"heterogeneous-dates","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"swe-bench-langcodes","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"configure-git-webserver","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"path-tracing","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"fix-pandas-version","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"cartpole-rl-training","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"processing-pipeline","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"raman-fitting","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"nginx-request-logging","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"path-tracing-reverse","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"crack-7z-hash.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"polyglot-rust-c","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"prove-plus-comm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"decommissioning-service-with-sensitive-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"polyglot-c-py","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"git-workflow-hack","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"sqlite-db-truncate","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"sanitize-git-repo","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"crack-7z-hash","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"password-recovery","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"simple-sheets-put","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"extract-safely","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"tmux-advanced-workflow","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251010_Chaterm_claude-4-5-sonnet","example_id":"vim-terminal-task","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"build-linux-kernel-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"play-zork","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"eval-mteb.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"conda-env-conflict-resolution","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"super-benchmark-upet","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"swe-bench-astropy-2","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"eval-mteb","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"swe-bench-fsspec","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"swe-bench-astropy-1","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"blind-maze-explorer-algorithm.easy","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"run-pdp11-code","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"blind-maze-explorer-5x5","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"incompatible-python-fasttext.base_with_hint","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"blind-maze-explorer-algorithm","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"grid-pattern-transform","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"incompatible-python-fasttext","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"train-fasttext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"blind-maze-explorer-algorithm.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"qemu-alpine-ssh","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"download-youtube","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"qemu-startup","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"count-dataset-tokens","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"git-multibranch","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"pytorch-model-cli.hard","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"fix-permissions","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"pytorch-model-cli.easy","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"cron-broken-network","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"fibonacci-server","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"decommissioning-service-with-sensitive-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"reshard-c4-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"pytorch-model-cli","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"solana-data","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"new-encrypt-command","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"tmux-advanced-workflow","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"create-bucket","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"oom","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"polyglot-rust-c","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"get-bitcoin-nodes","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"jupyter-notebook-server","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"hello-world","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"crack-7z-hash","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"prove-plus-comm","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"fix-pandas-version","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"csv-to-parquet","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"path-tracing","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"heterogeneous-dates","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"hf-model-inference","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"gpt2-codegolf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"polyglot-c-py","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"simple-sheets-put","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"openssl-selfsigned-cert","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"raman-fitting.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"processing-pipeline","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"swe-bench-langcodes","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"chess-best-move","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"write-compressor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"sqlite-db-truncate","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"build-initramfs-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"modernize-fortran-build","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"extract-moves-from-video","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"crack-7z-hash.easy","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"nginx-request-logging","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"path-tracing-reverse","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"password-recovery","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"sanitize-git-repo","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"configure-git-webserver","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"git-workflow-hack","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"crack-7z-hash.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"build-tcc-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"sanitize-git-repo.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"intrusion-detection","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"simple-web-scraper","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"raman-fitting","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"organization-json-generator","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"fix-git","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"extract-safely","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"security-vulhub-minio","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"cartpole-rl-training","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"sqlite-with-gcov","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251012_alpha_claude-4-5-sonnet","example_id":"vim-terminal-task","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"super-benchmark-upet","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"play-zork","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"eval-mteb.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"eval-mteb","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"conda-env-conflict-resolution","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"swe-bench-astropy-2","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"build-linux-kernel-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"run-pdp11-code","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"swe-bench-fsspec","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"blind-maze-explorer-algorithm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"swe-bench-astropy-1","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"blind-maze-explorer-algorithm.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"blind-maze-explorer-5x5","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"incompatible-python-fasttext","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"blind-maze-explorer-algorithm.easy","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"incompatible-python-fasttext.base_with_hint","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"grid-pattern-transform","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"train-fasttext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"qemu-startup","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"count-dataset-tokens","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"download-youtube","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"qemu-alpine-ssh","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"git-multibranch","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"cron-broken-network","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"pytorch-model-cli.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"pytorch-model-cli","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"processing-pipeline","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"pytorch-model-cli.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"raman-fitting.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"password-recovery","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"fix-pandas-version","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"get-bitcoin-nodes","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"security-vulhub-minio","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"csv-to-parquet","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"sqlite-db-truncate","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"polyglot-c-py","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"decommissioning-service-with-sensitive-data","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"modernize-fortran-build","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"jupyter-notebook-server","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"heterogeneous-dates","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"fix-permissions","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"new-encrypt-command","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"write-compressor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"crack-7z-hash.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"hello-world","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"configure-git-webserver","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"gpt2-codegolf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"fix-git","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"sqlite-with-gcov","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"polyglot-rust-c","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"build-tcc-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"raman-fitting","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"solana-data","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"cartpole-rl-training","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"path-tracing-reverse","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"simple-sheets-put","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"crack-7z-hash","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"oom","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"sanitize-git-repo.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"fibonacci-server","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"simple-web-scraper","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"tmux-advanced-workflow","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"path-tracing","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"swe-bench-langcodes","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"create-bucket","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"reshard-c4-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"git-workflow-hack","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"intrusion-detection","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"hf-model-inference","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"build-initramfs-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"extract-moves-from-video","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"nginx-request-logging","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"openssl-selfsigned-cert","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"organization-json-generator","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"crack-7z-hash.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"sanitize-git-repo","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"chess-best-move","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"prove-plus-comm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"extract-safely","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251013_ante_claude_4-5_sonnet","example_id":"vim-terminal-task","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"super-benchmark-upet","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"build-linux-kernel-qemu","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"eval-mteb","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"play-zork","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"eval-mteb.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"conda-env-conflict-resolution","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"swe-bench-astropy-2","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"swe-bench-fsspec","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"run-pdp11-code","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"blind-maze-explorer-algorithm.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"swe-bench-astropy-1","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"blind-maze-explorer-5x5","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"blind-maze-explorer-algorithm.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"incompatible-python-fasttext","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"incompatible-python-fasttext.base_with_hint","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"blind-maze-explorer-algorithm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"grid-pattern-transform","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"train-fasttext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"qemu-startup","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"qemu-alpine-ssh","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"count-dataset-tokens","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"download-youtube","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"git-multibranch","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"cron-broken-network","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"pytorch-model-cli.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"pytorch-model-cli","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"pytorch-model-cli.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"create-bucket","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"sqlite-with-gcov","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"build-tcc-qemu","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"fix-permissions","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"crack-7z-hash.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"tmux-advanced-workflow","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"modernize-fortran-build","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"gpt2-codegolf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"organization-json-generator","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"sanitize-git-repo","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"path-tracing-reverse","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"openssl-selfsigned-cert","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"intrusion-detection","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"security-vulhub-minio","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"raman-fitting","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"solana-data","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"simple-sheets-put","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"raman-fitting.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"path-tracing","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"get-bitcoin-nodes","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"decommissioning-service-with-sensitive-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"cartpole-rl-training","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"git-workflow-hack","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"reshard-c4-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"nginx-request-logging","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"new-encrypt-command","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"jupyter-notebook-server","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"sqlite-db-truncate","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"heterogeneous-dates","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"chess-best-move","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"csv-to-parquet","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"extract-moves-from-video","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"password-recovery","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"oom","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"processing-pipeline","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"polyglot-rust-c","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"simple-web-scraper","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"swe-bench-langcodes","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"build-initramfs-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"configure-git-webserver","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"crack-7z-hash","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"fibonacci-server","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"prove-plus-comm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"polyglot-c-py","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"hf-model-inference","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"sanitize-git-repo.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"fix-pandas-version","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"hello-world","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"fix-git","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"crack-7z-hash.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"write-compressor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"extract-safely","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_Chaterm_claude-4-5-sonnet","example_id":"vim-terminal-task","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"build-linux-kernel-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"play-zork","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"eval-mteb","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"eval-mteb.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"super-benchmark-upet","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"conda-env-conflict-resolution","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"run-pdp11-code","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"swe-bench-astropy-1","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"swe-bench-astropy-2","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"swe-bench-fsspec","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"blind-maze-explorer-algorithm.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"blind-maze-explorer-algorithm","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"blind-maze-explorer-5x5","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"incompatible-python-fasttext","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"blind-maze-explorer-algorithm.easy","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"incompatible-python-fasttext.base_with_hint","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"grid-pattern-transform","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"qemu-alpine-ssh","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"train-fasttext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"qemu-startup","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"download-youtube","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"git-multibranch","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"count-dataset-tokens","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"cron-broken-network","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"pytorch-model-cli","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"pytorch-model-cli.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"simple-web-scraper","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"sqlite-db-truncate","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"pytorch-model-cli.easy","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"tmux-advanced-workflow","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"crack-7z-hash.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"configure-git-webserver","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"create-bucket","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"fix-git","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"chess-best-move","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"git-workflow-hack","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"simple-sheets-put","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"build-initramfs-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"organization-json-generator","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"decommissioning-service-with-sensitive-data","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"crack-7z-hash.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"raman-fitting","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"new-encrypt-command","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"sqlite-with-gcov","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"modernize-fortran-build","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"polyglot-c-py","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"extract-moves-from-video","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"fix-pandas-version","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"cartpole-rl-training","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"oom","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"build-tcc-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"solana-data","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"path-tracing","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"password-recovery","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"fix-permissions","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"gpt2-codegolf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"hf-model-inference","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"polyglot-rust-c","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"sanitize-git-repo","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"fibonacci-server","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"prove-plus-comm","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"get-bitcoin-nodes","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"security-vulhub-minio","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"processing-pipeline","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"intrusion-detection","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"raman-fitting.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"crack-7z-hash","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"path-tracing-reverse","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"sanitize-git-repo.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"heterogeneous-dates","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"csv-to-parquet","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"swe-bench-langcodes","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"hello-world","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"openssl-selfsigned-cert","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"reshard-c4-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"nginx-request-logging","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"write-compressor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"extract-safely","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"vim-terminal-task","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251016_apex_agent_gpt-5","example_id":"jupyter-notebook-server","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"eval-mteb","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"super-benchmark-upet","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"eval-mteb.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"play-zork","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"conda-env-conflict-resolution","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"swe-bench-astropy-2","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"build-linux-kernel-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"swe-bench-astropy-1","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"swe-bench-fsspec","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"run-pdp11-code","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"blind-maze-explorer-algorithm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"blind-maze-explorer-algorithm.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"incompatible-python-fasttext.base_with_hint","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"blind-maze-explorer-algorithm.hard","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"grid-pattern-transform","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"incompatible-python-fasttext","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"blind-maze-explorer-5x5","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"train-fasttext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"count-dataset-tokens","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"download-youtube","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"qemu-alpine-ssh","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"git-multibranch","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"qemu-startup","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"cron-broken-network","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"pytorch-model-cli.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"pytorch-model-cli.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"swe-bench-langcodes","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"pytorch-model-cli","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"csv-to-parquet","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"configure-git-webserver","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"tmux-advanced-workflow","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"hello-world","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"sqlite-db-truncate","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"jupyter-notebook-server","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"fix-permissions","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"sanitize-git-repo.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"fix-pandas-version","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"new-encrypt-command","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"sqlite-with-gcov","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"crack-7z-hash","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"nginx-request-logging","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"cartpole-rl-training","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"raman-fitting.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"crack-7z-hash.hard","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"prove-plus-comm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"polyglot-c-py","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"solana-data","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"polyglot-rust-c","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"fibonacci-server","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"simple-web-scraper","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"modernize-fortran-build","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"get-bitcoin-nodes","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"raman-fitting","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"heterogeneous-dates","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"oom","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"decommissioning-service-with-sensitive-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"create-bucket","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"organization-json-generator","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"build-initramfs-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"path-tracing-reverse","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"simple-sheets-put","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"write-compressor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"reshard-c4-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"password-recovery","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"sanitize-git-repo","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"openssl-selfsigned-cert","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"intrusion-detection","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"git-workflow-hack","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"security-vulhub-minio","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"build-tcc-qemu","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"fix-git","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"processing-pipeline","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"extract-moves-from-video","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"gpt2-codegolf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"path-tracing","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"crack-7z-hash.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"hf-model-inference","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"extract-safely","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"chess-best-move","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251017_deepagent-desktop_claude-4-5-sonnet","example_id":"vim-terminal-task","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"build-linux-kernel-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"play-zork","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"eval-mteb","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"eval-mteb.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"conda-env-conflict-resolution","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"super-benchmark-upet","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"swe-bench-astropy-2","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"swe-bench-fsspec","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"swe-bench-astropy-1","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"run-pdp11-code","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"blind-maze-explorer-5x5","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"blind-maze-explorer-algorithm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"blind-maze-explorer-algorithm.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"blind-maze-explorer-algorithm.hard","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"incompatible-python-fasttext.base_with_hint","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"incompatible-python-fasttext","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"train-fasttext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"grid-pattern-transform","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"qemu-alpine-ssh","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"count-dataset-tokens","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"qemu-startup","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"git-multibranch","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"pytorch-model-cli","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"download-youtube","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"cron-broken-network","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"pytorch-model-cli.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"pytorch-model-cli.hard","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"path-tracing","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"tmux-advanced-workflow","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"hello-world","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"crack-7z-hash.easy","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"gpt2-codegolf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"write-compressor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"crack-7z-hash","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"reshard-c4-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"fix-permissions","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"git-workflow-hack","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"fix-pandas-version","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"sanitize-git-repo.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"modernize-fortran-build","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"build-initramfs-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"solana-data","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"fix-git","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"processing-pipeline","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"hf-model-inference","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"build-tcc-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"openssl-selfsigned-cert","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"cartpole-rl-training","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"oom","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"organization-json-generator","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"decommissioning-service-with-sensitive-data","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"simple-sheets-put","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"password-recovery","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"security-vulhub-minio","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"raman-fitting","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"crack-7z-hash.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"sqlite-with-gcov","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"extract-moves-from-video","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"create-bucket","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"simple-web-scraper","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"chess-best-move","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"jupyter-notebook-server","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"configure-git-webserver","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"get-bitcoin-nodes","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"polyglot-c-py","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"new-encrypt-command","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"path-tracing-reverse","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"csv-to-parquet","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"prove-plus-comm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"intrusion-detection","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"nginx-request-logging","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"sanitize-git-repo","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"heterogeneous-dates","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"sqlite-db-truncate","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"raman-fitting.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"fibonacci-server","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"swe-bench-langcodes","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"extract-safely","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"vim-terminal-task","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251019_apex_agent_claude-4-5-sonnet","example_id":"polyglot-rust-c","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"eval-mteb","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"eval-mteb.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"super-benchmark-upet","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"play-zork","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"swe-bench-astropy-2","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"run-pdp11-code","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"conda-env-conflict-resolution","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"swe-bench-fsspec","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"blind-maze-explorer-5x5","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"swe-bench-astropy-1","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"build-linux-kernel-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"blind-maze-explorer-algorithm.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"blind-maze-explorer-algorithm","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"incompatible-python-fasttext","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"incompatible-python-fasttext.base_with_hint","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"grid-pattern-transform","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"blind-maze-explorer-algorithm.hard","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"qemu-alpine-ssh","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"qemu-startup","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"download-youtube","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"git-multibranch","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"train-fasttext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"count-dataset-tokens","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"cron-broken-network","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"pytorch-model-cli.easy","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"pytorch-model-cli","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"fix-permissions","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"decommissioning-service-with-sensitive-data","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"swe-bench-langcodes","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"crack-7z-hash.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"chess-best-move","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"crack-7z-hash","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"fibonacci-server","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"sanitize-git-repo","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"pytorch-model-cli.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"fix-pandas-version","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"gpt2-codegolf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"sqlite-db-truncate","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"jupyter-notebook-server","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"fix-git","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"raman-fitting.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"git-workflow-hack","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"organization-json-generator","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"crack-7z-hash.easy","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"sqlite-with-gcov","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"csv-to-parquet","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"build-tcc-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"heterogeneous-dates","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"new-encrypt-command","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"cartpole-rl-training","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"oom","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"solana-data","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"write-compressor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"hello-world","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"build-initramfs-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"create-bucket","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"password-recovery","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"reshard-c4-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"extract-moves-from-video","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"security-vulhub-minio","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"polyglot-rust-c","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"modernize-fortran-build","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"path-tracing-reverse","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"raman-fitting","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"openssl-selfsigned-cert","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"intrusion-detection","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"hf-model-inference","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"tmux-advanced-workflow","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"processing-pipeline","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"path-tracing","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"nginx-request-logging","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"simple-sheets-put","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"get-bitcoin-nodes","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"simple-web-scraper","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"configure-git-webserver","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"sanitize-git-repo.hard","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"prove-plus-comm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"extract-safely","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"vim-terminal-task","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251026_iflow-cli_Qwen3-Coder-480A30","example_id":"polyglot-c-py","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"eval-mteb","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"build-linux-kernel-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"eval-mteb.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"conda-env-conflict-resolution","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"play-zork","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"super-benchmark-upet","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"run-pdp11-code","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"swe-bench-fsspec","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"swe-bench-astropy-2","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"swe-bench-astropy-1","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"blind-maze-explorer-5x5","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"blind-maze-explorer-algorithm.hard","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"blind-maze-explorer-algorithm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"incompatible-python-fasttext","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"incompatible-python-fasttext.base_with_hint","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"grid-pattern-transform","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"blind-maze-explorer-algorithm.easy","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"count-dataset-tokens","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"train-fasttext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"download-youtube","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"qemu-startup","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"pytorch-model-cli.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"git-multibranch","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"pytorch-model-cli.hard","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"qemu-alpine-ssh","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"decommissioning-service-with-sensitive-data","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"heterogeneous-dates","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"pytorch-model-cli","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"cron-broken-network","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"fibonacci-server","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"openssl-selfsigned-cert","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"security-vulhub-minio","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"tmux-advanced-workflow","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"crack-7z-hash.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"simple-sheets-put","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"crack-7z-hash","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"csv-to-parquet","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"get-bitcoin-nodes","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"chess-best-move","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"fix-permissions","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"raman-fitting.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"oom","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"extract-moves-from-video","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"hello-world","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"prove-plus-comm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"build-tcc-qemu","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"solana-data","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"create-bucket","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"sqlite-with-gcov","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"path-tracing-reverse","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"git-workflow-hack","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"sqlite-db-truncate","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"organization-json-generator","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"build-initramfs-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"password-recovery","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"write-compressor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"sanitize-git-repo","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"hf-model-inference","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"raman-fitting","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"configure-git-webserver","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"reshard-c4-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"intrusion-detection","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"modernize-fortran-build","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"fix-pandas-version","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"crack-7z-hash.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"simple-web-scraper","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"swe-bench-langcodes","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"jupyter-notebook-server","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"polyglot-rust-c","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"new-encrypt-command","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"path-tracing","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"sanitize-git-repo.hard","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"processing-pipeline","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"nginx-request-logging","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"fix-git","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"gpt2-codegolf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"polyglot-c-py","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"vim-terminal-task","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"cartpole-rl-training","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251108_abacusai-desktop_multiple","example_id":"extract-safely","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"eval-mteb.hard","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"eval-mteb","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"super-benchmark-upet","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"build-linux-kernel-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"swe-bench-fsspec","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"swe-bench-astropy-2","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"conda-env-conflict-resolution","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"run-pdp11-code","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"swe-bench-astropy-1","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"blind-maze-explorer-5x5","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"play-zork","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"blind-maze-explorer-algorithm.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"incompatible-python-fasttext","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"incompatible-python-fasttext.base_with_hint","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"grid-pattern-transform","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"blind-maze-explorer-algorithm","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"blind-maze-explorer-algorithm.easy","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"train-fasttext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"download-youtube","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"qemu-alpine-ssh","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"cron-broken-network","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"git-multibranch","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"qemu-startup","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"count-dataset-tokens","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"fix-git","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"pytorch-model-cli","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"pytorch-model-cli.hard","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"pytorch-model-cli.easy","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"password-recovery","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"decommissioning-service-with-sensitive-data","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"raman-fitting.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"sanitize-git-repo.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"crack-7z-hash","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"openssl-selfsigned-cert","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"swe-bench-langcodes","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"fix-pandas-version","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"simple-sheets-put","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"prove-plus-comm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"crack-7z-hash.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"new-encrypt-command","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"fix-permissions","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"crack-7z-hash.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"raman-fitting","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"hello-world","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"cartpole-rl-training","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"extract-moves-from-video","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"write-compressor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"csv-to-parquet","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"solana-data","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"organization-json-generator","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"processing-pipeline","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"create-bucket","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"simple-web-scraper","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"build-tcc-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"intrusion-detection","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"nginx-request-logging","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"oom","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"sanitize-git-repo","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"reshard-c4-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"gpt2-codegolf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"fibonacci-server","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"jupyter-notebook-server","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"sqlite-db-truncate","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"build-initramfs-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"security-vulhub-minio","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"heterogeneous-dates","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"modernize-fortran-build","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"git-workflow-hack","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"sqlite-with-gcov","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"path-tracing-reverse","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"chess-best-move","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"get-bitcoin-nodes","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"configure-git-webserver","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"path-tracing","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"tmux-advanced-workflow","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"extract-safely","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"polyglot-c-py","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"vim-terminal-task","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"hf-model-inference","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"20251111_iflow-cli_Minimax-M2","example_id":"polyglot-rust-c","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"fix-permissions","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"grid-pattern-transform","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"play-zork","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"intrusion-detection","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"blind-maze-explorer-5x5","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"fix-git","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"raman-fitting","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"pytorch-model-cli","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"download-youtube","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"run-pdp11-code","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"swe-bench-astropy-1","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"incompatible-python-fasttext.base_with_hint","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"blind-maze-explorer-algorithm.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"blind-maze-explorer-algorithm.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"incompatible-python-fasttext","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"pytorch-model-cli.hard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"csv-to-parquet","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"password-recovery","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"blind-maze-explorer-algorithm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"count-dataset-tokens","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"qemu-alpine-ssh","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"hello-world","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"path-tracing-reverse","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"swe-bench-fsspec","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"security-vulhub-minio","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"modernize-fortran-build","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"git-multibranch","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"conda-env-conflict-resolution","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"pytorch-model-cli.easy","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"eval-mteb","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"cron-broken-network","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"eval-mteb.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"get-bitcoin-nodes","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"nginx-request-logging","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"solana-data","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"train-fasttext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"new-encrypt-command","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"organization-json-generator","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"heterogeneous-dates","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"write-compressor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"openssl-selfsigned-cert","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"git-workflow-hack","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"qemu-startup","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"create-bucket","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"build-initramfs-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"build-tcc-qemu","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"sqlite-with-gcov","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"crack-7z-hash.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"extract-moves-from-video","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"crack-7z-hash.easy","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"sanitize-git-repo","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"crack-7z-hash","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"polyglot-c-py","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"chess-best-move","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"processing-pipeline","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"prove-plus-comm","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"swe-bench-astropy-2","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"build-linux-kernel-qemu","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"raman-fitting.easy","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"path-tracing","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"gpt2-codegolf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"fix-pandas-version","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"hf-model-inference","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"decommissioning-service-with-sensitive-data","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"simple-web-scraper","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"configure-git-webserver","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"extract-safely","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"simple-sheets-put","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"swe-bench-langcodes","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"sanitize-git-repo.hard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"sqlite-db-truncate","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"fibonacci-server","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"oom","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"vim-terminal-task","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"cartpole-rl-training","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"reshard-c4-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"jupyter-notebook-server","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"tmux-advanced-workflow","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"polyglot-rust-c","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-1.0","model":"ob1-09-10-25","example_id":"super-benchmark-upet","pass1":0.0,"count":5} diff --git a/terminal-bench-2.0.jsonl b/terminal-bench-2.0.jsonl new file mode 100644 index 0000000..485c3f8 --- /dev/null +++ b/terminal-bench-2.0.jsonl @@ -0,0 +1,623 @@ +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"video-processing","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"circuit-fibsqrt","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"custom-memory-heap-crash","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"distribution-search","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"torch-tensor-parallelism","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"chess-best-move","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"build-pov-ray","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"largest-eigenval","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"polyglot-rust-c","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"constraints-scheduling","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"sparql-university","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"sanitize-git-repo","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"qemu-startup","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"path-tracing","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"mcmc-sampling-stan","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"path-tracing-reverse","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"configure-git-webserver","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"merge-diff-arc-agi-task","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"pytorch-model-cli","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"hf-model-inference","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"large-scale-text-editing","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"dna-assembly","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"portfolio-optimization","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"filter-js-from-html","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"feal-linear-cryptanalysis","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"winning-avg-corewars","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"gpt2-codegolf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"crack-7z-hash","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"dna-insert","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"cobol-modernization","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"write-compressor","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"make-doom-for-mips","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"overfull-hbox","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"polyglot-c-py","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"mailman","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"train-fasttext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"openssl-selfsigned-cert","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"git-multibranch","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"query-optimize","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"prove-plus-comm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"mteb-retrieve","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"code-from-image","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"headless-terminal","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"count-dataset-tokens","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"password-recovery","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"sam-cell-seg","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"db-wal-recovery","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"caffe-cifar-10","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"build-cython-ext","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"nginx-request-logging","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"bn-fit-modify","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"regex-chess","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"schemelike-metacircular-eval","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"feal-differential-cryptanalysis","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"kv-store-grpc","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"reshard-c4-data","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"mteb-leaderboard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"build-pmars","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"fix-code-vulnerability","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"modernize-scientific-stack","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"protein-assembly","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"pytorch-model-recovery","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"multi-source-data-merger","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"sqlite-db-truncate","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"model-extraction-relu-logits","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"fix-git","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"raman-fitting","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"torch-pipeline-parallelism","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"make-mips-interpreter","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"git-leak-recovery","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"cancel-async-tasks","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"qemu-alpine-ssh","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"llm-inference-batching-scheduler","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"tune-mjcf","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"log-summary-date-ranges","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"break-filter-js-from-html","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"extract-elf","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"financial-document-processor","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"regex-log","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"adaptive-rejection-sampler","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"vulnerable-secret","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"gcode-to-text","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"rstan-to-pystan","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"sqlite-with-gcov","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"extract-moves-from-video","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"install-windows-3.11","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"compile-compcert","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"fix-ocaml-gc","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Ante__Gemini-3-Pro-Preview","example_id":"pypi-server","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"fix-code-vulnerability","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"sqlite-with-gcov","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"filter-js-from-html","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"crack-7z-hash","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"git-multibranch","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"financial-document-processor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"train-fasttext","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"git-leak-recovery","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"merge-diff-arc-agi-task","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"largest-eigenval","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"constraints-scheduling","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"polyglot-c-py","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"extract-moves-from-video","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"build-pov-ray","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"db-wal-recovery","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"qemu-alpine-ssh","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"count-dataset-tokens","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"sparql-university","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"torch-pipeline-parallelism","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"mailman","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"mteb-leaderboard","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"chess-best-move","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"raman-fitting","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"configure-git-webserver","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"hf-model-inference","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"sam-cell-seg","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"dna-assembly","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"feal-linear-cryptanalysis","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"pytorch-model-cli","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"mcmc-sampling-stan","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"video-processing","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"multi-source-data-merger","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"break-filter-js-from-html","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"kv-store-grpc","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"password-recovery","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"regex-chess","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"bn-fit-modify","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"build-pmars","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"sanitize-git-repo","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"make-doom-for-mips","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"fix-ocaml-gc","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"install-windows-3.11","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"torch-tensor-parallelism","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"qemu-startup","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"build-cython-ext","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"compile-compcert","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"llm-inference-batching-scheduler","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"cobol-modernization","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"mteb-retrieve","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"cancel-async-tasks","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"adaptive-rejection-sampler","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"model-extraction-relu-logits","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"prove-plus-comm","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"path-tracing","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"vulnerable-secret","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"sqlite-db-truncate","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"distribution-search","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"openssl-selfsigned-cert","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"overfull-hbox","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"path-tracing-reverse","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"extract-elf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"polyglot-rust-c","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"large-scale-text-editing","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"write-compressor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"gpt2-codegolf","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"caffe-cifar-10","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"protein-assembly","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"dna-insert","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"pytorch-model-recovery","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"fix-git","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"tune-mjcf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"regex-log","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"feal-differential-cryptanalysis","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"modernize-scientific-stack","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"nginx-request-logging","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"reshard-c4-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"winning-avg-corewars","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"schemelike-metacircular-eval","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"query-optimize","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"gcode-to-text","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"headless-terminal","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"custom-memory-heap-crash","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"log-summary-date-ranges","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"circuit-fibsqrt","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"code-from-image","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"rstan-to-pystan","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"pypi-server","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"make-mips-interpreter","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"MAYA__Claude-4.5-sonnet","example_id":"portfolio-optimization","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"distribution-search","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"pypi-server","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"regex-chess","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"path-tracing","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"install-windows-3.11","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"write-compressor","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"reshard-c4-data","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"portfolio-optimization","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"train-fasttext","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"build-pov-ray","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"password-recovery","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"mcmc-sampling-stan","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"sqlite-db-truncate","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"count-dataset-tokens","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"build-cython-ext","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"sqlite-with-gcov","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"mailman","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"crack-7z-hash","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"qemu-alpine-ssh","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"llm-inference-batching-scheduler","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"schemelike-metacircular-eval","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"torch-pipeline-parallelism","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"extract-elf","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"sanitize-git-repo","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"build-pmars","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"code-from-image","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"chess-best-move","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"path-tracing-reverse","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"gpt2-codegolf","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"mteb-leaderboard","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"cancel-async-tasks","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"torch-tensor-parallelism","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"large-scale-text-editing","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"bn-fit-modify","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"sam-cell-seg","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"largest-eigenval","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"fix-code-vulnerability","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"db-wal-recovery","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"filter-js-from-html","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"hf-model-inference","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"video-processing","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"dna-assembly","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"log-summary-date-ranges","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"make-doom-for-mips","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"winning-avg-corewars","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"merge-diff-arc-agi-task","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"caffe-cifar-10","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"feal-differential-cryptanalysis","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"pytorch-model-cli","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"feal-linear-cryptanalysis","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"configure-git-webserver","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"qemu-startup","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"openssl-selfsigned-cert","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"fix-ocaml-gc","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"constraints-scheduling","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"overfull-hbox","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"tune-mjcf","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"sparql-university","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"fix-git","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"headless-terminal","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"kv-store-grpc","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"custom-memory-heap-crash","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"break-filter-js-from-html","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"model-extraction-relu-logits","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"adaptive-rejection-sampler","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"extract-moves-from-video","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"git-leak-recovery","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"regex-log","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"multi-source-data-merger","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"gcode-to-text","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"modernize-scientific-stack","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"vulnerable-secret","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"dna-insert","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"query-optimize","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"cobol-modernization","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"raman-fitting","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"nginx-request-logging","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"financial-document-processor","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"polyglot-rust-c","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"circuit-fibsqrt","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"pytorch-model-recovery","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"git-multibranch","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"prove-plus-comm","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"rstan-to-pystan","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"make-mips-interpreter","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"compile-compcert","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"protein-assembly","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"polyglot-c-py","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__Claude-Opus-4.5","example_id":"mteb-retrieve","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"rstan-to-pystan","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"fix-code-vulnerability","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"llm-inference-batching-scheduler","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"sqlite-with-gcov","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"sqlite-db-truncate","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"compile-compcert","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"sanitize-git-repo","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"configure-git-webserver","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"install-windows-3.11","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"polyglot-rust-c","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"git-multibranch","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"count-dataset-tokens","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"mailman","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"query-optimize","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"circuit-fibsqrt","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"protein-assembly","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"dna-assembly","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"merge-diff-arc-agi-task","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"feal-linear-cryptanalysis","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"tune-mjcf","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"mteb-retrieve","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"vulnerable-secret","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"qemu-startup","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"fix-ocaml-gc","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"db-wal-recovery","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"torch-pipeline-parallelism","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"overfull-hbox","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"extract-elf","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"openssl-selfsigned-cert","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"multi-source-data-merger","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"polyglot-c-py","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"break-filter-js-from-html","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"chess-best-move","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"path-tracing-reverse","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"fix-git","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"make-doom-for-mips","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"custom-memory-heap-crash","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"video-processing","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"extract-moves-from-video","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"modernize-scientific-stack","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"torch-tensor-parallelism","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"dna-insert","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"prove-plus-comm","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"build-pov-ray","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"feal-differential-cryptanalysis","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"password-recovery","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"pytorch-model-recovery","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"mcmc-sampling-stan","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"model-extraction-relu-logits","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"pypi-server","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"cobol-modernization","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"portfolio-optimization","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"cancel-async-tasks","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"build-pmars","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"log-summary-date-ranges","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"make-mips-interpreter","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"kv-store-grpc","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"pytorch-model-cli","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"gpt2-codegolf","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"winning-avg-corewars","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"bn-fit-modify","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"raman-fitting","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"sparql-university","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"caffe-cifar-10","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"git-leak-recovery","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"mteb-leaderboard","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"sam-cell-seg","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"distribution-search","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"hf-model-inference","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"gcode-to-text","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"filter-js-from-html","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"schemelike-metacircular-eval","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"qemu-alpine-ssh","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"write-compressor","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"crack-7z-hash","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"path-tracing","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"reshard-c4-data","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"largest-eigenval","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"train-fasttext","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"code-from-image","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"build-cython-ext","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"regex-chess","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"headless-terminal","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"large-scale-text-editing","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"adaptive-rejection-sampler","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"financial-document-processor","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"constraints-scheduling","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"regex-log","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Mux__GPT-5.2","example_id":"nginx-request-logging","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"modernize-scientific-stack","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"regex-log","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"hf-model-inference","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"mteb-retrieve","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"cancel-async-tasks","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"mcmc-sampling-stan","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"tune-mjcf","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"gpt2-codegolf","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"make-mips-interpreter","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"llm-inference-batching-scheduler","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"protein-assembly","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"largest-eigenval","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"make-doom-for-mips","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"feal-linear-cryptanalysis","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"reshard-c4-data","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"financial-document-processor","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"qemu-alpine-ssh","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"fix-code-vulnerability","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"vulnerable-secret","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"path-tracing","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"portfolio-optimization","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"feal-differential-cryptanalysis","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"pypi-server","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"password-recovery","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"cobol-modernization","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"chess-best-move","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"distribution-search","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"path-tracing-reverse","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"query-optimize","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"polyglot-rust-c","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"caffe-cifar-10","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"video-processing","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"pytorch-model-cli","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"pytorch-model-recovery","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"model-extraction-relu-logits","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"db-wal-recovery","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"multi-source-data-merger","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"schemelike-metacircular-eval","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"large-scale-text-editing","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"write-compressor","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"mailman","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"constraints-scheduling","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"break-filter-js-from-html","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"sqlite-db-truncate","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"log-summary-date-ranges","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"torch-pipeline-parallelism","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"configure-git-webserver","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"custom-memory-heap-crash","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"build-pov-ray","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"fix-git","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"sqlite-with-gcov","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"regex-chess","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"bn-fit-modify","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"polyglot-c-py","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"git-multibranch","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"raman-fitting","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"adaptive-rejection-sampler","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"crack-7z-hash","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"dna-assembly","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"install-windows-3.11","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"headless-terminal","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"dna-insert","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"build-pmars","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"count-dataset-tokens","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"gcode-to-text","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"merge-diff-arc-agi-task","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"circuit-fibsqrt","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"sam-cell-seg","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"torch-tensor-parallelism","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"filter-js-from-html","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"git-leak-recovery","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"build-cython-ext","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"kv-store-grpc","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"prove-plus-comm","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"code-from-image","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"compile-compcert","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"sparql-university","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"rstan-to-pystan","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"sanitize-git-repo","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"train-fasttext","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"nginx-request-logging","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"mteb-leaderboard","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"qemu-startup","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"overfull-hbox","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"extract-moves-from-video","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"openssl-selfsigned-cert","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"fix-ocaml-gc","pass1":1.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"winning-avg-corewars","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"OpenCode__Claude-Opus-4.5","example_id":"extract-elf","pass1":0.0,"count":1} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"vulnerable-secret","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"sam-cell-seg","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"path-tracing-reverse","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"build-cython-ext","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"cobol-modernization","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"qemu-alpine-ssh","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"polyglot-rust-c","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"write-compressor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"sparql-university","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"video-processing","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"kv-store-grpc","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"count-dataset-tokens","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"password-recovery","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"pytorch-model-cli","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"mteb-retrieve","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"distribution-search","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"filter-js-from-html","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"break-filter-js-from-html","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"extract-elf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"compile-compcert","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"chess-best-move","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"build-pmars","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"dna-assembly","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"prove-plus-comm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"schemelike-metacircular-eval","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"raman-fitting","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"polyglot-c-py","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"multi-source-data-merger","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"bn-fit-modify","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"adaptive-rejection-sampler","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"extract-moves-from-video","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"portfolio-optimization","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"caffe-cifar-10","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"fix-git","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"modernize-scientific-stack","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"circuit-fibsqrt","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"dna-insert","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"qemu-startup","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"gpt2-codegolf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"reshard-c4-data","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"protein-assembly","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"tune-mjcf","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"build-pov-ray","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"financial-document-processor","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"sanitize-git-repo","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"db-wal-recovery","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"large-scale-text-editing","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"regex-chess","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"crack-7z-hash","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"make-mips-interpreter","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"git-multibranch","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"mailman","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"overfull-hbox","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"headless-terminal","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"fix-ocaml-gc","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"fix-code-vulnerability","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"feal-differential-cryptanalysis","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"regex-log","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"torch-tensor-parallelism","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"openssl-selfsigned-cert","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"path-tracing","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"constraints-scheduling","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"pytorch-model-recovery","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"sqlite-with-gcov","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"gcode-to-text","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"largest-eigenval","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"pypi-server","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"train-fasttext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"torch-pipeline-parallelism","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"make-doom-for-mips","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"custom-memory-heap-crash","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"mcmc-sampling-stan","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"cancel-async-tasks","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"rstan-to-pystan","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"nginx-request-logging","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"code-from-image","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"sqlite-db-truncate","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"llm-inference-batching-scheduler","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"winning-avg-corewars","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"install-windows-3.11","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"log-summary-date-ranges","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"merge-diff-arc-agi-task","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"git-leak-recovery","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"feal-linear-cryptanalysis","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"query-optimize","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"model-extraction-relu-logits","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"mteb-leaderboard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"hf-model-inference","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"Terminus2__GLM-4.7","example_id":"configure-git-webserver","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"financial-document-processor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"polyglot-rust-c","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"install-windows-3.11","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"tune-mjcf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"gpt2-codegolf","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"caffe-cifar-10","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"constraints-scheduling","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"raman-fitting","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"db-wal-recovery","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"torch-pipeline-parallelism","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"make-mips-interpreter","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"log-summary-date-ranges","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"build-pmars","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"sanitize-git-repo","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"fix-git","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"nginx-request-logging","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"kv-store-grpc","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"sparql-university","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"configure-git-webserver","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"custom-memory-heap-crash","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"sam-cell-seg","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"train-fasttext","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"regex-chess","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"code-from-image","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"openssl-selfsigned-cert","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"path-tracing","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"sqlite-with-gcov","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"extract-moves-from-video","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"adaptive-rejection-sampler","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"gcode-to-text","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"make-doom-for-mips","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"pypi-server","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"path-tracing-reverse","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"bn-fit-modify","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"circuit-fibsqrt","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"count-dataset-tokens","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"build-cython-ext","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"polyglot-c-py","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"dna-assembly","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"pytorch-model-cli","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"extract-elf","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"video-processing","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"rstan-to-pystan","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"cobol-modernization","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"winning-avg-corewars","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"git-multibranch","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"mcmc-sampling-stan","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"distribution-search","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"git-leak-recovery","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"headless-terminal","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"reshard-c4-data","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"mteb-retrieve","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"qemu-startup","pass1":0.4,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"password-recovery","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"fix-ocaml-gc","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"regex-log","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"largest-eigenval","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"qemu-alpine-ssh","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"mailman","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"fix-code-vulnerability","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"torch-tensor-parallelism","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"model-extraction-relu-logits","pass1":0.6,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"merge-diff-arc-agi-task","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"vulnerable-secret","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"multi-source-data-merger","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"portfolio-optimization","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"schemelike-metacircular-eval","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"overfull-hbox","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"protein-assembly","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"build-pov-ray","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"chess-best-move","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"feal-differential-cryptanalysis","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"mteb-leaderboard","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"cancel-async-tasks","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"dna-insert","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"feal-linear-cryptanalysis","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"compile-compcert","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"sqlite-db-truncate","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"hf-model-inference","pass1":0.8,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"break-filter-js-from-html","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"filter-js-from-html","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"modernize-scientific-stack","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"write-compressor","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"prove-plus-comm","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"llm-inference-batching-scheduler","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"large-scale-text-editing","pass1":0.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"pytorch-model-recovery","pass1":0.2,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"crack-7z-hash","pass1":1.0,"count":5} +{"benchmark_id":"terminal-bench-2.0","model":"dakou__qwen3-coder-480b","example_id":"query-optimize","pass1":0.2,"count":5}