From 6b3c3d26270a6b1c72f73abacb943322c29160ab Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Mon, 16 Feb 2026 14:06:26 +0000
Subject: [PATCH 1/4] Escaping nodeids in query contexts to allow for
 parameterised tests and other special characters

---
 src/pytest_flakefighters/plugin.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/pytest_flakefighters/plugin.py b/src/pytest_flakefighters/plugin.py
index f671bb6..410d728 100644
--- a/src/pytest_flakefighters/plugin.py
+++ b/src/pytest_flakefighters/plugin.py
@@ -4,6 +4,7 @@
 
 from datetime import datetime
 from enum import Enum
+from re import escape
 from typing import Union
 from xml.etree import ElementTree as ET
 
@@ -99,7 +100,8 @@ def pytest_runtest_call(self, item: pytest.Item):
         item.start = datetime.now().timestamp()
         self.cov.start()
         # Lines cannot appear as covered on our tests because the coverage measurement is leaking into the self.cov
+        # The context label is stored verbatim, so it stays unescaped; only the regex given to set_query_contexts is
         self.cov.switch_context(item.nodeid)  # pragma: no cover
         yield  # pragma: no cover
         self.cov.stop()  # pragma: no cover
         item.stop = datetime.now().timestamp()
@@ -166,7 +168,7 @@ def pytest_runtest_protocol(self, item: pytest.Item, nextitem: pytest.Item) -> b
                     skipped = True
                 if report.when == "call":
                     line_coverage = self.cov.get_data()
-                    line_coverage.set_query_contexts(["collection", item.nodeid])
+                    line_coverage.set_query_contexts(["collection", escape(item.nodeid)])
                     captured_output = dict(report.sections)
                     test_execution = TestExecution(  # pylint: disable=E1123
                         outcome=report.outcome,

From eac8cfd2c4b1225aa94c3a8afc95305a581ca5d2 Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Mon, 16 Feb 2026 14:31:07 +0000
Subject: [PATCH 2/4] Added test

---
 tests/conftest.py                  | 20 ++++++++++++++++++++
 tests/resources/test_gatorgrade.py | 28 ++++++++++++++++++++++++++++
 tests/test_end_2_end.py            |  8 ++++++++
 3 files changed, 56 insertions(+)
 create mode 100644 tests/resources/test_gatorgrade.py

diff --git a/tests/conftest.py b/tests/conftest.py
index f080fb4..aff8397 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -31,6 +31,26 @@ def fixture_flaky_triangle_repo(tmpdir_factory):
     return repo
 
 
+@pytest.fixture(scope="function", name="gatorgrade_repo")
+def fixture_gatorgrade_repo(tmpdir_factory):
+    """
+    Fixture for a repo containing the gatorgrade test that broke the plugin.
+    """
+    repo_root = tmpdir_factory.mktemp("gatorgrade_repo")
+    repo = git.Repo.init(repo_root, initial_branch="main")
+
+    shutil.copy(
+        os.path.join(CURRENT_DIR, "resources", "test_gatorgrade.py"), os.path.join(repo_root, "test_gatorgrade.py")
+    )
+    repo.index.add(["test_gatorgrade.py"])
+    repo.index.commit("Initial commit of test file.")
+    os.chdir(repo_root)
+    os.mkdir("test_assignment")
+    with open(os.path.join("test_assignment", "result.txt"), "w") as f:
+        f.write("✓  Complete all TODOs\n✓  Use an if statement\n✓  Complete all TODOs\nPassed 3/3 (100%) of checks")
+    return repo
+
+
 @pytest.fixture(scope="function", name="deflaker_repo")
 def fixture_deflaker_repo(tmpdir_factory):
     """
diff --git a/tests/resources/test_gatorgrade.py b/tests/resources/test_gatorgrade.py
new file mode 100644
index 0000000..6f0277c
--- /dev/null
+++ b/tests/resources/test_gatorgrade.py
@@ -0,0 +1,28 @@
+import os
+
+import pytest
+
+
+@pytest.mark.parametrize(
+    "assignment_path,expected_output_and_freqs",
+    [
+        (
+            "test_assignment",
+            [
+                ("Complete all TODOs", 2),
+                ("Use an if statement", 1),
+                ("✓", 3),
+                ("✕", 0),
+                ("Passed 3/3 (100%) of checks", 1),
+            ],
+        )
+    ],
+)
+def test_full_integration_creates_valid_output(assignment_path, expected_output_and_freqs):
+    """Simplified version of
+    https://github.com/GatorEducator/gatorgrade/blob/91cb86d5383675c5bc3c95363bc29b45108b2e29/tests/test_main.py#L70
+    which initially broke the plugin due to the test IDs contaning [] characters from the parameterisation."""
+    with open(os.path.join(assignment_path, "result.txt")) as f:
+        result = f.read()
+    for output, freq in expected_output_and_freqs:
+        assert result.count(output) == freq
diff --git a/tests/test_end_2_end.py b/tests/test_end_2_end.py
index 17873ba..0ae118a 100644
--- a/tests/test_end_2_end.py
+++ b/tests/test_end_2_end.py
@@ -317,3 +317,11 @@ def test_display_test_level_verdicts(pytester, deflaker_repo):
     result.assert_outcomes(failed=1)
     result.stdout.fnmatch_lines(["FAILED app.py::test_app - assert False"])
     result.stdout.fnmatch_lines(["  CoverageIndependence: genuine"])
+
+
+def test_gatorgrade(pytester, gatorgrade_repo):
+    """
+    Test that flakefighters can run OK on parameterised tests.
+    """
+    result = pytester.runpytest(os.path.join(gatorgrade_repo.working_dir, "test_gatorgrade.py"))
+    result.assert_outcomes(passed=1)

From e960ec132b4a4aad8fe57cbc6bba79ac1f4ece30 Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Mon, 16 Feb 2026 14:43:21 +0000
Subject: [PATCH 3/4] Renamed test

---
 tests/conftest.py                                     | 6 ++----
 tests/resources/{test_gatorgrade.py => gatorgrade.py} | 0
 tests/test_end_2_end.py                               | 4 ++--
 3 files changed, 4 insertions(+), 6 deletions(-)
 rename tests/resources/{test_gatorgrade.py => gatorgrade.py} (100%)

diff --git a/tests/conftest.py b/tests/conftest.py
index aff8397..0c2891f 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -39,10 +39,8 @@ def fixture_gatorgrade_repo(tmpdir_factory):
     repo_root = tmpdir_factory.mktemp("gatorgrade_repo")
     repo = git.Repo.init(repo_root, initial_branch="main")
 
-    shutil.copy(
-        os.path.join(CURRENT_DIR, "resources", "test_gatorgrade.py"), os.path.join(repo_root, "test_gatorgrade.py")
-    )
-    repo.index.add(["test_gatorgrade.py"])
+    shutil.copy(os.path.join(CURRENT_DIR, "resources", "gatorgrade.py"), os.path.join(repo_root, "gatorgrade.py"))
+    repo.index.add(["gatorgrade.py"])
     repo.index.commit("Initial commit of test file.")
     os.chdir(repo_root)
     os.mkdir("test_assignment")
diff --git a/tests/resources/test_gatorgrade.py b/tests/resources/gatorgrade.py
similarity index 100%
rename from tests/resources/test_gatorgrade.py
rename to tests/resources/gatorgrade.py
diff --git a/tests/test_end_2_end.py b/tests/test_end_2_end.py
index 0ae118a..9dd7574 100644
--- a/tests/test_end_2_end.py
+++ b/tests/test_end_2_end.py
@@ -319,9 +319,9 @@ def test_display_test_level_verdicts(pytester, deflaker_repo):
     result.stdout.fnmatch_lines(["  CoverageIndependence: genuine"])
 
 
-def test_gatorgrade(pytester, gatorgrade_repo):
+def test_gatorgrade_parameterised(pytester, gatorgrade_repo):
     """
     Test that flakefighters can run OK on parameterised tests.
     """
-    result = pytester.runpytest(os.path.join(gatorgrade_repo.working_dir, "test_gatorgrade.py"))
+    result = pytester.runpytest(os.path.join(gatorgrade_repo.working_dir, "gatorgrade.py"))
     result.assert_outcomes(passed=1)

From 9592b6167a1716b135df29deb302287d5f32b6d6 Mon Sep 17 00:00:00 2001
From: Michael Foster <m.foster@sheffield.ac.uk>
Date: Mon, 16 Feb 2026 14:48:36 +0000
Subject: [PATCH 4/4] Added encoding for Windows tests

---
 tests/conftest.py             | 2 +-
 tests/resources/gatorgrade.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 0c2891f..c0bbf0c 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -44,7 +44,7 @@ def fixture_gatorgrade_repo(tmpdir_factory):
     repo.index.commit("Initial commit of test file.")
     os.chdir(repo_root)
     os.mkdir("test_assignment")
-    with open(os.path.join("test_assignment", "result.txt"), "w") as f:
+    with open(os.path.join("test_assignment", "result.txt"), "w", encoding="utf8") as f:
         f.write("✓  Complete all TODOs\n✓  Use an if statement\n✓  Complete all TODOs\nPassed 3/3 (100%) of checks")
     return repo
 
diff --git a/tests/resources/gatorgrade.py b/tests/resources/gatorgrade.py
index 6f0277c..e8101ad 100644
--- a/tests/resources/gatorgrade.py
+++ b/tests/resources/gatorgrade.py
@@ -22,7 +22,7 @@ def test_full_integration_creates_valid_output(assignment_path, expected_output_
     """Simplified version of
     https://github.com/GatorEducator/gatorgrade/blob/91cb86d5383675c5bc3c95363bc29b45108b2e29/tests/test_main.py#L70
     which initially broke the plugin due to the test IDs contaning [] characters from the parameterisation."""
-    with open(os.path.join(assignment_path, "result.txt")) as f:
+    with open(os.path.join(assignment_path, "result.txt"), encoding="utf8") as f:
         result = f.read()
     for output, freq in expected_output_and_freqs:
         assert result.count(output) == freq