From 404c63b4e6fe7eea24ccac3fe3873acef57668d0 Mon Sep 17 00:00:00 2001
From: Daniel Flook <daniel@flook.org>
Date: Tue, 10 Jun 2025 23:48:34 +0100
Subject: [PATCH 01/11] Test on windows

---
 .github/workflows/test.yaml           |  37 ++++
 src/python_minifier/__init__.pyi      |   6 +-
 src/python_minifier/__main__.py       |  31 +++-
 src/python_minifier/module_printer.py |   8 +-
 src/python_minifier/token_printer.py  |  10 +-
 test/test_utf8_encoding.py            | 114 ++++++++++++
 test/test_windows_encoding.py         | 246 ++++++++++++++++++++++++++
 tox-windows.ini                       |  81 +++++++++
 8 files changed, 522 insertions(+), 11 deletions(-)
 create mode 100644 test/test_utf8_encoding.py
 create mode 100644 test/test_windows_encoding.py
 create mode 100644 tox-windows.ini

diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 616015eb..73437a47 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -30,3 +30,40 @@ jobs:
           image: danielflook/python-minifier-build:${{ matrix.python }}-2024-09-15
           run: |
             tox -r -e $(echo "${{ matrix.python }}" | tr -d .)
+
+  test-windows:
+    name: Test Windows
+    runs-on: windows-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13']
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4.2.2
+        with:
+          fetch-depth: 1
+          show-progress: false
+          persist-credentials: false
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Set version statically
+        shell: powershell
+        run: |
+          $content = Get-Content setup.py
+          $content = $content -replace "setup_requires=.*", "version='0.0.0',"
+          $content = $content -replace "use_scm_version=.*", ""
+          Set-Content setup.py $content
+
+      - name: Install tox
+        run: |
+          python -m pip install --upgrade pip
+          pip install tox
+
+      - name: Run tests
+        run: |
+          tox -c tox-windows.ini -r -e ${{ matrix.python-version }}
diff --git a/src/python_minifier/__init__.pyi b/src/python_minifier/__init__.pyi
index 1a371bd9..a87722f0 100644
--- a/src/python_minifier/__init__.pyi
+++ b/src/python_minifier/__init__.pyi
@@ -1,6 +1,6 @@
 import ast
 
-from typing import Any, AnyStr, List, Optional, Text, Union
+from typing import Any, List, Optional, Text, Union
 
 from .transforms.remove_annotations_options import RemoveAnnotationsOptions as RemoveAnnotationsOptions
 
@@ -10,7 +10,7 @@ class UnstableMinification(RuntimeError):
 
 
 def minify(
-    source: AnyStr,
+    source: Union[str, bytes],
     filename: Optional[str] = ...,
     remove_annotations: Union[bool, RemoveAnnotationsOptions] = ...,
     remove_pass: bool = ...,
@@ -36,7 +36,7 @@ def unparse(module: ast.Module) -> Text: ...
 
 
 def awslambda(
-    source: AnyStr,
+    source: Union[str, bytes],
     filename: Optional[Text] = ...,
     entrypoint: Optional[Text] = ...
 ) -> Text: ...
diff --git a/src/python_minifier/__main__.py b/src/python_minifier/__main__.py
index 18a4e5cd..b1948188 100644
--- a/src/python_minifier/__main__.py
+++ b/src/python_minifier/__main__.py
@@ -7,6 +7,27 @@
 from python_minifier import minify
 from python_minifier.transforms.remove_annotations_options import RemoveAnnotationsOptions
 
+# Python 2.7 compatibility for UTF-8 file writing
+if sys.version_info[0] == 2:
+    import codecs
+    def open_utf8(filename, mode):
+        return codecs.open(filename, mode, encoding='utf-8')
+else:
+    def open_utf8(filename, mode):
+        return open(filename, mode, encoding='utf-8')
+
+def safe_stdout_write(text):
+    """Write text to stdout; if stdout's encoding cannot represent it, emit UTF-8 encoded bytes instead."""
+    try:
+        sys.stdout.write(text)
+    except UnicodeEncodeError:
+        if sys.version_info[0] >= 3 and hasattr(sys.stdout, 'buffer'):
+            sys.stdout.flush()  # push pending text first so these raw bytes are not emitted ahead of earlier output
+            sys.stdout.buffer.write(text.encode('utf-8'))
+        else:
+            # Python 2.7 or no buffer attribute - write UTF-8 encoded bytes
+            sys.stdout.write(text.encode('utf-8'))
+
 
 if sys.version_info >= (3, 8):
     from importlib import metadata
@@ -53,10 +74,10 @@ def main():
         source = sys.stdin.buffer.read() if sys.version_info >= (3, 0) else sys.stdin.read()
         minified = do_minify(source, 'stdin', args)
         if args.output:
-            with open(args.output, 'w') as f:
+            with open_utf8(args.output, 'w') as f:
                 f.write(minified)
         else:
-            sys.stdout.write(minified)
+            safe_stdout_write(minified)
 
     else:
         # minify source paths
@@ -70,13 +91,13 @@ def main():
             minified = do_minify(source, path, args)
 
             if args.in_place:
-                with open(path, 'w') as f:
+                with open_utf8(path, 'w') as f:
                     f.write(minified)
             elif args.output:
-                with open(args.output, 'w') as f:
+                with open_utf8(args.output, 'w') as f:
                     f.write(minified)
             else:
-                sys.stdout.write(minified)
+                safe_stdout_write(minified)
 
 
 def parse_args():
diff --git a/src/python_minifier/module_printer.py b/src/python_minifier/module_printer.py
index 46a0ad3a..e1574653 100644
--- a/src/python_minifier/module_printer.py
+++ b/src/python_minifier/module_printer.py
@@ -28,11 +28,15 @@ def __call__(self, module):
         assert isinstance(module, ast.Module)
 
         self.visit_Module(module)
-        return str(self.printer).rstrip('\n' + self.indent_char + ';')
+        # On Python 2.7, preserve unicode strings to avoid encoding issues
+        code = unicode(self.printer) if sys.version_info[0] < 3 else str(self.printer)
+        return code.rstrip('\n' + self.indent_char + ';')
 
     @property
     def code(self):
-        return str(self.printer).rstrip('\n' + self.indent_char + ';')
+        # On Python 2.7, preserve unicode strings to avoid encoding issues
+        code = unicode(self.printer) if sys.version_info[0] < 3 else str(self.printer)
+        return code.rstrip('\n' + self.indent_char + ';')
 
     # region Simple Statements
 
diff --git a/src/python_minifier/token_printer.py b/src/python_minifier/token_printer.py
index bcb24545..52a03c66 100644
--- a/src/python_minifier/token_printer.py
+++ b/src/python_minifier/token_printer.py
@@ -91,7 +91,11 @@ def __init__(self, prefer_single_line=False, allow_invalid_num_warnings=False):
         self._prefer_single_line = prefer_single_line
         self._allow_invalid_num_warnings = allow_invalid_num_warnings
 
-        self._code = ''
+        # Initialize as unicode string on Python 2.7 to handle Unicode content
+        if sys.version_info[0] < 3:
+            self._code = u''
+        else:
+            self._code = ''
         self.indent = 0
         self.unicode_literals = False
         self.previous_token = TokenTypes.NoToken
@@ -99,6 +103,10 @@ def __init__(self, prefer_single_line=False, allow_invalid_num_warnings=False):
     def __str__(self):
         """Return the output code."""
         return self._code
+    
+    def __unicode__(self):
+        """Return the output code as unicode (for Python 2.7 compatibility)."""
+        return self._code
 
     def identifier(self, name):
         """Add an identifier to the output code."""
diff --git a/test/test_utf8_encoding.py b/test/test_utf8_encoding.py
new file mode 100644
index 00000000..f018e60a
--- /dev/null
+++ b/test/test_utf8_encoding.py
@@ -0,0 +1,114 @@
+# -*- coding: utf-8 -*-
+import pytest
+import python_minifier
+import tempfile
+import os
+import codecs
+
+
+def test_minify_utf8_file():
+    """Test minifying a Python file with UTF-8 characters not in Windows default encoding."""
+
+    # Create Python source with UTF-8 characters that are not in Windows-1252
+    # Using Greek letters, Cyrillic, and mathematical symbols
+    source_code = u'''"""
+This module contains UTF-8 characters that are not in Windows-1252 encoding:
+- Greek: α β γ δ ε ζ η θ ι κ λ μ ν ξ ο π ρ σ τ υ φ χ ψ ω
+- Cyrillic: а б в г д е ё ж з и й к л м н о п р с т у ф х ц ч ш щ ъ ы ь э ю я
+- Mathematical: ∀ ∃ ∈ ∉ ∅ ∞ ∑ ∏ √ ∫ ∇ ∂ ≠ ≤ ≥ ≈ ≡ ⊂ ⊃ ⊆ ⊇
+- Arrows: ← → ↑ ↓ ↔ ↕ ↖ ↗ ↘ ↙
+"""
+
+def greet_in_greek():
+    return u"Γεια σας κόσμος"  # "Hello world" in Greek
+
+def mathematical_formula():
+    # Using mathematical symbols in comments
+    # ∀x ∈ ℝ: x² ≥ 0
+    return u"∑ from i=1 to ∞ of 1/i² = π²/6"
+
+def arrow_symbols():
+    directions = {
+        u"left": u"←",
+        u"right": u"→", 
+        u"up": u"↑",
+        u"down": u"↓"
+    }
+    return directions
+
+if __name__ == "__main__":
+    print(greet_in_greek())
+    print(greet_in_russian())
+    print(mathematical_formula())
+    print(arrow_symbols())
+'''
+
+    # Write to temporary file with UTF-8 encoding
+    # Python 2.7 doesn't support encoding parameter, so use binary mode
+    with tempfile.NamedTemporaryFile(mode='wb', suffix='.py', delete=False) as f:
+        f.write(source_code.encode('utf-8'))
+        temp_file = f.name
+
+    try:
+        # Read the file and minify it
+        # Python 2.7 doesn't support encoding parameter in open()
+        with codecs.open(temp_file, 'r', encoding='utf-8') as f:
+            original_content = f.read()
+
+        # This should work - minify the UTF-8 content
+        minified = python_minifier.minify(original_content)
+
+        # Verify the minified code still contains the UTF-8 characters
+        # On Python 2.7, Unicode characters in string literals are escaped but preserved
+        # Test by executing the minified code and checking the actual values
+        minified_globals = {}
+        exec(minified, minified_globals)
+        
+        # The minified code should contain the same functions that return Unicode
+        assert 'greet_in_greek' in minified_globals
+        assert u"Γεια σας κόσμος" == minified_globals['greet_in_greek']()
+        
+        # Test that mathematical symbols are also preserved 
+        assert 'mathematical_formula' in minified_globals
+        assert u"∑ from i=1 to ∞" in minified_globals['mathematical_formula']()
+
+    finally:
+        # Clean up
+        os.unlink(temp_file)
+
+
+def test_minify_utf8_file_direct():
+    """Test minifying a file directly with UTF-8 characters."""
+
+    # Create Python source with UTF-8 characters
+    source_code = u'''# UTF-8 test file
+def emoji_function():
+    """Function with emoji and special characters: 🐍 ∆ ∑ ∫ ∞"""
+    return u"Python is 🐍 awesome! Math symbols: ∆x ≈ 0, ∑∞ = ∞"
+
+class UnicodeClass:
+    """Class with unicode: ñ ü ö ä ë ï ÿ"""
+    def __init__(self):
+        self.message = u"Héllö Wörld with àccénts!"
+        
+    def get_symbols(self):
+        return u"Symbols: ™ © ® ° ± × ÷ ≠ ≤ ≥"
+'''
+
+    # Test direct minification
+    minified = python_minifier.minify(source_code)
+
+    # Verify UTF-8 characters are preserved by executing the minified code
+    minified_globals = {}
+    exec(minified, minified_globals)
+    
+    # Test that the functions return the correct Unicode strings
+    assert u"🐍" in minified_globals['emoji_function']()
+    assert u"∆" in minified_globals['emoji_function']()
+    
+    # Test the class
+    unicode_obj = minified_globals['UnicodeClass']()
+    assert u"Héllö" in unicode_obj.message
+    assert u"àccénts" in unicode_obj.message
+    assert u"™" in unicode_obj.get_symbols()
+    assert u"©" in unicode_obj.get_symbols()
diff --git a/test/test_windows_encoding.py b/test/test_windows_encoding.py
new file mode 100644
index 00000000..20af213d
--- /dev/null
+++ b/test/test_windows_encoding.py
@@ -0,0 +1,246 @@
+# -*- coding: utf-8 -*-
+import pytest
+import tempfile
+import os
+import subprocess
+import sys
+import locale
+import codecs
+
+# Compatibility for subprocess.run (added in Python 3.5)
+def run_subprocess(cmd, timeout=None):
+    """Cross-platform subprocess runner for Python 2.7+ compatibility."""
+    if hasattr(subprocess, 'run'):
+        # Python 3.5+
+        return subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=timeout)
+    else:
+        # Python 2.7, 3.3, 3.4
+        popen = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        stdout, stderr = popen.communicate()
+        # Create a simple result object similar to subprocess.CompletedProcess
+        class Result:
+            def __init__(self, returncode, stdout, stderr):
+                self.returncode = returncode
+                self.stdout = stdout
+                self.stderr = stderr
+        return Result(popen.returncode, stdout, stderr)
+
+def safe_decode(data, encoding='utf-8', errors='replace'):
+    """Safe decode for Python 2.7/3.x compatibility."""
+    if isinstance(data, bytes):
+        try:
+            return data.decode(encoding, errors)
+        except UnicodeDecodeError:
+            return data.decode(encoding, 'replace')
+    return data
+
+
+def test_cli_output_flag_with_unicode():
+    """Regression test for GitHub issues #2, #57, #59, #68, #113, #123, #129.
+
+    Tests that the CLI tool can write Unicode characters to files using --output flag.
+    This should fail on Windows without proper encoding handling.
+    """
+    # Minimal source with all problematic Unicode characters from reported issues
+    source_code = u'print(u"❌ ✓ 🐍 Привет © ∀")'
+
+    # Create temporary source file
+    # Python 2.7 doesn't support encoding parameter, so use binary mode
+    with tempfile.NamedTemporaryFile(mode='wb', suffix='.py', delete=False) as source_file:
+        source_file.write(source_code.encode('utf-8'))
+        source_path = source_file.name
+
+    # Create temporary output file path
+    with tempfile.NamedTemporaryFile(mode='w', suffix='.min.py', delete=False) as output_file:
+        output_path = output_file.name
+
+    try:
+        # Remove output file so pyminify can create it
+        os.unlink(output_path)
+
+        # Run pyminify CLI with --output flag (this should reproduce Windows encoding errors)
+        result = run_subprocess([
+            sys.executable, '-m', 'python_minifier',
+            source_path, '--output', output_path
+        ], timeout=30)
+
+        # Test should fail if CLI command fails (indicates Windows encoding bug)
+        assert result.returncode == 0, "CLI failed with encoding error: {}".format(safe_decode(result.stderr))
+
+        # Verify the output file was created and contains Unicode characters
+        # Python 2.7 doesn't support encoding parameter in open()
+        with codecs.open(output_path, 'r', encoding='utf-8') as f:
+            minified_content = f.read()
+
+        # Verify problematic Unicode characters are preserved
+        if hasattr(sys, 'pypy_version_info') and sys.version_info[0] >= 3:
+            # PyPy3: Unicode characters are escaped as \\u escapes
+            assert "\\u274c" in minified_content  # ❌ Issue #113
+            assert "✓" in minified_content   # Issue #129
+            assert "\\U0001f40d" in minified_content  # 🐍 General emoji
+            assert "Привет" in minified_content  # Issue #123
+            assert "©" in minified_content   # Issue #59
+            assert "∀" in minified_content   # Mathematical symbols
+        elif hasattr(sys, 'pypy_version_info') and sys.version_info[0] < 3:
+            # PyPy2: Unicode characters appear as UTF-8 byte sequences
+            assert "\\xe2\\x9d\\x8c" in minified_content  # ❌ Issue #113
+            assert "\\xe2\\x9c\\x93" in minified_content  # ✓ Issue #129
+            assert "\\xf0\\x9f\\x90\\x8d" in minified_content  # 🐍 General emoji
+        elif sys.version_info[0] >= 3:
+            # CPython 3: Unicode characters should appear literally  
+            assert "❌" in minified_content  # Issue #113
+            assert "✓" in minified_content   # Issue #129
+            assert "🐍" in minified_content  # General emoji
+            assert "Привет" in minified_content  # Issue #123
+            assert "©" in minified_content   # Issue #59
+            assert "∀" in minified_content   # Mathematical symbols
+        else:
+            # Python 2.7: Check for escaped sequences or use Unicode literals
+            assert u"\\xe2\\x9d\\x8c" in minified_content or u"❌" in minified_content  # ❌
+            assert u"\\xe2\\x9c\\x93" in minified_content or u"✓" in minified_content  # ✓ 
+            assert u"\\xf0\\x9f\\x90\\x8d" in minified_content or u"🐍" in minified_content  # 🐍
+
+    finally:
+        # Cleanup
+        if os.path.exists(source_path):
+            os.unlink(source_path)
+        if os.path.exists(output_path):
+            os.unlink(output_path)
+
+
+def test_cli_in_place_with_unicode():
+    """Regression test for --in-place flag with Unicode characters.
+
+    Tests GitHub issues #57, #68 where --in-place fails on Windows.
+    """
+    source_code = u'print(u"❌ ✓ 🐍 Привет © ∀")'
+
+    # Create temporary file
+    # Python 2.7 doesn't support encoding parameter, so use binary mode
+    with tempfile.NamedTemporaryFile(mode='wb', suffix='.py', delete=False) as temp_file:
+        temp_file.write(source_code.encode('utf-8'))
+        temp_path = temp_file.name
+
+    try:
+        # Run pyminify with --in-place flag
+        result = run_subprocess([
+            sys.executable, '-m', 'python_minifier',
+            temp_path, '--in-place'
+        ], timeout=30)
+
+        # Test should fail if CLI command fails (indicates Windows encoding bug)
+        assert result.returncode == 0, "CLI failed with encoding error: {}".format(safe_decode(result.stderr))
+
+        # Verify Unicode characters are preserved in the modified file
+        # Python 2.7 doesn't support encoding parameter in open()
+        with codecs.open(temp_path, 'r', encoding='utf-8') as f:
+            content = f.read()
+
+        if hasattr(sys, 'pypy_version_info') and sys.version_info[0] >= 3:
+            # PyPy3: Unicode characters are escaped as \\u escapes  
+            assert "✓" in content
+            assert "\\u274c" in content  # ❌
+            assert "\\U0001f40d" in content  # 🐍
+            assert "Привет" in content
+            assert "©" in content
+            assert "∀" in content
+        elif hasattr(sys, 'pypy_version_info') and sys.version_info[0] < 3:
+            # PyPy2: Unicode characters appear as UTF-8 byte sequences
+            assert "\\xe2\\x9c\\x93" in content  # ✓
+            assert "\\xe2\\x9d\\x8c" in content  # ❌
+            assert "\\xf0\\x9f\\x90\\x8d" in content  # 🐍
+        elif sys.version_info[0] >= 3:
+            # CPython 3: Unicode characters should appear literally
+            assert "✓" in content
+            assert "❌" in content
+            assert "🐍" in content
+            assert "Привет" in content
+            assert "©" in content
+            assert "∀" in content
+        else:
+            # Python 2.7: Check for escaped sequences or Unicode literals
+            assert u"\\xe2\\x9c\\x93" in content or u"✓" in content  # ✓
+            assert u"\\xe2\\x9d\\x8c" in content or u"❌" in content  # ❌
+            assert u"\\xf0\\x9f\\x90\\x8d" in content or u"🐍" in content  # 🐍
+
+    finally:
+        if os.path.exists(temp_path):
+            os.unlink(temp_path)
+
+
+def test_cli_stdout_with_unicode():
+    """Verify that stdout output works fine (as reported in issues).
+
+    All GitHub issues mention that stdout output works, only file output fails.
+    """
+    source_code = u'print(u"❌ ✓ 🐍 Привет © ∀")'
+
+    # Python 2.7 doesn't support encoding parameter, so use binary mode
+    with tempfile.NamedTemporaryFile(mode='wb', suffix='.py', delete=False) as temp_file:
+        temp_file.write(source_code.encode('utf-8'))
+        temp_path = temp_file.name
+
+    try:
+        # Run without --output or --in-place (should output to stdout)
+        # Use our compatibility function to avoid subprocess decoding issues with Windows
+        # We'll manually decode as UTF-8 to properly handle Unicode characters
+        result = run_subprocess([
+            sys.executable, '-m', 'python_minifier', temp_path
+        ], timeout=30)
+
+        assert result.returncode == 0, "Stdout output failed: {}".format(safe_decode(result.stderr))
+
+        # Decode stdout and verify Unicode characters are present
+        stdout_text = safe_decode(result.stdout)
+        
+        if hasattr(sys, 'pypy_version_info') and sys.version_info[0] >= 3:
+            # PyPy3: Unicode characters are escaped as \\u escapes
+            assert "\\u274c" in stdout_text  # ❌
+            assert "✓" in stdout_text   # ✓
+            assert "\\U0001f40d" in stdout_text  # 🐍
+            assert "Привет" in stdout_text  # Привет
+            assert "©" in stdout_text   # ©
+            assert "∀" in stdout_text   # ∀
+        elif sys.version_info[0] >= 3:
+            # CPython 3: Unicode characters should appear literally
+            assert "❌" in stdout_text
+            assert "✓" in stdout_text
+            assert "🐍" in stdout_text
+            assert "Привет" in stdout_text
+            assert "©" in stdout_text
+            assert "∀" in stdout_text
+        else:
+            # Python 2.7: Unicode characters appear as escaped sequences
+            assert "\\xe2\\x9d\\x8c" in stdout_text  # ❌
+            assert "\\xe2\\x9c\\x93" in stdout_text  # ✓
+            assert "\\xf0\\x9f\\x90\\x8d" in stdout_text  # 🐍
+
+    finally:
+        os.unlink(temp_path)
+
+
+@pytest.mark.skipif(os.name != 'nt', reason="Windows-specific encoding test")
+def test_windows_default_encoding_detection():
+    """Test to detect Windows default encoding that causes issues."""
+
+    # Check what encoding Python would use on Windows for file operations
+    default_encoding = locale.getpreferredencoding()
+
+    # On problematic Windows systems, this is often cp1252, gbk, or similar
+    print("Windows default encoding: {}".format(default_encoding))
+
+    # This test documents the encoding environment for debugging
+    assert default_encoding is not None
+
+
+def test_system_encoding_info():
+    """Diagnostic test to understand system encoding setup."""
+
+    print("System default encoding: {}".format(sys.getdefaultencoding()))
+    print("Filesystem encoding: {}".format(sys.getfilesystemencoding()))
+    print("Preferred encoding: {}".format(locale.getpreferredencoding()))
+    print("Platform: {}".format(sys.platform))
+
+    # This test always passes but provides diagnostic information
+    assert True
+
diff --git a/tox-windows.ini b/tox-windows.ini
new file mode 100644
index 00000000..cd8320e1
--- /dev/null
+++ b/tox-windows.ini
@@ -0,0 +1,81 @@
+[tox]
+envlist = 3.8,3.9,3.10,3.11,3.12,3.13
+
+[testenv]
+commands =
+    pytest {posargs:test} --junitxml=junit-python{envname}.xml --verbose
+
+[testenv:3.8]
+basepython = python3.8
+deps =
+    atomicwrites==1.4.1
+    attrs==20.3.0
+    more-itertools==10.5.0
+    pluggy==0.13.1
+    py==1.11.0
+    pytest==4.5.0
+    PyYAML==5.1
+    setuptools==45.3.0
+    six==1.16.0
+    wcwidth==0.2.13
+
+[testenv:3.9]
+basepython = python3.9
+deps =
+    exceptiongroup==1.2.2
+    iniconfig==2.0.0
+    packaging==24.1
+    pluggy==1.5.0
+    pytest==8.3.3
+    PyYAML==6.0.2
+    tomli==2.0.1
+
+[testenv:3.10]
+basepython = python3.10
+setenv =
+    PIP_CONSTRAINT={toxinidir}/tox/pyyaml-5.4.1-constraints.txt
+deps =
+    attrs==24.2.0
+    iniconfig==2.0.0
+    packaging==24.1
+    pluggy==0.13.1
+    py==1.11.0
+    pyperf==2.2.0
+    pytest==6.2.4
+    PyYAML==5.4.1
+    toml==0.10.2
+
+[testenv:3.11]
+basepython = python3.11
+deps =
+    attrs==24.2.0
+    iniconfig==2.0.0
+    packaging==24.1
+    pluggy==1.5.0
+    py==1.11.0
+    pyperf==2.4.1
+    pytest==7.1.2
+    PyYAML==6.0
+    tomli==2.0.1
+
+[testenv:3.12]
+basepython = python3.12
+deps =
+    iniconfig==2.0.0
+    packaging==24.1
+    pluggy==1.5.0
+    psutil==6.0.0
+    pyperf==2.6.1
+    pytest==7.4.2
+    PyYAML==6.0.1
+
+[testenv:3.13]
+basepython = python3.13
+deps =
+    iniconfig==2.0.0
+    packaging==24.1
+    pluggy==1.5.0
+    psutil==6.0.0
+    pyperf==2.7.0
+    pytest==8.3.3
+    PyYAML==6.0.2

From 885dcf3d7affb9b9cd7c45c06fdf5514d1dad573 Mon Sep 17 00:00:00 2001
From: Daniel Flook <daniel@flook.org>
Date: Thu, 7 Aug 2025 19:22:10 +0100
Subject: [PATCH 02/11] Properly categorise corpus entries with null bytes as
 invalid

---
 corpus_test/generate_report.py  | 2 +-
 corpus_test/generate_results.py | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/corpus_test/generate_report.py b/corpus_test/generate_report.py
index 336491f8..214b11f1 100644
--- a/corpus_test/generate_report.py
+++ b/corpus_test/generate_report.py
@@ -49,7 +49,7 @@ def add(self, result: Result):
             self.recursion_error_count += 1
         elif result.outcome == 'UnstableMinification':
             self.unstable_minification_count += 1
-        elif result.outcome.startswith('Exception'):
+        elif result.outcome.startswith('Exception') and result.outcome != 'Exception: source code string cannot contain null bytes':
             self.exception_count += 1
 
     @property
diff --git a/corpus_test/generate_results.py b/corpus_test/generate_results.py
index ade3fce1..bdd0b17f 100644
--- a/corpus_test/generate_results.py
+++ b/corpus_test/generate_results.py
@@ -50,6 +50,10 @@ def minify_corpus_entry(corpus_path, corpus_entry):
         # Source is too deep
         result.outcome = 'RecursionError'
 
+    except ValueError:
+        # Source is not valid Python
+        result.outcome = 'ValueError'
+
     except SyntaxError:
         # Source not valid for this version of Python
         result.outcome = 'SyntaxError'

From b591e9c62b986f5479762c81b44879a2472246d7 Mon Sep 17 00:00:00 2001
From: Daniel Flook <daniel@flook.org>
Date: Thu, 7 Aug 2025 23:27:52 +0100
Subject: [PATCH 03/11] Filter out expected warnings

test_is_constant_node.py intentionally uses deprecated nodes to test backwards compatibility
test_regrtest.py uses sh, which correctly warns but it's not our code
---
 test/test_is_constant_node.py | 10 ++++++++++
 xtest/test_regrtest.py        |  1 +
 2 files changed, 11 insertions(+)

diff --git a/test/test_is_constant_node.py b/test/test_is_constant_node.py
index 41160ae7..1be7950d 100644
--- a/test/test_is_constant_node.py
+++ b/test/test_is_constant_node.py
@@ -7,6 +7,11 @@
 from python_minifier.util import is_constant_node
 
 
+@pytest.mark.filterwarnings("ignore:ast.Str is deprecated:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:ast.Bytes is deprecated:DeprecationWarning") 
+@pytest.mark.filterwarnings("ignore:ast.Num is deprecated:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:ast.NameConstant is deprecated:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:ast.Ellipsis is deprecated:DeprecationWarning")
 def test_type_nodes():
     assert is_constant_node(ast.Str('a'), ast.Str)
 
@@ -28,6 +33,11 @@ def test_type_nodes():
     assert is_constant_node(ast.Ellipsis(), ast.Ellipsis)
 
 
+@pytest.mark.filterwarnings("ignore:ast.Str is deprecated:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:ast.Bytes is deprecated:DeprecationWarning") 
+@pytest.mark.filterwarnings("ignore:ast.Num is deprecated:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:ast.NameConstant is deprecated:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:ast.Ellipsis is deprecated:DeprecationWarning")
 def test_constant_nodes():
     # only test on python 3.8+
     if sys.version_info < (3, 8):
diff --git a/xtest/test_regrtest.py b/xtest/test_regrtest.py
index 39653694..3c07173a 100644
--- a/xtest/test_regrtest.py
+++ b/xtest/test_regrtest.py
@@ -122,6 +122,7 @@ def get_active_manifest():
 manifest = get_active_manifest()
 
 
+@pytest.mark.filterwarnings(r"ignore:This process \(pid=\d+\) is multi-threaded, use of fork\(\) may lead to deadlocks in the child.:DeprecationWarning:sh")
 @pytest.mark.parametrize('test_case', list(manifest), ids=lambda test_case: repr(test_case))
 def test_regrtest(test_case):
     test_case.run_test()

From 38d35f616200f1baac69fe8f8b7d92d3b225f3ac Mon Sep 17 00:00:00 2001
From: Daniel Flook <daniel@flook.org>
Date: Fri, 8 Aug 2025 07:53:57 +0100
Subject: [PATCH 04/11] Test additional windows versions

---
 .github/workflows/test.yaml | 11 +++++--
 tox-windows.ini             | 61 +++++++++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 73437a47..d33475f7 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -33,11 +33,11 @@ jobs:
 
   test-windows:
     name: Test Windows
-    runs-on: windows-latest
+    runs-on: windows-2022
     strategy:
       fail-fast: false
       matrix:
-        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13']
+        python-version: ['2.7', '3.6', '3.7', '3.8', '3.9', '3.10', '3.11', '3.12', '3.13']
     steps:
       - name: Checkout
         uses: actions/checkout@v4.2.2
@@ -47,10 +47,17 @@ jobs:
           persist-credentials: false
 
       - name: Set up Python
+        if: ${{ matrix.python-version != '2.7' }}
         uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
 
+      - name: Set up Python
+        if: ${{ matrix.python-version == '2.7' }}
+        uses: LizardByte/actions/actions/setup_python@eddc8fc8b27048e25040e37e3585bd3ef9a968ed  # master
+        with:
+          python-version: ${{ matrix.python-version }}
+
       - name: Set version statically
         shell: powershell
         run: |
diff --git a/tox-windows.ini b/tox-windows.ini
index cd8320e1..1a2be052 100644
--- a/tox-windows.ini
+++ b/tox-windows.ini
@@ -5,6 +5,67 @@ envlist = 3.8,3.9,3.10,3.11,3.12,3.13
 commands =
     pytest {posargs:test} --junitxml=junit-python{envname}.xml --verbose
 
+[testenv:2.7]
+basepython = python2.7
+deps =
+    atomicwrites==1.4.1
+    attrs==21.4.0
+    backports.functools-lru-cache==1.6.6
+    colorama==0.4.6
+    configparser==4.0.2
+    contextlib2==0.6.0.post1
+    funcsigs==1.0.2
+    importlib-metadata==2.1.3
+    more-itertools==5.0.0
+    packaging==20.9
+    pathlib2==2.3.7.post1
+    pluggy==0.13.1
+    py==1.11.0
+    pyparsing==2.4.7
+    pytest==4.6.11
+    PyYAML==5.4.1
+    scandir==1.10.0
+    six==1.17.0
+    typing==3.10.0.0
+    wcwidth==0.2.13
+    zipp==1.2.0
+
+[testenv:3.6]
+basepython = python3.6
+deps =
+    atomicwrites==1.4.1
+    attrs==22.2.0
+    colorama==0.4.5
+    importlib-metadata==4.8.3
+    iniconfig==1.1.1
+    packaging==21.3
+    pluggy==1.0.0
+    py==1.11.0
+    pyparsing==3.1.4
+    pytest==7.0.1
+    PyYAML==6.0.1
+    tomli==1.2.3
+    typing_extensions==4.1.1
+    zipp==3.6.0
+
+[testenv:3.7]
+basepython = python3.7
+deps =
+    colorama==0.4.6
+    exceptiongroup==1.3.0
+    importlib-metadata==6.7.0
+    iniconfig==2.0.0
+    packaging==24.0
+    pip==24.0
+    pluggy==1.2.0
+    pytest==7.4.4
+    PyYAML==6.0.1
+    setuptools==68.0.0
+    tomli==2.0.1
+    typing_extensions==4.7.1
+    wheel==0.42.0
+    zipp==3.15.0
+
 [testenv:3.8]
 basepython = python3.8
 deps =

From 33005ee7940d72a16c77dc58d61ad50b2ccfd747 Mon Sep 17 00:00:00 2001
From: Daniel Flook <daniel@flook.org>
Date: Fri, 8 Aug 2025 08:58:37 +0100
Subject: [PATCH 05/11] Show all exceptions

---
 corpus_test/generate_report.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/corpus_test/generate_report.py b/corpus_test/generate_report.py
index 214b11f1..3c9c07a4 100644
--- a/corpus_test/generate_report.py
+++ b/corpus_test/generate_report.py
@@ -184,7 +184,7 @@ def format_difference(compare: Iterable[Result], base: Iterable[Result]) -> str:
         return s
 
 
-def report_larger_than_original(results_dir: str, python_versions: str, minifier_sha: str) -> str:
+def report_larger_than_original(results_dir: str, python_versions: list[str], minifier_sha: str) -> str:
     yield '''
 ## Larger than original
 
@@ -203,7 +203,7 @@ def report_larger_than_original(results_dir: str, python_versions: str, minifier
             yield f'| {entry.corpus_entry} | {entry.original_size} | {entry.minified_size} ({entry.minified_size - entry.original_size:+}) |'
 
 
-def report_unstable(results_dir: str, python_versions: str, minifier_sha: str) -> str:
+def report_unstable(results_dir: str, python_versions: list[str], minifier_sha: str) -> str:
     yield '''
 ## Unstable
 
@@ -222,7 +222,7 @@ def report_unstable(results_dir: str, python_versions: str, minifier_sha: str) -
             yield f'| {entry.corpus_entry} | {python_version} | {entry.original_size} |'
 
 
-def report_exceptions(results_dir: str, python_versions: str, minifier_sha: str) -> str:
+def report_exceptions(results_dir: str, python_versions: list[str], minifier_sha: str) -> str:
     yield '''
 ## Exceptions
 
@@ -247,7 +247,7 @@ def report_exceptions(results_dir: str, python_versions: str, minifier_sha: str)
         yield ' None | | |'
 
 
-def report_larger_than_base(results_dir: str, python_versions: str, minifier_sha: str, base_sha: str) -> str:
+def report_larger_than_base(results_dir: str, python_versions: list[str], minifier_sha: str, base_sha: str) -> str:
     yield '''
 ## Top 10 Larger than base
 
@@ -277,7 +277,7 @@ def report_larger_than_base(results_dir: str, python_versions: str, minifier_sha
         yield '| N/A | N/A | N/A |'
 
 
-def report_slowest(results_dir: str, python_versions: str, minifier_sha: str) -> str:
+def report_slowest(results_dir: str, python_versions: list[str], minifier_sha: str) -> str:
     yield '''
 ## Top 10 Slowest
 
@@ -368,7 +368,7 @@ def report(results_dir: str, minifier_ref: str, minifier_sha: str, base_ref: str
         yield from report_larger_than_base(results_dir, ['3.13'], minifier_sha, base_sha)
         yield from report_slowest(results_dir, ['3.13'], minifier_sha)
         yield from report_unstable(results_dir, ['2.7', '3.3', '3.4', '3.5', '3.6', '3.7', '3.8', '3.9', '3.10', '3.11', '3.12', '3.13'], minifier_sha)
-        yield from report_exceptions(results_dir, ['3.6', '3.7', '3.8', '3.9', '3.10', '3.11', '3.12', '3.13'], minifier_sha)
+        yield from report_exceptions(results_dir, ['2.7', '3.3', '3.4', '3.5', '3.6', '3.7', '3.8', '3.9', '3.10', '3.11', '3.12', '3.13'], minifier_sha)
 
 
 def main():

From 7ca34e2dce62446e8cd48ecb822c3359ca095ead Mon Sep 17 00:00:00 2001
From: Daniel Flook <daniel@flook.org>
Date: Fri, 8 Aug 2025 15:10:39 +0100
Subject: [PATCH 06/11] Simplify tests

---
 ...indows_encoding.py => test_unicode_cli.py} | 156 ++++++------------
 1 file changed, 53 insertions(+), 103 deletions(-)
 rename test/{test_windows_encoding.py => test_unicode_cli.py} (50%)

diff --git a/test/test_windows_encoding.py b/test/test_unicode_cli.py
similarity index 50%
rename from test/test_windows_encoding.py
rename to test/test_unicode_cli.py
index 20af213d..6522f9fe 100644
--- a/test/test_windows_encoding.py
+++ b/test/test_unicode_cli.py
@@ -1,10 +1,8 @@
 # -*- coding: utf-8 -*-
-import pytest
 import tempfile
 import os
 import subprocess
 import sys
-import locale
 import codecs
 
 # Compatibility for subprocess.run (added in Python 3.5)
@@ -36,108 +34,91 @@ def safe_decode(data, encoding='utf-8', errors='replace'):
 
 
 def test_cli_output_flag_with_unicode():
-    """Regression test for GitHub issues #2, #57, #59, #68, #113, #123, #129.
-
+    """
     Tests that the CLI tool can write Unicode characters to files using --output flag.
-    This should fail on Windows without proper encoding handling.
     """
     # Minimal source with all problematic Unicode characters from reported issues
     source_code = u'print(u"❌ ✓ 🐍 Привет © ∀")'
 
-    # Create temporary source file
-    # Python 2.7 doesn't support encoding parameter, so use binary mode
-    with tempfile.NamedTemporaryFile(mode='wb', suffix='.py', delete=False) as source_file:
-        source_file.write(source_code.encode('utf-8'))
-        source_path = source_file.name
+    source_file = tempfile.NamedTemporaryFile(mode='wb', suffix='.py', delete=False)
+    source_file.write(source_code.encode('utf-8'))
+    source_file.close()
 
-    # Create temporary output file path
-    with tempfile.NamedTemporaryFile(mode='w', suffix='.min.py', delete=False) as output_file:
-        output_path = output_file.name
+    output_path = source_file.name + '.min.py'
 
     try:
-        # Remove output file so pyminify can create it
-        os.unlink(output_path)
-
-        # Run pyminify CLI with --output flag (this should reproduce Windows encoding errors)
+        # Run pyminify CLI with --output flag
         result = run_subprocess([
             sys.executable, '-m', 'python_minifier',
-            source_path, '--output', output_path
+            source_file.name, '--output', output_path
         ], timeout=30)
 
-        # Test should fail if CLI command fails (indicates Windows encoding bug)
         assert result.returncode == 0, "CLI failed with encoding error: {}".format(safe_decode(result.stderr))
 
         # Verify the output file was created and contains Unicode characters
-        # Python 2.7 doesn't support encoding parameter in open()
         with codecs.open(output_path, 'r', encoding='utf-8') as f:
             minified_content = f.read()
 
         # Verify problematic Unicode characters are preserved
         if hasattr(sys, 'pypy_version_info') and sys.version_info[0] >= 3:
-            # PyPy3: Unicode characters are escaped as \\u escapes
-            assert "\\u274c" in minified_content  # ❌ Issue #113
-            assert "✓" in minified_content   # Issue #129
-            assert "\\U0001f40d" in minified_content  # 🐍 General emoji
-            assert "Привет" in minified_content  # Issue #123
-            assert "©" in minified_content   # Issue #59
-            assert "∀" in minified_content   # Mathematical symbols
+            # PyPy3: Unicode characters may be escaped as \\u escapes
+            assert "\\u274c" in minified_content
+            assert "✓" in minified_content
+            assert "\\U0001f40d" in minified_content
+            assert "Привет" in minified_content
+            assert "©" in minified_content
+            assert "∀" in minified_content
         elif hasattr(sys, 'pypy_version_info') and sys.version_info[0] < 3:
             # PyPy2: Unicode characters appear as UTF-8 byte sequences
-            assert "\\xe2\\x9d\\x8c" in minified_content  # ❌ Issue #113
-            assert "\\xe2\\x9c\\x93" in minified_content  # ✓ Issue #129
-            assert "\\xf0\\x9f\\x90\\x8d" in minified_content  # 🐍 General emoji
+            assert "\\xe2\\x9d\\x8c" in minified_content  # ❌
+            assert "\\xe2\\x9c\\x93" in minified_content  # ✓
+            assert "\\xf0\\x9f\\x90\\x8d" in minified_content  # 🐍
         elif sys.version_info[0] >= 3:
-            # CPython 3: Unicode characters should appear literally  
-            assert "❌" in minified_content  # Issue #113
-            assert "✓" in minified_content   # Issue #129
-            assert "🐍" in minified_content  # General emoji
-            assert "Привет" in minified_content  # Issue #123
-            assert "©" in minified_content   # Issue #59
-            assert "∀" in minified_content   # Mathematical symbols
+            # CPython 3: Unicode characters should appear literally
+            assert "❌" in minified_content
+            assert "✓" in minified_content
+            assert "🐍" in minified_content
+            assert "Привет" in minified_content
+            assert "©" in minified_content
+            assert "∀" in minified_content
         else:
-            # Python 2.7: Check for escaped sequences or use Unicode literals
+            # Python 2.7: Unicode characters appear as escaped sequences
-            assert u"\\xe2\\x9d\\x8c" in minified_content or u"❌" in minified_content  # ❌
-            assert u"\\xe2\\x9c\\x93" in minified_content or u"✓" in minified_content  # ✓ 
-            assert u"\\xf0\\x9f\\x90\\x8d" in minified_content or u"🐍" in minified_content  # 🐍
+            assert u"\\xe2\\x9d\\x8c" in minified_content
+            assert u"\\xe2\\x9c\\x93" in minified_content
+            assert u"\\xf0\\x9f\\x90\\x8d" in minified_content
 
     finally:
         # Cleanup
-        if os.path.exists(source_path):
-            os.unlink(source_path)
+        if os.path.exists(source_file.name):
+            os.unlink(source_file.name)
         if os.path.exists(output_path):
             os.unlink(output_path)
 
 
 def test_cli_in_place_with_unicode():
-    """Regression test for --in-place flag with Unicode characters.
-
-    Tests GitHub issues #57, #68 where --in-place fails on Windows.
+    """
+    Tests that the CLI tool can write Unicode characters to files using --in-place flag.
     """
     source_code = u'print(u"❌ ✓ 🐍 Привет © ∀")'
 
-    # Create temporary file
-    # Python 2.7 doesn't support encoding parameter, so use binary mode
-    with tempfile.NamedTemporaryFile(mode='wb', suffix='.py', delete=False) as temp_file:
-        temp_file.write(source_code.encode('utf-8'))
-        temp_path = temp_file.name
+    temp_file = tempfile.NamedTemporaryFile(mode='wb', suffix='.py', delete=False)
+    temp_file.write(source_code.encode('utf-8'))
+    temp_file.close()
 
     try:
         # Run pyminify with --in-place flag
         result = run_subprocess([
             sys.executable, '-m', 'python_minifier',
-            temp_path, '--in-place'
+            temp_file.name, '--in-place'
         ], timeout=30)
 
-        # Test should fail if CLI command fails (indicates Windows encoding bug)
         assert result.returncode == 0, "CLI failed with encoding error: {}".format(safe_decode(result.stderr))
 
-        # Verify Unicode characters are preserved in the modified file
-        # Python 2.7 doesn't support encoding parameter in open()
-        with codecs.open(temp_path, 'r', encoding='utf-8') as f:
+        with codecs.open(temp_file.name, 'r', encoding='utf-8') as f:
             content = f.read()
 
         if hasattr(sys, 'pypy_version_info') and sys.version_info[0] >= 3:
-            # PyPy3: Unicode characters are escaped as \\u escapes  
+            # PyPy3: Unicode characters may be escaped as \\u escapes
             assert "✓" in content
             assert "\\u274c" in content  # ❌
             assert "\\U0001f40d" in content  # 🐍
@@ -158,43 +139,38 @@ def test_cli_in_place_with_unicode():
             assert "©" in content
             assert "∀" in content
         else:
-            # Python 2.7: Check for escaped sequences or Unicode literals
-            assert u"\\xe2\\x9c\\x93" in content or u"✓" in content  # ✓
-            assert u"\\xe2\\x9d\\x8c" in content or u"❌" in content  # ❌
-            assert u"\\xf0\\x9f\\x90\\x8d" in content or u"🐍" in content  # 🐍
+            # Python 2.7: Unicode characters appear as escaped sequences
+            assert "\\xe2\\x9d\\x8c" in content  # ❌
+            assert "\\xe2\\x9c\\x93" in content  # ✓
+            assert "\\xf0\\x9f\\x90\\x8d" in content  # 🐍
 
     finally:
-        if os.path.exists(temp_path):
-            os.unlink(temp_path)
+        if os.path.exists(temp_file.name):
+            os.unlink(temp_file.name)
 
 
 def test_cli_stdout_with_unicode():
-    """Verify that stdout output works fine (as reported in issues).
-
-    All GitHub issues mention that stdout output works, only file output fails.
+    """
+    Tests that the CLI tool can write Unicode characters to stdout.
     """
     source_code = u'print(u"❌ ✓ 🐍 Привет © ∀")'
 
-    # Python 2.7 doesn't support encoding parameter, so use binary mode
-    with tempfile.NamedTemporaryFile(mode='wb', suffix='.py', delete=False) as temp_file:
-        temp_file.write(source_code.encode('utf-8'))
-        temp_path = temp_file.name
+    temp_file = tempfile.NamedTemporaryFile(mode='wb', suffix='.py', delete=False)
+    temp_file.write(source_code.encode('utf-8'))
+    temp_file.close()
 
     try:
         # Run without --output or --in-place (should output to stdout)
-        # Use our compatibility function to avoid subprocess decoding issues with Windows
-        # We'll manually decode as UTF-8 to properly handle Unicode characters
         result = run_subprocess([
-            sys.executable, '-m', 'python_minifier', temp_path
+            sys.executable, '-m', 'python_minifier', temp_file.name
         ], timeout=30)
 
         assert result.returncode == 0, "Stdout output failed: {}".format(safe_decode(result.stderr))
 
-        # Decode stdout and verify Unicode characters are present
         stdout_text = safe_decode(result.stdout)
-        
+
         if hasattr(sys, 'pypy_version_info') and sys.version_info[0] >= 3:
-            # PyPy3: Unicode characters are escaped as \\u escapes
+            # PyPy3: Unicode characters may be escaped as \\u escapes
             assert "\\u274c" in stdout_text  # ❌
             assert "✓" in stdout_text   # ✓
             assert "\\U0001f40d" in stdout_text  # 🐍
@@ -216,31 +192,5 @@ def test_cli_stdout_with_unicode():
             assert "\\xf0\\x9f\\x90\\x8d" in stdout_text  # 🐍
 
     finally:
-        os.unlink(temp_path)
-
-
-@pytest.mark.skipif(os.name != 'nt', reason="Windows-specific encoding test")
-def test_windows_default_encoding_detection():
-    """Test to detect Windows default encoding that causes issues."""
-
-    # Check what encoding Python would use on Windows for file operations
-    default_encoding = locale.getpreferredencoding()
-
-    # On problematic Windows systems, this is often cp1252, gbk, or similar
-    print("Windows default encoding: {}".format(default_encoding))
-
-    # This test documents the encoding environment for debugging
-    assert default_encoding is not None
-
-
-def test_system_encoding_info():
-    """Diagnostic test to understand system encoding setup."""
-
-    print("System default encoding: {}".format(sys.getdefaultencoding()))
-    print("Filesystem encoding: {}".format(sys.getfilesystemencoding()))
-    print("Preferred encoding: {}".format(locale.getpreferredencoding()))
-    print("Platform: {}".format(sys.platform))
-
-    # This test always passes but provides diagnostic information
-    assert True
-
+        if os.path.exists(temp_file.name):
+            os.unlink(temp_file.name)

From 4ddbb8f52a1a5ab1c75e54c7c5c0ed5f916c8be9 Mon Sep 17 00:00:00 2001
From: Daniel Flook <daniel@flook.org>
Date: Mon, 11 Aug 2025 09:15:54 +0100
Subject: [PATCH 07/11] Properly classify recursion error on Python <=3.4

---
 corpus_test/generate_report.py | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/corpus_test/generate_report.py b/corpus_test/generate_report.py
index 3c9c07a4..9ed6b3ec 100644
--- a/corpus_test/generate_report.py
+++ b/corpus_test/generate_report.py
@@ -9,6 +9,33 @@
 
 ENHANCED_REPORT = os.environ.get('ENHANCED_REPORT', True)
 
+def is_recursion_error(python_version: str, result: Result) -> bool:
+    """
+    Check if the result is a recursion error
+    """
+    if result.outcome == 'RecursionError':
+        return True
+
+    if python_version in ['2.7', '3.3', '3.4']:
+        # RecursionError only exists from Python 3.5; older versions raise RuntimeError, recorded here as a generic Exception
+        return result.outcome.startswith('Exception: maximum recursion depth exceeded')
+
+    return False
+
+def is_syntax_error(python_version: str, result: Result) -> bool:
+    """
+    Check if the result is a syntax error, including null bytes in the source (recorded as an Exception outcome)
+    """
+    if result.outcome == 'SyntaxError':
+        return True
+
+    if python_version == '2.7' and result.outcome == 'Exception: compile() expected string without null bytes':
+        return True
+
+    if python_version != '2.7' and result.outcome == 'Exception: source code string cannot contain null bytes':
+        return True
+
+    return False
 
 @dataclass
 class ResultSet:
@@ -45,11 +72,11 @@ def add(self, result: Result):
             if result.original_size < result.minified_size:
                 self.larger_than_original_count += 1
 
-        if result.outcome == 'RecursionError':
+        if is_recursion_error(self.python_version, result):
             self.recursion_error_count += 1
         elif result.outcome == 'UnstableMinification':
             self.unstable_minification_count += 1
-        elif result.outcome.startswith('Exception') and result.outcome != 'Exception: source code string cannot contain null bytes':
+        elif result.outcome.startswith('Exception') is not is_syntax_error(self.python_version, result):
             self.exception_count += 1
 
     @property

From 6cecf3a140694548286609e2ad6e4bd164807cff Mon Sep 17 00:00:00 2001
From: Daniel Flook <daniel@flook.org>
Date: Mon, 11 Aug 2025 14:47:45 +0100
Subject: [PATCH 08/11] Properly classify recursion error on Python <=3.4

---
 .config/corpus_report.markdownlint.yaml |  3 +++
 .github/workflows/test_corpus.yaml      |  9 ++++++++-
 corpus_test/generate_report.py          | 10 +++++-----
 3 files changed, 16 insertions(+), 6 deletions(-)
 create mode 100644 .config/corpus_report.markdownlint.yaml

diff --git a/.config/corpus_report.markdownlint.yaml b/.config/corpus_report.markdownlint.yaml
new file mode 100644
index 00000000..6626bf76
--- /dev/null
+++ b/.config/corpus_report.markdownlint.yaml
@@ -0,0 +1,3 @@
+# Markdownlint configuration for corpus test reports
+# Disable line length rule since tables naturally exceed 80 characters
+MD013: false
\ No newline at end of file
diff --git a/.github/workflows/test_corpus.yaml b/.github/workflows/test_corpus.yaml
index 996240bd..07c7369c 100644
--- a/.github/workflows/test_corpus.yaml
+++ b/.github/workflows/test_corpus.yaml
@@ -144,4 +144,11 @@ jobs:
           volumes: |
             /corpus-results:/corpus-results
           run: |
-            python3.13 workflow/corpus_test/generate_report.py /corpus-results ${{ inputs.ref }} ${{ steps.ref.outputs.commit }} ${{ inputs.base-ref }} ${{ steps.base-ref.outputs.commit }} >> $GITHUB_STEP_SUMMARY
+            python3.13 workflow/corpus_test/generate_report.py /corpus-results ${{ inputs.ref }} ${{ steps.ref.outputs.commit }} ${{ inputs.base-ref }} ${{ steps.base-ref.outputs.commit }} | tee -a $GITHUB_STEP_SUMMARY > report.md
+
+      - name: Lint Report
+        uses: DavidAnson/markdownlint-cli2-action@05f32210e84442804257b2a6f20b273450ec8265 # v19
+        continue-on-error: true
+        with:
+          config: '.config/corpus_report.markdownlint.yaml'
+          globs: 'report.md'
diff --git a/corpus_test/generate_report.py b/corpus_test/generate_report.py
index 9ed6b3ec..76d70fb7 100644
--- a/corpus_test/generate_report.py
+++ b/corpus_test/generate_report.py
@@ -76,7 +76,7 @@ def add(self, result: Result):
             self.recursion_error_count += 1
         elif result.outcome == 'UnstableMinification':
             self.unstable_minification_count += 1
-        elif result.outcome.startswith('Exception') is not is_syntax_error(self.python_version, result):
+        elif result.outcome.startswith('Exception') and not is_syntax_error(self.python_version, result):
             self.exception_count += 1
 
     @property
@@ -101,13 +101,13 @@ def larger_than_original(self) -> Iterable[Result]:
     def recursion_error(self) -> Iterable[Result]:
         """Return those entries that have a recursion error"""
         for result in self.entries.values():
-            if result.outcome == 'RecursionError':
+            if is_recursion_error(self.python_version, result):
                 yield result
 
     def exception(self) -> Iterable[Result]:
         """Return those entries that have an exception"""
         for result in self.entries.values():
-            if result.outcome.startswith('Exception'):
+            if result.outcome.startswith('Exception') and not is_syntax_error(self.python_version, result) and not is_recursion_error(self.python_version, result):
                 yield result
 
     def unstable_minification(self) -> Iterable[Result]:
@@ -271,7 +271,7 @@ def report_exceptions(results_dir: str, python_versions: list[str], minifier_sha
             yield f'| {entry.corpus_entry} | {python_version} | {entry.outcome} |'
 
     if not exceptions_found:
-        yield ' None | | |'
+        yield '| None | | |'
 
 
 def report_larger_than_base(results_dir: str, python_versions: list[str], minifier_sha: str, base_sha: str) -> str:
@@ -387,7 +387,7 @@ def report(results_dir: str, minifier_ref: str, minifier_sha: str, base_ref: str
                 f'| {format_difference(summary.larger_than_original(), base_summary.larger_than_original())} ' +
                 f'| {format_difference(summary.recursion_error(), base_summary.recursion_error())} ' +
                 f'| {format_difference(summary.unstable_minification(), base_summary.unstable_minification())} ' +
-                f'| {format_difference(summary.exception(), base_summary.exception())} '
+                f'| {format_difference(summary.exception(), base_summary.exception())} |'
         )
 
     if ENHANCED_REPORT:

From ef30252ac1396a7aef70fd2a4a7c672275eb975e Mon Sep 17 00:00:00 2001
From: Daniel Flook <daniel@flook.org>
Date: Mon, 11 Aug 2025 14:52:28 +0100
Subject: [PATCH 09/11] Startup failure is not helpful

---
 .github/github_sucks | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/github_sucks b/.github/github_sucks
index e69de29b..8b137891 100644
--- a/.github/github_sucks
+++ b/.github/github_sucks
@@ -0,0 +1 @@
+

From 138d8ba65700ced84557b5e07747f1e2c003a10c Mon Sep 17 00:00:00 2001
From: Daniel Flook <daniel@flook.org>
Date: Mon, 11 Aug 2025 14:54:28 +0100
Subject: [PATCH 10/11] Startup failure is not helpful

---
 .github/github_sucks | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/github_sucks b/.github/github_sucks
index 8b137891..139597f9 100644
--- a/.github/github_sucks
+++ b/.github/github_sucks
@@ -1 +1,2 @@
 
+

From cdcadc8fe9dc90fef055085807256f4bb516ad00 Mon Sep 17 00:00:00 2001
From: Daniel Flook <daniel@flook.org>
Date: Mon, 11 Aug 2025 14:55:41 +0100
Subject: [PATCH 11/11] Startup failure is not helpful

---
 .github/github_sucks | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/github_sucks b/.github/github_sucks
index 139597f9..b28b04f6 100644
--- a/.github/github_sucks
+++ b/.github/github_sucks
@@ -1,2 +1,3 @@
 
 
+