Avoid overly-specific encoding assertion

We don't pin charset-normalizer versions, so assertions about its behaviour are likely to flake. For example:

- charset-normalizer 3.2.0 reports this encoding as cp037.
- charset-normalizer 3.3.0 reports it as CP949.
- charset-normalizer 3.3.1 and 3.3.2 report it as Big5.

We don't care so much what the exact encoding is; we just care that we get a usable result. So, we change our assertion here to be much less specific.
remix · Nov 23, 2023 · 5b61461 · 5b61461
1 parent c3afb13
commit 5b61461
Showing 1 changed file with 14 additions and 10 deletions.
diff --git a/tests/test_utilities.py b/tests/test_utilities.py
@@ -49,13 +49,17 @@ def test_empty_df():
     assert actual.equals(expected)
 
 
-@pytest.mark.parametrize(
-    "test_string,encoding",
-    [
-        (b"abcde", "utf-8"),  # straight up ascii is a subset of unicode
-        (b"Eyjafjallaj\xc3\xb6kull", "utf-8"),  # actual unicode
-        (b"\xC4pple", "cp037"),  # non-unicode, ISO characterset
-    ],
-)
-def test_detect_encoding(test_string, encoding):
-    assert detect_encoding(io.BytesIO(test_string)) == encoding
+def test_detect_encoding():
+    # straight up ascii is a subset of unicode
+    assert detect_encoding(io.BytesIO(b"abcde")) == "utf-8"
+
+    # actual unicode
+    assert detect_encoding(io.BytesIO(b"Eyjafjallaj\xc3\xb6kull")) == "utf-8"
+
+    # non-unicode, ISO characterset
+    #
+    # (Note: we don't assert a specific characterset, because we don't want
+    # tests to break as changes are made in charset-normalizer. See:
+    # https://github.com/remix/partridge/pull/84)
+    enc = detect_encoding(io.BytesIO(b"\xC4pple"))
+    assert enc and enc != "utf-8"