Skip to content

Commit

Permalink
Merge pull request #84 from remix/fix-encoding-detection-test
Browse files: browse the repository at this point in the history
Avoid overly-specific assertion about charset-normalizer result
  • Loading branch information
harto authored Nov 30, 2023
2 parents c3afb13 + 5b61461 commit e68fa25
Showing 1 changed file with 14 additions and 10 deletions.
24 changes: 14 additions & 10 deletions tests/test_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,17 @@ def test_empty_df():
assert actual.equals(expected)


@pytest.mark.parametrize(
    ("test_string", "encoding"),
    [
        # Plain ASCII is a subset of Unicode, so UTF-8 is expected.
        (b"abcde", "utf-8"),
        # Genuine multi-byte UTF-8 input.
        (b"Eyjafjallaj\xc3\xb6kull", "utf-8"),
        # Not valid UTF-8; this case expects the name "cp037" exactly.
        (b"\xC4pple", "cp037"),
    ],
)
def test_detect_encoding(test_string, encoding):
    """Check that detect_encoding returns the expected encoding name.

    Each case wraps the raw bytes in an in-memory binary stream, passes it
    to ``detect_encoding`` and compares the returned name with ``encoding``.
    """
    stream = io.BytesIO(test_string)
    detected = detect_encoding(stream)
    assert detected == encoding
def test_detect_encoding():
    """Exercise detect_encoding on ASCII, UTF-8 and non-UTF-8 byte strings."""

    def detect(raw):
        # Every case hands detect_encoding a fresh in-memory binary stream.
        return detect_encoding(io.BytesIO(raw))

    # Plain ASCII is a subset of Unicode, so UTF-8 is expected.
    assert detect(b"abcde") == "utf-8"

    # Genuine multi-byte UTF-8 input.
    assert detect(b"Eyjafjallaj\xc3\xb6kull") == "utf-8"

    # Bytes that are not valid UTF-8 (a non-Unicode, ISO-style character set).
    #
    # No specific character set is asserted here: the exact answer may change
    # between charset-normalizer releases, and the test should not break when
    # it does. See: https://github.com/remix/partridge/pull/84
    non_utf8 = detect(b"\xC4pple")
    assert non_utf8 and non_utf8 != "utf-8"

0 comments on commit e68fa25

Please sign in to comment.