Skip to content

Commit

Permalink
drop _guess_content_json_utf
Browse files Browse the repository at this point in the history
  • Loading branch information
Tester authored and Tester committed Oct 30, 2023
1 parent c97f840 commit e7e7665
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 42 deletions.
35 changes: 1 addition & 34 deletions httpx/_models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
-import codecs
 import datetime
 import email.message
 import json as jsonlib
Expand Down Expand Up @@ -758,40 +757,8 @@ def raise_for_status(self) -> "Response":
         message = message.format(self, error_type=error_type)
         raise HTTPStatusError(message, request=request, response=self)

-    def _guess_content_json_utf(self) -> typing.Optional[str]:
-        # JSON always starts with two ASCII characters, so detection is as
-        # easy as counting the nulls and from their location and count
-        # determine the encoding. Also detect a BOM, if present.
-        sample = self.content[:4]
-        if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
-            return "utf-32"  # BOM included
-        if sample[:3] == codecs.BOM_UTF8:
-            return "utf-8-sig"  # BOM included, MS style (discouraged)
-        if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
-            return "utf-16"  # BOM included
-        nullcount = sample.count(b"\0")
-        if nullcount == 0:
-            return "utf-8"
-        if nullcount == 2:
-            if sample[::2] == b"\0\0":  # 1st and 3rd are null
-                return "utf-16-be"
-            if sample[1::2] == b"\0\0":  # 2nd and 4th are null
-                return "utf-16-le"
-            # Did not detect 2 valid UTF-16 ascii-range characters
-        if nullcount == 3:
-            if sample[:3] == b"\0\0\0":
-                return "utf-32-be"
-            if sample[1:] == b"\0\0\0":
-                return "utf-32-le"
-            # Did not detect a valid UTF-32 ascii-range character
-        return None
-
     def json(self, **kwargs: typing.Any) -> typing.Any:
-        if self.charset_encoding is None and self.content and len(self.content) > 3:
-            encoding = self._guess_content_json_utf()
-            if encoding is not None:
-                return jsonlib.loads(self.content.decode(encoding), **kwargs)
-        return jsonlib.loads(self.text, **kwargs)
+        return jsonlib.loads(self.content, **kwargs)

     @property
     def cookies(self) -> "Cookies":
Expand Down
19 changes: 11 additions & 8 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
+import json
 import logging
 import os
 import random
Expand All @@ -23,14 +24,16 @@
     ),
 )
 def test_encoded(encoding):
-    data = "{}".encode(encoding)
-    response = httpx.Response(200, content=data)
-    assert response._guess_content_json_utf() == encoding
+    content = '{"abc": 123}'.encode(encoding)
+    response = httpx.Response(200, content=content)
+    assert response.json() == {"abc": 123}


 def test_bad_utf_like_encoding():
-    response = httpx.Response(200, content=b"\x00\x00\x00\x00")
-    assert response._guess_content_json_utf() is None
+    content = b"\x00\x00\x00\x00"
+    response = httpx.Response(200, content=content)
+    with pytest.raises(json.decoder.JSONDecodeError):
+        response.json()


 @pytest.mark.parametrize(
Expand All @@ -43,9 +46,9 @@ def test_bad_utf_like_encoding():
     ),
 )
 def test_guess_by_bom(encoding, expected):
-    data = "\ufeff{}".encode(encoding)
-    response = httpx.Response(200, content=data)
-    assert response._guess_content_json_utf() == expected
+    content = '\ufeff{"abc": 123}'.encode(encoding)
+    response = httpx.Response(200, content=content)
+    assert response.json() == {"abc": 123}


 @pytest.mark.parametrize(
Expand Down

0 comments on commit e7e7665

Please sign in to comment.