From ad19d27eb5f0e16131a0f4f7d93853a9cacd9703 Mon Sep 17 00:00:00 2001
From: Jo2234 <64789670+Jo2234@users.noreply.github.com>
Date: Sun, 1 Mar 2026 16:05:16 +0800
Subject: [PATCH] feat: add RFC 7616 support for non-Latin credentials in HTTPDigestAuth

HTTPDigestAuth currently fails when usernames contain non-Latin-1
characters (e.g., Cyrillic, Czech diacritics). The username is directly
interpolated into the Digest header, which either produces garbled
output or raises a UnicodeEncodeError.

This commit implements RFC 7616 (HTTP Digest Access Authentication)
extensions to properly handle non-Latin-1 credentials:

1. username* parameter (RFC 7616 Section 3.4 + RFC 5987):
   When the username contains characters outside Latin-1, the
   username* parameter with RFC 5987 encoding is used instead of the
   standard username parameter.
   Format: username*=UTF-8''percent-encoded-value

2. userhash support (RFC 7616 Section 3.4.4):
   When the server sends userhash=true in the WWW-Authenticate
   challenge, the client hashes the username with the realm using the
   selected hash algorithm, providing privacy protection.

3. charset support (RFC 7616 Section 3.3):
   When the server advertises charset=UTF-8, the client echoes it back
   in the Authorization header.
The fix also adds: - _is_latin1_encodable() helper to detect non-Latin-1 strings - _encode_rfc5987() helper for RFC 5987 encoding - Comprehensive test suite in tests/test_digest_rfc7616.py Fixes #6102 --- src/requests/auth.py | 74 ++++++++++++++++++-- tests/test_digest_rfc7616.py | 130 +++++++++++++++++++++++++++++++++++ 2 files changed, 199 insertions(+), 5 deletions(-) create mode 100644 tests/test_digest_rfc7616.py diff --git a/src/requests/auth.py b/src/requests/auth.py index c39b645189..9ce43fd4b2 100644 --- a/src/requests/auth.py +++ b/src/requests/auth.py @@ -12,6 +12,7 @@ import time import warnings from base64 import b64encode +from urllib.parse import quote from ._internal_utils import to_native_string from .compat import basestring, str, urlparse @@ -123,6 +124,36 @@ def init_per_thread_state(self): self._thread_local.pos = None self._thread_local.num_401_calls = None + @staticmethod + def _is_latin1_encodable(s): + """Check whether string can be encoded as Latin-1 (ISO-8859-1). + + Per RFC 7616 Section 3.4.4, if the username cannot be encoded + as Latin-1, the ``username*`` parameter with RFC 5987 encoding + must be used instead of the ``username`` parameter. + """ + try: + s.encode("latin-1") + return True + except (UnicodeEncodeError, UnicodeDecodeError): + return False + + @staticmethod + def _encode_rfc5987(value): + """Encode a string per RFC 5987 for use in the username* parameter. + + Format: ``UTF-8''percent-encoded-value`` + + Per RFC 5987 Section 3.2.1, attr-char is defined as:: + + attr-char = ALPHA / DIGIT + / "!" / "#" / "$" / "&" / "+" / "-" / "." + / "^" / "_" / "`" / "|" / "~" + + All other characters are percent-encoded as UTF-8 octets. 
+ """ + return "UTF-8''" + quote(value, safe="!#$&+-.^_`|~") + def build_digest_header(self, method, url): """ :rtype: str @@ -135,6 +166,11 @@ def build_digest_header(self, method, url): opaque = self._thread_local.chal.get("opaque") hash_utf8 = None + # RFC 7616 Section 3.4.4: userhash support + userhash = self._thread_local.chal.get("userhash", "false").lower() == "true" + # RFC 7616 Section 3.3: charset support + charset = self._thread_local.chal.get("charset", "").upper() + if algorithm is None: _algorithm = "MD5" else: @@ -217,11 +253,36 @@ def sha512_utf8(x): self._thread_local.last_nonce = nonce - # XXX should the partial digests be encoded too? - base = ( - f'username="{self.username}", realm="{realm}", nonce="{nonce}", ' - f'uri="{path}", response="{respdig}"' - ) + # RFC 7616 Section 3.4: Determine username representation. + # + # If userhash is true, hash the username per RFC 7616 Section 3.4.4: + # userhash = hash(username ":" realm) + # and use the hashed value as the username parameter. + # + # If the username contains characters outside Latin-1, use the + # username* parameter with RFC 5987 encoding per RFC 7616 Section 3.4. + # + # Otherwise, use the standard username parameter. 
+ if userhash: + # RFC 7616 Section 3.4.4: username is hashed + username_value = hash_utf8(f"{self.username}:{realm}") + base = ( + f'username="{username_value}", realm="{realm}", ' + f'nonce="{nonce}", uri="{path}", response="{respdig}", ' + f"userhash=true" + ) + elif not self._is_latin1_encodable(self.username): + # RFC 7616 Section 3.4: use username* with RFC 5987 encoding + username_star = self._encode_rfc5987(self.username) + base = ( + f"username*={username_star}, realm=\"{realm}\", " + f'nonce="{nonce}", uri="{path}", response="{respdig}"' + ) + else: + base = ( + f'username="{self.username}", realm="{realm}", nonce="{nonce}", ' + f'uri="{path}", response="{respdig}"' + ) if opaque: base += f', opaque="{opaque}"' if algorithm: @@ -230,6 +291,9 @@ def sha512_utf8(x): base += f', digest="{entdig}"' if qop: base += f', qop="auth", nc={ncvalue}, cnonce="{cnonce}"' + # RFC 7616 Section 3.3: include charset when server advertised it + if charset == "UTF-8": + base += ', charset="UTF-8"' return f"Digest {base}" diff --git a/tests/test_digest_rfc7616.py b/tests/test_digest_rfc7616.py new file mode 100644 index 0000000000..56d923498c --- /dev/null +++ b/tests/test_digest_rfc7616.py @@ -0,0 +1,130 @@ +"""Tests for RFC 7616 HTTPDigestAuth non-Latin credential handling.""" + +import pytest + +from requests.auth import HTTPDigestAuth + + +class TestDigestAuthRFC7616: + """Test RFC 7616 extensions for HTTP Digest Authentication. 
+ + RFC 7616 adds support for: + - username* parameter with RFC 5987 encoding for non-Latin-1 usernames + - userhash parameter to hash the username for privacy + - charset parameter to indicate UTF-8 support + """ + + def _setup_auth(self, username, password, chal_overrides=None): + """Create an HTTPDigestAuth instance with a pre-populated challenge.""" + auth = HTTPDigestAuth(username, password) + auth.init_per_thread_state() + chal = { + "realm": "test@example.com", + "nonce": "dcd98b7102dd2f0e8b11d0f600bfb0c093", + "qop": "auth", + "algorithm": "MD5", + } + if chal_overrides: + chal.update(chal_overrides) + auth._thread_local.chal = chal + return auth + + def test_latin1_username_uses_standard_parameter(self): + """Latin-1 encodable usernames should use the standard username parameter.""" + auth = self._setup_auth("user", "pass") + header = auth.build_digest_header("GET", "http://example.com/") + assert header is not None + assert 'username="user"' in header + assert "username*=" not in header + + def test_non_latin1_username_uses_username_star(self): + """Non-Latin-1 usernames should use the username* parameter (RFC 5987).""" + auth = self._setup_auth("Ondřej", "heslíčko") + header = auth.build_digest_header("GET", "http://example.com/") + assert header is not None + assert "username*=UTF-8''" in header + assert 'username="' not in header + + def test_cyrillic_username_uses_username_star(self): + """Cyrillic usernames must use username* since they're not Latin-1.""" + auth = self._setup_auth("Сергей", "пароль") + header = auth.build_digest_header("GET", "http://example.com/") + assert header is not None + assert "username*=UTF-8''" in header + assert 'username="' not in header + + def test_latin1_extended_username_uses_standard(self): + """Characters like ü, é, ñ ARE Latin-1 and should use standard username.""" + auth = self._setup_auth("José", "contraseña") + header = auth.build_digest_header("GET", "http://example.com/") + assert header is not None + assert 
'username="José"' in header + assert "username*=" not in header + + def test_userhash_hashes_username(self): + """When userhash=true, the username should be hashed (RFC 7616 §3.4.4).""" + auth = self._setup_auth("user", "pass", {"userhash": "true"}) + header = auth.build_digest_header("GET", "http://example.com/") + assert header is not None + # The username in the header should NOT be "user" in plain text + assert 'username="user"' not in header + assert "userhash=true" in header + # It should be a hex hash instead + assert 'username="' in header + + def test_userhash_false_uses_plain_username(self): + """When userhash=false (default), username should appear in plain text.""" + auth = self._setup_auth("user", "pass", {"userhash": "false"}) + header = auth.build_digest_header("GET", "http://example.com/") + assert header is not None + assert 'username="user"' in header + assert "userhash=true" not in header + + def test_charset_utf8_included_when_advertised(self): + """charset=UTF-8 should be echoed when the server advertises it.""" + auth = self._setup_auth("user", "pass", {"charset": "UTF-8"}) + header = auth.build_digest_header("GET", "http://example.com/") + assert header is not None + assert 'charset="UTF-8"' in header + + def test_charset_not_included_when_not_advertised(self): + """charset should not appear when server doesn't advertise it.""" + auth = self._setup_auth("user", "pass") + header = auth.build_digest_header("GET", "http://example.com/") + assert header is not None + assert "charset" not in header + + def test_is_latin1_encodable(self): + """Test the Latin-1 encoding check helper.""" + assert HTTPDigestAuth._is_latin1_encodable("hello") is True + assert HTTPDigestAuth._is_latin1_encodable("José") is True + assert HTTPDigestAuth._is_latin1_encodable("über") is True + assert HTTPDigestAuth._is_latin1_encodable("Ondřej") is False + assert HTTPDigestAuth._is_latin1_encodable("Сергей") is False + assert HTTPDigestAuth._is_latin1_encodable("日本語") is 
False + + def test_encode_rfc5987(self): + """Test RFC 5987 encoding of values.""" + # ASCII string should pass through mostly unchanged + result = HTTPDigestAuth._encode_rfc5987("hello") + assert result == "UTF-8''hello" + + # Non-ASCII should be percent-encoded + result = HTTPDigestAuth._encode_rfc5987("Ondřej") + assert result.startswith("UTF-8''Ond") + assert "%C5%99" in result # ř encoded as UTF-8 + + def test_non_latin1_username_generates_valid_response(self): + """Ensure the response digest is still computed correctly with non-Latin usernames.""" + auth = self._setup_auth("Ondřej", "heslíčko") + header = auth.build_digest_header("GET", "http://example.com/dir/index.html") + assert header is not None + assert header.startswith("Digest ") + # Should contain all required digest fields + assert "realm=" in header + assert "nonce=" in header + assert "uri=" in header + assert "response=" in header + assert "qop=" in header + assert "nc=" in header + assert "cnonce=" in header