aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/python/Twisted/py3/twisted/web/http_headers.py
diff options
context:
space:
mode:
authorrobot-piglet <robot-piglet@yandex-team.com>2024-11-12 07:54:50 +0300
committerrobot-piglet <robot-piglet@yandex-team.com>2024-11-12 08:05:59 +0300
commit55cec9f6b0618fb3570fc8ef66aad151f4932591 (patch)
tree9198c2ca0b0305269062c3674ce79f19c4990e65 /contrib/python/Twisted/py3/twisted/web/http_headers.py
parentb77b1fbf262ea4f40e33a60ce32c4db4e5e49015 (diff)
downloadydb-55cec9f6b0618fb3570fc8ef66aad151f4932591.tar.gz
Intermediate changes
commit_hash:c229701a8b4f4d9ee57ce1ed763099d862d53fa6
Diffstat (limited to 'contrib/python/Twisted/py3/twisted/web/http_headers.py')
-rw-r--r--contrib/python/Twisted/py3/twisted/web/http_headers.py155
1 files changed, 92 insertions, 63 deletions
diff --git a/contrib/python/Twisted/py3/twisted/web/http_headers.py b/contrib/python/Twisted/py3/twisted/web/http_headers.py
index 8b1d41adb6..88b653439b 100644
--- a/contrib/python/Twisted/py3/twisted/web/http_headers.py
+++ b/contrib/python/Twisted/py3/twisted/web/http_headers.py
@@ -22,18 +22,26 @@ from typing import (
)
from twisted.python.compat import cmp, comparable
+from twisted.web._abnf import _istoken
+
+
+class InvalidHeaderName(ValueError):
+ """
+ HTTP header names must be tokens, per RFC 9110 section 5.1.
+ """
+
_T = TypeVar("_T")
def _sanitizeLinearWhitespace(headerComponent: bytes) -> bytes:
r"""
- Replace linear whitespace (C{\n}, C{\r\n}, C{\r}) in a header key
- or value with a single space.
+ Replace linear whitespace (C{\n}, C{\r\n}, C{\r}) in a header
+ value with a single space.
- @param headerComponent: The header key or value to sanitize.
+ @param headerComponent: The header value to sanitize.
- @return: The sanitized header key or value.
+ @return: The sanitized header value.
"""
return b" ".join(headerComponent.splitlines())
@@ -53,31 +61,10 @@ class Headers:
ensure no decoding or encoding is done, and L{Headers} will treat the keys
and values as opaque byte strings.
- @cvar _caseMappings: A L{dict} that maps lowercase header names
- to their canonicalized representation, for headers with unconventional
- capitalization.
-
- @cvar _canonicalHeaderCache: A L{dict} that maps header names to their
- canonicalized representation.
-
@ivar _rawHeaders: A L{dict} mapping header names as L{bytes} to L{list}s of
header values as L{bytes}.
"""
- _caseMappings: ClassVar[Dict[bytes, bytes]] = {
- b"content-md5": b"Content-MD5",
- b"dnt": b"DNT",
- b"etag": b"ETag",
- b"p3p": b"P3P",
- b"te": b"TE",
- b"www-authenticate": b"WWW-Authenticate",
- b"x-xss-protection": b"X-XSS-Protection",
- }
-
- _canonicalHeaderCache: ClassVar[Dict[Union[bytes, str], bytes]] = {}
-
- _MAX_CACHED_HEADERS: ClassVar[int] = 10_000
-
__slots__ = ["_rawHeaders"]
def __init__(
@@ -109,39 +96,6 @@ class Headers:
)
return NotImplemented
- def _encodeName(self, name: Union[str, bytes]) -> bytes:
- """
- Encode the name of a header (eg 'Content-Type') to an ISO-8859-1
- encoded bytestring if required. It will be canonicalized and
- whitespace-sanitized.
-
- @param name: A HTTP header name
-
- @return: C{name}, encoded if required, lowercased
- """
- if canonicalName := self._canonicalHeaderCache.get(name, None):
- return canonicalName
-
- bytes_name = name.encode("iso-8859-1") if isinstance(name, str) else name
-
- if bytes_name.lower() in self._caseMappings:
- # Some headers have special capitalization:
- result = self._caseMappings[bytes_name.lower()]
- else:
- result = _sanitizeLinearWhitespace(
- b"-".join([word.capitalize() for word in bytes_name.split(b"-")])
- )
-
- # In general, we should only see a very small number of header
- # variations in the real world, so caching them is fine. However, an
- # attacker could generate infinite header variations to fill up RAM, so
- # we cap how many we cache. The performance degradation from lack of
- # caching won't be that bad, and legit traffic won't hit it.
- if len(self._canonicalHeaderCache) < self._MAX_CACHED_HEADERS:
- self._canonicalHeaderCache[name] = result
-
- return result
-
def copy(self):
"""
Return a copy of itself with the same headers set.
@@ -158,7 +112,7 @@ class Headers:
@return: C{True} if the header exists, otherwise C{False}.
"""
- return self._encodeName(name) in self._rawHeaders
+ return _nameEncoder.encode(name) in self._rawHeaders
def removeHeader(self, name: AnyStr) -> None:
"""
@@ -168,7 +122,7 @@ class Headers:
@return: L{None}
"""
- self._rawHeaders.pop(self._encodeName(name), None)
+ self._rawHeaders.pop(_nameEncoder.encode(name), None)
def setRawHeaders(
self, name: Union[str, bytes], values: Sequence[Union[str, bytes]]
@@ -186,7 +140,7 @@ class Headers:
@return: L{None}
"""
- _name = self._encodeName(name)
+ _name = _nameEncoder.encode(name)
encodedValues: List[bytes] = []
for v in values:
if isinstance(v, str):
@@ -205,7 +159,7 @@ class Headers:
@param value: The value to set for the named header.
"""
- self._rawHeaders.setdefault(self._encodeName(name), []).append(
+ self._rawHeaders.setdefault(_nameEncoder.encode(name), []).append(
_sanitizeLinearWhitespace(
value.encode("utf8") if isinstance(value, str) else value
)
@@ -234,7 +188,7 @@ class Headers:
@return: If the named header is present, a sequence of its
values. Otherwise, C{default}.
"""
- encodedName = self._encodeName(name)
+ encodedName = _nameEncoder.encode(name)
values = self._rawHeaders.get(encodedName, [])
if not values:
return default
@@ -252,4 +206,79 @@ class Headers:
return iter(self._rawHeaders.items())
+class _NameEncoder:
+ """
+ C{_NameEncoder} converts HTTP header names to L{bytes} and canonicalizies
+ their capitalization.
+
+ @cvar _caseMappings: A L{dict} that maps conventionally-capitalized
+ header names to their canonicalized representation, for headers with
+ unconventional capitalization.
+
+ @cvar _canonicalHeaderCache: A L{dict} that maps header names to their
+ canonicalized representation.
+ """
+
+ __slots__ = ("_canonicalHeaderCache",)
+ _canonicalHeaderCache: Dict[Union[bytes, str], bytes]
+
+ _caseMappings: ClassVar[Dict[bytes, bytes]] = {
+ b"Content-Md5": b"Content-MD5",
+ b"Dnt": b"DNT",
+ b"Etag": b"ETag",
+ b"P3p": b"P3P",
+ b"Te": b"TE",
+ b"Www-Authenticate": b"WWW-Authenticate",
+ b"X-Xss-Protection": b"X-XSS-Protection",
+ }
+
+ _MAX_CACHED_HEADERS: ClassVar[int] = 10_000
+
+ def __init__(self):
+ self._canonicalHeaderCache = {}
+
+ def encode(self, name: Union[str, bytes]) -> bytes:
+ """
+ Encode the name of a header (eg 'Content-Type') to an ISO-8859-1
+ bytestring if required. It will be canonicalized to Http-Header-Case.
+
+ @raises InvalidHeaderName:
+ If the header name contains invalid characters like whitespace
+ or NUL.
+
+ @param name: An HTTP header name
+
+ @return: C{name}, encoded if required, in Header-Case
+ """
+ if canonicalName := self._canonicalHeaderCache.get(name):
+ return canonicalName
+
+ bytes_name = name.encode("iso-8859-1") if isinstance(name, str) else name
+
+ if not _istoken(bytes_name):
+ raise InvalidHeaderName(bytes_name)
+
+ result = b"-".join([word.capitalize() for word in bytes_name.split(b"-")])
+
+ # Some headers have special capitalization:
+ if result in self._caseMappings:
+ result = self._caseMappings[result]
+
+ # In general, we should only see a very small number of header
+ # variations in the real world, so caching them is fine. However, an
+ # attacker could generate infinite header variations to fill up RAM, so
+ # we cap how many we cache. The performance degradation from lack of
+ # caching won't be that bad, and legit traffic won't hit it.
+ if len(self._canonicalHeaderCache) < self._MAX_CACHED_HEADERS:
+ self._canonicalHeaderCache[name] = result
+
+ return result
+
+
+_nameEncoder = _NameEncoder()
+"""
+The global name encoder.
+"""
+
+
__all__ = ["Headers"]