diff options
author | robot-piglet <robot-piglet@yandex-team.com> | 2024-11-12 07:54:50 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2024-11-12 08:05:59 +0300 |
commit | 55cec9f6b0618fb3570fc8ef66aad151f4932591 (patch) | |
tree | 9198c2ca0b0305269062c3674ce79f19c4990e65 /contrib/python/Twisted/py3/twisted/web/http_headers.py | |
parent | b77b1fbf262ea4f40e33a60ce32c4db4e5e49015 (diff) | |
download | ydb-55cec9f6b0618fb3570fc8ef66aad151f4932591.tar.gz |
Intermediate changes
commit_hash:c229701a8b4f4d9ee57ce1ed763099d862d53fa6
Diffstat (limited to 'contrib/python/Twisted/py3/twisted/web/http_headers.py')
-rw-r--r-- | contrib/python/Twisted/py3/twisted/web/http_headers.py | 155 |
1 files changed, 92 insertions, 63 deletions
diff --git a/contrib/python/Twisted/py3/twisted/web/http_headers.py b/contrib/python/Twisted/py3/twisted/web/http_headers.py index 8b1d41adb6..88b653439b 100644 --- a/contrib/python/Twisted/py3/twisted/web/http_headers.py +++ b/contrib/python/Twisted/py3/twisted/web/http_headers.py @@ -22,18 +22,26 @@ from typing import ( ) from twisted.python.compat import cmp, comparable +from twisted.web._abnf import _istoken + + +class InvalidHeaderName(ValueError): + """ + HTTP header names must be tokens, per RFC 9110 section 5.1. + """ + _T = TypeVar("_T") def _sanitizeLinearWhitespace(headerComponent: bytes) -> bytes: r""" - Replace linear whitespace (C{\n}, C{\r\n}, C{\r}) in a header key - or value with a single space. + Replace linear whitespace (C{\n}, C{\r\n}, C{\r}) in a header + value with a single space. - @param headerComponent: The header key or value to sanitize. + @param headerComponent: The header value to sanitize. - @return: The sanitized header key or value. + @return: The sanitized header value. """ return b" ".join(headerComponent.splitlines()) @@ -53,31 +61,10 @@ class Headers: ensure no decoding or encoding is done, and L{Headers} will treat the keys and values as opaque byte strings. - @cvar _caseMappings: A L{dict} that maps lowercase header names - to their canonicalized representation, for headers with unconventional - capitalization. - - @cvar _canonicalHeaderCache: A L{dict} that maps header names to their - canonicalized representation. - @ivar _rawHeaders: A L{dict} mapping header names as L{bytes} to L{list}s of header values as L{bytes}. """ - _caseMappings: ClassVar[Dict[bytes, bytes]] = { - b"content-md5": b"Content-MD5", - b"dnt": b"DNT", - b"etag": b"ETag", - b"p3p": b"P3P", - b"te": b"TE", - b"www-authenticate": b"WWW-Authenticate", - b"x-xss-protection": b"X-XSS-Protection", - } - - _canonicalHeaderCache: ClassVar[Dict[Union[bytes, str], bytes]] = {} - - _MAX_CACHED_HEADERS: ClassVar[int] = 10_000 - __slots__ = ["_rawHeaders"] def __init__( @@ -109,39 +96,6 @@ class Headers: ) return NotImplemented - def _encodeName(self, name: Union[str, bytes]) -> bytes: - """ - Encode the name of a header (eg 'Content-Type') to an ISO-8859-1 - encoded bytestring if required. It will be canonicalized and - whitespace-sanitized. - - @param name: A HTTP header name - - @return: C{name}, encoded if required, lowercased - """ - if canonicalName := self._canonicalHeaderCache.get(name, None): - return canonicalName - - bytes_name = name.encode("iso-8859-1") if isinstance(name, str) else name - - if bytes_name.lower() in self._caseMappings: - # Some headers have special capitalization: - result = self._caseMappings[bytes_name.lower()] - else: - result = _sanitizeLinearWhitespace( - b"-".join([word.capitalize() for word in bytes_name.split(b"-")]) - ) - - # In general, we should only see a very small number of header - # variations in the real world, so caching them is fine. However, an - # attacker could generate infinite header variations to fill up RAM, so - # we cap how many we cache. The performance degradation from lack of - # caching won't be that bad, and legit traffic won't hit it. - if len(self._canonicalHeaderCache) < self._MAX_CACHED_HEADERS: - self._canonicalHeaderCache[name] = result - - return result - def copy(self): """ Return a copy of itself with the same headers set. @@ -158,7 +112,7 @@ class Headers: @return: C{True} if the header exists, otherwise C{False}. """ - return self._encodeName(name) in self._rawHeaders + return _nameEncoder.encode(name) in self._rawHeaders def removeHeader(self, name: AnyStr) -> None: """ @@ -168,7 +122,7 @@ class Headers: @return: L{None} """ - self._rawHeaders.pop(self._encodeName(name), None) + self._rawHeaders.pop(_nameEncoder.encode(name), None) def setRawHeaders( self, name: Union[str, bytes], values: Sequence[Union[str, bytes]] @@ -186,7 +140,7 @@ class Headers: @return: L{None} """ - _name = self._encodeName(name) + _name = _nameEncoder.encode(name) encodedValues: List[bytes] = [] for v in values: if isinstance(v, str): @@ -205,7 +159,7 @@ class Headers: @param value: The value to set for the named header. """ - self._rawHeaders.setdefault(self._encodeName(name), []).append( + self._rawHeaders.setdefault(_nameEncoder.encode(name), []).append( _sanitizeLinearWhitespace( value.encode("utf8") if isinstance(value, str) else value ) @@ -234,7 +188,7 @@ class Headers: @return: If the named header is present, a sequence of its values. Otherwise, C{default}. """ - encodedName = self._encodeName(name) + encodedName = _nameEncoder.encode(name) values = self._rawHeaders.get(encodedName, []) if not values: return default @@ -252,4 +206,79 @@ class Headers: return iter(self._rawHeaders.items()) +class _NameEncoder: + """ + C{_NameEncoder} converts HTTP header names to L{bytes} and canonicalizies + their capitalization. + + @cvar _caseMappings: A L{dict} that maps conventionally-capitalized + header names to their canonicalized representation, for headers with + unconventional capitalization. + + @cvar _canonicalHeaderCache: A L{dict} that maps header names to their + canonicalized representation. + """ + + __slots__ = ("_canonicalHeaderCache",) + _canonicalHeaderCache: Dict[Union[bytes, str], bytes] + + _caseMappings: ClassVar[Dict[bytes, bytes]] = { + b"Content-Md5": b"Content-MD5", + b"Dnt": b"DNT", + b"Etag": b"ETag", + b"P3p": b"P3P", + b"Te": b"TE", + b"Www-Authenticate": b"WWW-Authenticate", + b"X-Xss-Protection": b"X-XSS-Protection", + } + + _MAX_CACHED_HEADERS: ClassVar[int] = 10_000 + + def __init__(self): + self._canonicalHeaderCache = {} + + def encode(self, name: Union[str, bytes]) -> bytes: + """ + Encode the name of a header (eg 'Content-Type') to an ISO-8859-1 + bytestring if required. It will be canonicalized to Http-Header-Case. + + @raises InvalidHeaderName: + If the header name contains invalid characters like whitespace + or NUL. + + @param name: An HTTP header name + + @return: C{name}, encoded if required, in Header-Case + """ + if canonicalName := self._canonicalHeaderCache.get(name): + return canonicalName + + bytes_name = name.encode("iso-8859-1") if isinstance(name, str) else name + + if not _istoken(bytes_name): + raise InvalidHeaderName(bytes_name) + + result = b"-".join([word.capitalize() for word in bytes_name.split(b"-")]) + + # Some headers have special capitalization: + if result in self._caseMappings: + result = self._caseMappings[result] + + # In general, we should only see a very small number of header + # variations in the real world, so caching them is fine. However, an + # attacker could generate infinite header variations to fill up RAM, so + # we cap how many we cache. The performance degradation from lack of + # caching won't be that bad, and legit traffic won't hit it. + if len(self._canonicalHeaderCache) < self._MAX_CACHED_HEADERS: + self._canonicalHeaderCache[name] = result + + return result + + +_nameEncoder = _NameEncoder() +""" +The global name encoder. +""" + + __all__ = ["Headers"] |