Intermediate changes

commit_hash:c229701a8b4f4d9ee57ce1ed763099d862d53fa6
author: robot-piglet <robot-piglet@yandex-team.com> 2024-11-12 07:54:50 +0300
committer: robot-piglet <robot-piglet@yandex-team.com> 2024-11-12 08:05:59 +0300
commit: 55cec9f6b0618fb3570fc8ef66aad151f4932591 (patch)
tree: 9198c2ca0b0305269062c3674ce79f19c4990e65 /contrib/python/Twisted/py3/twisted/web/http_headers.py
parent: b77b1fbf262ea4f40e33a60ce32c4db4e5e49015 (diff)
download: ydb-55cec9f6b0618fb3570fc8ef66aad151f4932591.tar.gz
1 files changed, 92 insertions, 63 deletions
diff --git a/contrib/python/Twisted/py3/twisted/web/http_headers.py b/contrib/python/Twisted/py3/twisted/web/http_headers.py
index 8b1d41adb64..88b653439b5 100644
--- a/contrib/python/Twisted/py3/twisted/web/http_headers.py
+++ b/contrib/python/Twisted/py3/twisted/web/http_headers.py
@@ -22,18 +22,26 @@ from typing import (
 )
 
 from twisted.python.compat import cmp, comparable
+from twisted.web._abnf import _istoken
+
+
+class InvalidHeaderName(ValueError):
+    """
+    HTTP header names must be tokens, per RFC 9110 section 5.1.
+    """
+
 
 _T = TypeVar("_T")
 
 
 def _sanitizeLinearWhitespace(headerComponent: bytes) -> bytes:
     r"""
-    Replace linear whitespace (C{\n}, C{\r\n}, C{\r}) in a header key
-    or value with a single space.
+    Replace linear whitespace (C{\n}, C{\r\n}, C{\r}) in a header
+    value with a single space.
 
-    @param headerComponent: The header key or value to sanitize.
+    @param headerComponent: The header value to sanitize.
 
-    @return: The sanitized header key or value.
+    @return: The sanitized header value.
     """
     return b" ".join(headerComponent.splitlines())
 
@@ -53,31 +61,10 @@ class Headers:
     ensure no decoding or encoding is done, and L{Headers} will treat the keys
     and values as opaque byte strings.
 
-    @cvar _caseMappings: A L{dict} that maps lowercase header names
-        to their canonicalized representation, for headers with unconventional
-        capitalization.
-
-    @cvar _canonicalHeaderCache: A L{dict} that maps header names to their
-        canonicalized representation.
-
     @ivar _rawHeaders: A L{dict} mapping header names as L{bytes} to L{list}s of
         header values as L{bytes}.
     """
 
-    _caseMappings: ClassVar[Dict[bytes, bytes]] = {
-        b"content-md5": b"Content-MD5",
-        b"dnt": b"DNT",
-        b"etag": b"ETag",
-        b"p3p": b"P3P",
-        b"te": b"TE",
-        b"www-authenticate": b"WWW-Authenticate",
-        b"x-xss-protection": b"X-XSS-Protection",
-    }
-
-    _canonicalHeaderCache: ClassVar[Dict[Union[bytes, str], bytes]] = {}
-
-    _MAX_CACHED_HEADERS: ClassVar[int] = 10_000
-
     __slots__ = ["_rawHeaders"]
 
     def __init__(
@@ -109,39 +96,6 @@ class Headers:
             )
         return NotImplemented
 
-    def _encodeName(self, name: Union[str, bytes]) -> bytes:
-        """
-        Encode the name of a header (eg 'Content-Type') to an ISO-8859-1
-        encoded bytestring if required.  It will be canonicalized and
-        whitespace-sanitized.
-
-        @param name: A HTTP header name
-
-        @return: C{name}, encoded if required, lowercased
-        """
-        if canonicalName := self._canonicalHeaderCache.get(name, None):
-            return canonicalName
-
-        bytes_name = name.encode("iso-8859-1") if isinstance(name, str) else name
-
-        if bytes_name.lower() in self._caseMappings:
-            # Some headers have special capitalization:
-            result = self._caseMappings[bytes_name.lower()]
-        else:
-            result = _sanitizeLinearWhitespace(
-                b"-".join([word.capitalize() for word in bytes_name.split(b"-")])
-            )
-
-        # In general, we should only see a very small number of header
-        # variations in the real world, so caching them is fine. However, an
-        # attacker could generate infinite header variations to fill up RAM, so
-        # we cap how many we cache. The performance degradation from lack of
-        # caching won't be that bad, and legit traffic won't hit it.
-        if len(self._canonicalHeaderCache) < self._MAX_CACHED_HEADERS:
-            self._canonicalHeaderCache[name] = result
-
-        return result
-
     def copy(self):
         """
         Return a copy of itself with the same headers set.
@@ -158,7 +112,7 @@ class Headers:
 
         @return: C{True} if the header exists, otherwise C{False}.
         """
-        return self._encodeName(name) in self._rawHeaders
+        return _nameEncoder.encode(name) in self._rawHeaders
 
     def removeHeader(self, name: AnyStr) -> None:
         """
@@ -168,7 +122,7 @@ class Headers:
 
         @return: L{None}
         """
-        self._rawHeaders.pop(self._encodeName(name), None)
+        self._rawHeaders.pop(_nameEncoder.encode(name), None)
 
     def setRawHeaders(
         self, name: Union[str, bytes], values: Sequence[Union[str, bytes]]
@@ -186,7 +140,7 @@ class Headers:
 
         @return: L{None}
         """
-        _name = self._encodeName(name)
+        _name = _nameEncoder.encode(name)
         encodedValues: List[bytes] = []
         for v in values:
             if isinstance(v, str):
@@ -205,7 +159,7 @@ class Headers:
 
         @param value: The value to set for the named header.
         """
-        self._rawHeaders.setdefault(self._encodeName(name), []).append(
+        self._rawHeaders.setdefault(_nameEncoder.encode(name), []).append(
             _sanitizeLinearWhitespace(
                 value.encode("utf8") if isinstance(value, str) else value
             )
@@ -234,7 +188,7 @@ class Headers:
         @return: If the named header is present, a sequence of its
             values.  Otherwise, C{default}.
         """
-        encodedName = self._encodeName(name)
+        encodedName = _nameEncoder.encode(name)
         values = self._rawHeaders.get(encodedName, [])
         if not values:
             return default
@@ -252,4 +206,79 @@ class Headers:
         return iter(self._rawHeaders.items())
 
 
+class _NameEncoder:
+    """
+    C{_NameEncoder} converts HTTP header names to L{bytes} and canonicalizies
+    their capitalization.
+
+    @cvar _caseMappings: A L{dict} that maps conventionally-capitalized
+        header names to their canonicalized representation, for headers with
+        unconventional capitalization.
+
+    @cvar _canonicalHeaderCache: A L{dict} that maps header names to their
+        canonicalized representation.
+    """
+
+    __slots__ = ("_canonicalHeaderCache",)
+    _canonicalHeaderCache: Dict[Union[bytes, str], bytes]
+
+    _caseMappings: ClassVar[Dict[bytes, bytes]] = {
+        b"Content-Md5": b"Content-MD5",
+        b"Dnt": b"DNT",
+        b"Etag": b"ETag",
+        b"P3p": b"P3P",
+        b"Te": b"TE",
+        b"Www-Authenticate": b"WWW-Authenticate",
+        b"X-Xss-Protection": b"X-XSS-Protection",
+    }
+
+    _MAX_CACHED_HEADERS: ClassVar[int] = 10_000
+
+    def __init__(self):
+        self._canonicalHeaderCache = {}
+
+    def encode(self, name: Union[str, bytes]) -> bytes:
+        """
+        Encode the name of a header (eg 'Content-Type') to an ISO-8859-1
+        bytestring if required. It will be canonicalized to Http-Header-Case.
+
+        @raises InvalidHeaderName:
+            If the header name contains invalid characters like whitespace
+            or NUL.
+
+        @param name: An HTTP header name
+
+        @return: C{name}, encoded if required, in Header-Case
+        """
+        if canonicalName := self._canonicalHeaderCache.get(name):
+            return canonicalName
+
+        bytes_name = name.encode("iso-8859-1") if isinstance(name, str) else name
+
+        if not _istoken(bytes_name):
+            raise InvalidHeaderName(bytes_name)
+
+        result = b"-".join([word.capitalize() for word in bytes_name.split(b"-")])
+
+        # Some headers have special capitalization:
+        if result in self._caseMappings:
+            result = self._caseMappings[result]
+
+        # In general, we should only see a very small number of header
+        # variations in the real world, so caching them is fine. However, an
+        # attacker could generate infinite header variations to fill up RAM, so
+        # we cap how many we cache. The performance degradation from lack of
+        # caching won't be that bad, and legit traffic won't hit it.
+        if len(self._canonicalHeaderCache) < self._MAX_CACHED_HEADERS:
+            self._canonicalHeaderCache[name] = result
+
+        return result
+
+
+_nameEncoder = _NameEncoder()
+"""
+The global name encoder.
+"""
+
+
 __all__ = ["Headers"]
author	robot-piglet <robot-piglet@yandex-team.com>	2024-11-12 07:54:50 +0300
committer	robot-piglet <robot-piglet@yandex-team.com>	2024-11-12 08:05:59 +0300
commit	55cec9f6b0618fb3570fc8ef66aad151f4932591 (patch)
tree	9198c2ca0b0305269062c3674ce79f19c4990e65 /contrib/python/Twisted/py3/twisted/web/http_headers.py
parent	b77b1fbf262ea4f40e33a60ce32c4db4e5e49015 (diff)
download	ydb-55cec9f6b0618fb3570fc8ef66aad151f4932591.tar.gz