Intermediate changes

author: robot-piglet <robot-piglet@yandex-team.com> 2024-08-25 12:54:32 +0300
committer: robot-piglet <robot-piglet@yandex-team.com> 2024-08-25 13:03:33 +0300
commit: 4a64a813e1d34e732f35d8a65147974f76395a6f (patch)
tree: a8da0dede5213f85e45b95047cfbdcf5427cf0b7 /contrib/python/Twisted/py3/twisted/web/http_headers.py
parent: e9bbee265681b79a9ef9795bdc84cf6996f9cfec (diff)
download: ydb-4a64a813e1d34e732f35d8a65147974f76395a6f.tar.gz
1 files changed, 41 insertions, 81 deletions
diff --git a/contrib/python/Twisted/py3/twisted/web/http_headers.py b/contrib/python/Twisted/py3/twisted/web/http_headers.py
index f810f4bc2c..8b1d41adb6 100644
--- a/contrib/python/Twisted/py3/twisted/web/http_headers.py
+++ b/contrib/python/Twisted/py3/twisted/web/http_headers.py
@@ -6,9 +6,9 @@
 An API for storing HTTP header names and values.
 """
 
-from collections.abc import Sequence as _Sequence
 from typing import (
     AnyStr,
+    ClassVar,
     Dict,
     Iterator,
     List,
@@ -26,17 +26,6 @@ from twisted.python.compat import cmp, comparable
 _T = TypeVar("_T")
 
 
-def _dashCapitalize(name: bytes) -> bytes:
-    """
-    Return a byte string which is capitalized using '-' as a word separator.
-
-    @param name: The name of the header to capitalize.
-
-    @return: The given header capitalized using '-' as a word separator.
-    """
-    return b"-".join([word.capitalize() for word in name.split(b"-")])
-
-
 def _sanitizeLinearWhitespace(headerComponent: bytes) -> bytes:
     r"""
     Replace linear whitespace (C{\n}, C{\r\n}, C{\r}) in a header key
@@ -65,13 +54,17 @@ class Headers:
     and values as opaque byte strings.
 
     @cvar _caseMappings: A L{dict} that maps lowercase header names
-        to their canonicalized representation.
+        to their canonicalized representation, for headers with unconventional
+        capitalization.
+
+    @cvar _canonicalHeaderCache: A L{dict} that maps header names to their
+        canonicalized representation.
 
     @ivar _rawHeaders: A L{dict} mapping header names as L{bytes} to L{list}s of
         header values as L{bytes}.
     """
 
-    _caseMappings = {
+    _caseMappings: ClassVar[Dict[bytes, bytes]] = {
         b"content-md5": b"Content-MD5",
         b"dnt": b"DNT",
         b"etag": b"ETag",
@@ -81,6 +74,12 @@ class Headers:
         b"x-xss-protection": b"X-XSS-Protection",
     }
 
+    _canonicalHeaderCache: ClassVar[Dict[Union[bytes, str], bytes]] = {}
+
+    _MAX_CACHED_HEADERS: ClassVar[int] = 10_000
+
+    __slots__ = ["_rawHeaders"]
+
     def __init__(
         self,
         rawHeaders: Optional[Mapping[AnyStr, Sequence[AnyStr]]] = None,
@@ -112,16 +111,36 @@ class Headers:
 
     def _encodeName(self, name: Union[str, bytes]) -> bytes:
         """
-        Encode the name of a header (eg 'Content-Type') to an ISO-8859-1 encoded
-        bytestring if required.
+        Encode the name of a header (eg 'Content-Type') to an ISO-8859-1
+        encoded bytestring if required.  It will be canonicalized and
+        whitespace-sanitized.
 
         @param name: A HTTP header name
 
         @return: C{name}, encoded if required, lowercased
         """
-        if isinstance(name, str):
-            return name.lower().encode("iso-8859-1")
-        return name.lower()
+        if canonicalName := self._canonicalHeaderCache.get(name, None):
+            return canonicalName
+
+        bytes_name = name.encode("iso-8859-1") if isinstance(name, str) else name
+
+        if bytes_name.lower() in self._caseMappings:
+            # Some headers have special capitalization:
+            result = self._caseMappings[bytes_name.lower()]
+        else:
+            result = _sanitizeLinearWhitespace(
+                b"-".join([word.capitalize() for word in bytes_name.split(b"-")])
+            )
+
+        # In general, we should only see a very small number of header
+        # variations in the real world, so caching them is fine. However, an
+        # attacker could generate infinite header variations to fill up RAM, so
+        # we cap how many we cache. The performance degradation from lack of
+        # caching won't be that bad, and legit traffic won't hit it.
+        if len(self._canonicalHeaderCache) < self._MAX_CACHED_HEADERS:
+            self._canonicalHeaderCache[name] = result
+
+        return result
 
     def copy(self):
         """
@@ -151,21 +170,9 @@ class Headers:
         """
         self._rawHeaders.pop(self._encodeName(name), None)
 
-    @overload
-    def setRawHeaders(self, name: Union[str, bytes], values: Sequence[bytes]) -> None:
-        ...
-
-    @overload
-    def setRawHeaders(self, name: Union[str, bytes], values: Sequence[str]) -> None:
-        ...
-
-    @overload
     def setRawHeaders(
         self, name: Union[str, bytes], values: Sequence[Union[str, bytes]]
     ) -> None:
-        ...
-
-    def setRawHeaders(self, name: Union[str, bytes], values: object) -> None:
         """
         Sets the raw representation of the given header.
 
@@ -179,29 +186,7 @@ class Headers:
 
         @return: L{None}
         """
-        if not isinstance(values, _Sequence):
-            raise TypeError(
-                "Header entry %r should be sequence but found "
-                "instance of %r instead" % (name, type(values))
-            )
-
-        if not isinstance(name, (bytes, str)):
-            raise TypeError(
-                f"Header name is an instance of {type(name)!r}, not bytes or str"
-            )
-
-        for count, value in enumerate(values):
-            if not isinstance(value, (bytes, str)):
-                raise TypeError(
-                    "Header value at position %s is an instance of %r, not "
-                    "bytes or str"
-                    % (
-                        count,
-                        type(value),
-                    )
-                )
-
-        _name = _sanitizeLinearWhitespace(self._encodeName(name))
+        _name = self._encodeName(name)
         encodedValues: List[bytes] = []
         for v in values:
             if isinstance(v, str):
@@ -220,20 +205,7 @@ class Headers:
 
         @param value: The value to set for the named header.
         """
-        if not isinstance(name, (bytes, str)):
-            raise TypeError(
-                f"Header name is an instance of {type(name)!r}, not bytes or str"
-            )
-
-        if not isinstance(value, (bytes, str)):
-            raise TypeError(
-                "Header value is an instance of %r, not "
-                "bytes or str" % (type(value),)
-            )
-
-        self._rawHeaders.setdefault(
-            _sanitizeLinearWhitespace(self._encodeName(name)), []
-        ).append(
+        self._rawHeaders.setdefault(self._encodeName(name), []).append(
             _sanitizeLinearWhitespace(
                 value.encode("utf8") if isinstance(value, str) else value
             )
@@ -277,19 +249,7 @@ class Headers:
         object, as L{bytes}.  The keys are capitalized in canonical
         capitalization.
         """
-        for k, v in self._rawHeaders.items():
-            yield self._canonicalNameCaps(k), v
-
-    def _canonicalNameCaps(self, name: bytes) -> bytes:
-        """
-        Return the canonical name for the given header.
-
-        @param name: The all-lowercase header name to capitalize in its
-            canonical form.
-
-        @return: The canonical name of the header.
-        """
-        return self._caseMappings.get(name, _dashCapitalize(name))
+        return iter(self._rawHeaders.items())
 
 
 __all__ = ["Headers"]
author	robot-piglet <robot-piglet@yandex-team.com>	2024-08-25 12:54:32 +0300
committer	robot-piglet <robot-piglet@yandex-team.com>	2024-08-25 13:03:33 +0300
commit	4a64a813e1d34e732f35d8a65147974f76395a6f (patch)
tree	a8da0dede5213f85e45b95047cfbdcf5427cf0b7 /contrib/python/Twisted/py3/twisted/web/http_headers.py
parent	e9bbee265681b79a9ef9795bdc84cf6996f9cfec (diff)
download	ydb-4a64a813e1d34e732f35d8a65147974f76395a6f.tar.gz