about | summary | refs | log | tree | commit | diff | stats
path: root/contrib/python/Twisted/py3/twisted/web/http_headers.py
diff options
context:
space:
mode:
author: robot-piglet <robot-piglet@yandex-team.com> 2024-08-25 12:54:32 +0300
committer: robot-piglet <robot-piglet@yandex-team.com> 2024-08-25 13:03:33 +0300
commit: 4a64a813e1d34e732f35d8a65147974f76395a6f (patch)
tree: a8da0dede5213f85e45b95047cfbdcf5427cf0b7 /contrib/python/Twisted/py3/twisted/web/http_headers.py
parent: e9bbee265681b79a9ef9795bdc84cf6996f9cfec (diff)
download: ydb-4a64a813e1d34e732f35d8a65147974f76395a6f.tar.gz
Intermediate changes
Diffstat (limited to 'contrib/python/Twisted/py3/twisted/web/http_headers.py')
-rw-r--r-- contrib/python/Twisted/py3/twisted/web/http_headers.py 122
1 files changed, 41 insertions, 81 deletions
diff --git a/contrib/python/Twisted/py3/twisted/web/http_headers.py b/contrib/python/Twisted/py3/twisted/web/http_headers.py
index f810f4bc2c..8b1d41adb6 100644
--- a/contrib/python/Twisted/py3/twisted/web/http_headers.py
+++ b/contrib/python/Twisted/py3/twisted/web/http_headers.py
@@ -6,9 +6,9 @@
An API for storing HTTP header names and values.
"""
-from collections.abc import Sequence as _Sequence
from typing import (
AnyStr,
+ ClassVar,
Dict,
Iterator,
List,
@@ -26,17 +26,6 @@ from twisted.python.compat import cmp, comparable
_T = TypeVar("_T")
-def _dashCapitalize(name: bytes) -> bytes:
- """
- Return a byte string which is capitalized using '-' as a word separator.
-
- @param name: The name of the header to capitalize.
-
- @return: The given header capitalized using '-' as a word separator.
- """
- return b"-".join([word.capitalize() for word in name.split(b"-")])
-
-
def _sanitizeLinearWhitespace(headerComponent: bytes) -> bytes:
r"""
Replace linear whitespace (C{\n}, C{\r\n}, C{\r}) in a header key
@@ -65,13 +54,17 @@ class Headers:
and values as opaque byte strings.
@cvar _caseMappings: A L{dict} that maps lowercase header names
- to their canonicalized representation.
+ to their canonicalized representation, for headers with unconventional
+ capitalization.
+
+ @cvar _canonicalHeaderCache: A L{dict} that maps header names to their
+ canonicalized representation.
@ivar _rawHeaders: A L{dict} mapping header names as L{bytes} to L{list}s of
header values as L{bytes}.
"""
- _caseMappings = {
+ _caseMappings: ClassVar[Dict[bytes, bytes]] = {
b"content-md5": b"Content-MD5",
b"dnt": b"DNT",
b"etag": b"ETag",
@@ -81,6 +74,12 @@ class Headers:
b"x-xss-protection": b"X-XSS-Protection",
}
+ _canonicalHeaderCache: ClassVar[Dict[Union[bytes, str], bytes]] = {}
+
+ _MAX_CACHED_HEADERS: ClassVar[int] = 10_000
+
+ __slots__ = ["_rawHeaders"]
+
def __init__(
self,
rawHeaders: Optional[Mapping[AnyStr, Sequence[AnyStr]]] = None,
@@ -112,16 +111,36 @@ class Headers:
def _encodeName(self, name: Union[str, bytes]) -> bytes:
"""
- Encode the name of a header (eg 'Content-Type') to an ISO-8859-1 encoded
- bytestring if required.
+ Encode the name of a header (eg 'Content-Type') to an ISO-8859-1
+ encoded bytestring if required. It will be canonicalized and
+ whitespace-sanitized.
@param name: A HTTP header name
@return: C{name}, encoded if required, in canonical capitalization and whitespace-sanitized
"""
- if isinstance(name, str):
- return name.lower().encode("iso-8859-1")
- return name.lower()
+ if canonicalName := self._canonicalHeaderCache.get(name, None):
+ return canonicalName
+
+ bytes_name = name.encode("iso-8859-1") if isinstance(name, str) else name
+
+ if bytes_name.lower() in self._caseMappings:
+ # Some headers have special capitalization:
+ result = self._caseMappings[bytes_name.lower()]
+ else:
+ result = _sanitizeLinearWhitespace(
+ b"-".join([word.capitalize() for word in bytes_name.split(b"-")])
+ )
+
+ # In general, we should only see a very small number of header
+ # variations in the real world, so caching them is fine. However, an
+ # attacker could generate infinite header variations to fill up RAM, so
+ # we cap how many we cache. The performance degradation from lack of
+ # caching won't be that bad, and legit traffic won't hit it.
+ if len(self._canonicalHeaderCache) < self._MAX_CACHED_HEADERS:
+ self._canonicalHeaderCache[name] = result
+
+ return result
def copy(self):
"""
@@ -151,21 +170,9 @@ class Headers:
"""
self._rawHeaders.pop(self._encodeName(name), None)
- @overload
- def setRawHeaders(self, name: Union[str, bytes], values: Sequence[bytes]) -> None:
- ...
-
- @overload
- def setRawHeaders(self, name: Union[str, bytes], values: Sequence[str]) -> None:
- ...
-
- @overload
def setRawHeaders(
self, name: Union[str, bytes], values: Sequence[Union[str, bytes]]
) -> None:
- ...
-
- def setRawHeaders(self, name: Union[str, bytes], values: object) -> None:
"""
Sets the raw representation of the given header.
@@ -179,29 +186,7 @@ class Headers:
@return: L{None}
"""
- if not isinstance(values, _Sequence):
- raise TypeError(
- "Header entry %r should be sequence but found "
- "instance of %r instead" % (name, type(values))
- )
-
- if not isinstance(name, (bytes, str)):
- raise TypeError(
- f"Header name is an instance of {type(name)!r}, not bytes or str"
- )
-
- for count, value in enumerate(values):
- if not isinstance(value, (bytes, str)):
- raise TypeError(
- "Header value at position %s is an instance of %r, not "
- "bytes or str"
- % (
- count,
- type(value),
- )
- )
-
- _name = _sanitizeLinearWhitespace(self._encodeName(name))
+ _name = self._encodeName(name)
encodedValues: List[bytes] = []
for v in values:
if isinstance(v, str):
@@ -220,20 +205,7 @@ class Headers:
@param value: The value to set for the named header.
"""
- if not isinstance(name, (bytes, str)):
- raise TypeError(
- f"Header name is an instance of {type(name)!r}, not bytes or str"
- )
-
- if not isinstance(value, (bytes, str)):
- raise TypeError(
- "Header value is an instance of %r, not "
- "bytes or str" % (type(value),)
- )
-
- self._rawHeaders.setdefault(
- _sanitizeLinearWhitespace(self._encodeName(name)), []
- ).append(
+ self._rawHeaders.setdefault(self._encodeName(name), []).append(
_sanitizeLinearWhitespace(
value.encode("utf8") if isinstance(value, str) else value
)
@@ -277,19 +249,7 @@ class Headers:
object, as L{bytes}. The keys are capitalized in canonical
capitalization.
"""
- for k, v in self._rawHeaders.items():
- yield self._canonicalNameCaps(k), v
-
- def _canonicalNameCaps(self, name: bytes) -> bytes:
- """
- Return the canonical name for the given header.
-
- @param name: The all-lowercase header name to capitalize in its
- canonical form.
-
- @return: The canonical name of the header.
- """
- return self._caseMappings.get(name, _dashCapitalize(name))
+ return iter(self._rawHeaders.items())
__all__ = ["Headers"]