diff options
| author | robot-piglet <[email protected]> | 2024-08-25 12:54:32 +0300 |
|---|---|---|
| committer | robot-piglet <[email protected]> | 2024-08-25 13:03:33 +0300 |
| commit | 4a64a813e1d34e732f35d8a65147974f76395a6f (patch) | |
| tree | a8da0dede5213f85e45b95047cfbdcf5427cf0b7 /contrib/python/Twisted/py3/twisted/web | |
| parent | e9bbee265681b79a9ef9795bdc84cf6996f9cfec (diff) | |
Intermediate changes
Diffstat (limited to 'contrib/python/Twisted/py3/twisted/web')
| -rw-r--r-- | contrib/python/Twisted/py3/twisted/web/_flatten.py | 7 | ||||
| -rw-r--r-- | contrib/python/Twisted/py3/twisted/web/_http2.py | 11 | ||||
| -rw-r--r-- | contrib/python/Twisted/py3/twisted/web/_newclient.py | 201 | ||||
| -rw-r--r-- | contrib/python/Twisted/py3/twisted/web/_responses.py | 2 | ||||
| -rw-r--r-- | contrib/python/Twisted/py3/twisted/web/_template_util.py | 30 | ||||
| -rw-r--r-- | contrib/python/Twisted/py3/twisted/web/client.py | 2 | ||||
| -rw-r--r-- | contrib/python/Twisted/py3/twisted/web/http.py | 328 | ||||
| -rw-r--r-- | contrib/python/Twisted/py3/twisted/web/http_headers.py | 122 | ||||
| -rw-r--r-- | contrib/python/Twisted/py3/twisted/web/iweb.py | 7 | ||||
| -rw-r--r-- | contrib/python/Twisted/py3/twisted/web/resource.py | 68 | ||||
| -rw-r--r-- | contrib/python/Twisted/py3/twisted/web/server.py | 47 | ||||
| -rw-r--r-- | contrib/python/Twisted/py3/twisted/web/soap.py | 166 | ||||
| -rw-r--r-- | contrib/python/Twisted/py3/twisted/web/test/requesthelper.py | 4 | ||||
| -rw-r--r-- | contrib/python/Twisted/py3/twisted/web/util.py | 2 | ||||
| -rw-r--r-- | contrib/python/Twisted/py3/twisted/web/wsgi.py | 122 |
15 files changed, 525 insertions, 594 deletions
diff --git a/contrib/python/Twisted/py3/twisted/web/_flatten.py b/contrib/python/Twisted/py3/twisted/web/_flatten.py index 87a8bf2dfbf..12691b87fa8 100644 --- a/contrib/python/Twisted/py3/twisted/web/_flatten.py +++ b/contrib/python/Twisted/py3/twisted/web/_flatten.py @@ -418,7 +418,6 @@ async def _flattenTree( while stack: try: - frame = stack[-1].gi_frame element = next(stack[-1]) if isinstance(element, Deferred): # Before suspending flattening for an unknown amount of time, @@ -428,11 +427,11 @@ async def _flattenTree( except StopIteration: stack.pop() except Exception as e: - stack.pop() roots = [] for generator in stack: - roots.append(generator.gi_frame.f_locals["root"]) - roots.append(frame.f_locals["root"]) + if generator.gi_frame is not None: + roots.append(generator.gi_frame.f_locals["root"]) + stack.pop() raise FlattenerError(e, roots, extract_tb(exc_info()[2])) else: stack.append(element) diff --git a/contrib/python/Twisted/py3/twisted/web/_http2.py b/contrib/python/Twisted/py3/twisted/web/_http2.py index 24c24fc0ffe..f048c7335ec 100644 --- a/contrib/python/Twisted/py3/twisted/web/_http2.py +++ b/contrib/python/Twisted/py3/twisted/web/_http2.py @@ -1073,10 +1073,15 @@ class H2Stream: @type reason: L{bytes} @param headers: The HTTP response headers. - @type headers: Any iterable of two-tuples of L{bytes}, representing header - names and header values. + @type headers: L{twisted.web.http_headers.Headers} """ - self._conn.writeHeaders(version, code, reason, headers, self.streamID) + self._conn.writeHeaders( + version, + code, + reason, + [(k, v) for (k, values) in headers.getAllRawHeaders() for v in values], + self.streamID, + ) def requestDone(self, request): """ diff --git a/contrib/python/Twisted/py3/twisted/web/_newclient.py b/contrib/python/Twisted/py3/twisted/web/_newclient.py index 6fd1ac21bab..a151bdae05c 100644 --- a/contrib/python/Twisted/py3/twisted/web/_newclient.py +++ b/contrib/python/Twisted/py3/twisted/web/_newclient.py @@ -26,17 +26,14 @@ Various other classes in this module support this usage: response. """ +from __future__ import annotations + import re +from typing import TYPE_CHECKING, Optional from zope.interface import implementer -from twisted.internet.defer import ( - CancelledError, - Deferred, - fail, - maybeDeferred, - succeed, -) +from twisted.internet.defer import CancelledError, Deferred, fail, succeed from twisted.internet.error import ConnectionDone from twisted.internet.interfaces import IConsumer, IPushProducer from twisted.internet.protocol import Protocol @@ -45,7 +42,6 @@ from twisted.protocols.basic import LineReceiver from twisted.python.compat import networkString from twisted.python.components import proxyForInterface from twisted.python.failure import Failure -from twisted.python.reflect import fullyQualifiedName from twisted.web.http import ( NO_CONTENT, NOT_MODIFIED, @@ -183,21 +179,6 @@ class RequestNotSent(Exception): """ -def _callAppFunction(function): - """ - Call C{function}. If it raises an exception, log it with a minimal - description of the source. - - @return: L{None} - """ - try: - function() - except BaseException: - _moduleLog.failure( - "Unexpected exception from {name}", name=fullyQualifiedName(function) - ) - - class HTTPParser(LineReceiver): """ L{HTTPParser} handles the parsing side of HTTP processing. With a suitable @@ -207,6 +188,10 @@ class HTTPParser(LineReceiver): @ivar headers: All of the non-connection control message headers yet received. + @ivar connHeaders: All of the connection control message headers yet + received. See L{CONNECTION_CONTROL_HEADERS} and + L{isConnectionControlHeader}. + @ivar state: State indicator for the response parsing state machine. One of C{STATUS}, C{HEADER}, C{BODY}, C{DONE}. @@ -342,6 +327,15 @@ class HTTPParser(LineReceiver): self.switchToBodyMode(None) +_ignoreDecoderErrors = _moduleLog.failureHandler("while interacting with body decoder:") +_ignoreStopProducerStopWriting = _moduleLog.failureHandler( + "while calling stopProducing() in stopWriting():" +) +_ignoreStopProducerWrite = _moduleLog.failureHandler( + "while calling stopProducing() in write():" +) + + class HTTPClientParser(HTTPParser): """ An HTTP parser which only handles HTTP responses. @@ -367,7 +361,7 @@ class HTTPClientParser(HTTPParser): b"chunked": _ChunkedTransferDecoder, } - bodyDecoder = None + bodyDecoder: _IdentityTransferDecoder | None = None _log = Logger() def __init__(self, request, finisher): @@ -389,6 +383,11 @@ class HTTPClientParser(HTTPParser): b'HTTP/1.1'. Returns (protocol, major, minor). Will raise ValueError on bad syntax. """ + # Vast majority of the time this will be the response, so just + # immediately return the result: + if strversion == b"HTTP/1.1": + return (b"HTTP", 1, 1) + try: proto, strnumber = strversion.split(b"/") major, minor = strnumber.split(b".") @@ -497,18 +496,9 @@ class HTTPClientParser(HTTPParser): # allow the transfer decoder to set the response object's # length attribute. else: - contentLengthHeaders = self.connHeaders.getRawHeaders(b"content-length") - if contentLengthHeaders is None: - contentLength = None - elif len(contentLengthHeaders) == 1: - contentLength = int(contentLengthHeaders[0]) + contentLength = _contentLength(self.connHeaders) + if contentLength is not None: self.response.length = contentLength - else: - # "HTTP Message Splitting" or "HTTP Response Smuggling" - # potentially happening. Or it's just a buggy server. - raise ValueError( - "Too many Content-Length headers; " "response is invalid" - ) if contentLength == 0: self._finished(self.clearLineBuffer()) @@ -539,9 +529,14 @@ class HTTPClientParser(HTTPParser): self._responseDeferred.callback(self.response) del self._responseDeferred - def connectionLost(self, reason): + def connectionLost(self, reason: Failure | None = None) -> None: if self.bodyDecoder is not None: - try: + # Handle exceptions from both the body decoder itself and the + # various invocations of _bodyDataFinished; treat them all as + # application code. The response is part of the HTTP server and + # really shouldn't raise exceptions, but maybe there's some buggy + # application code somewhere making things difficult. + with _ignoreDecoderErrors: try: self.bodyDecoder.noMoreData() except PotentialDataLoss: @@ -552,12 +547,6 @@ class HTTPClientParser(HTTPParser): ) else: self.response._bodyDataFinished() - except BaseException: - # Handle exceptions from both the except suites and the else - # suite. Those functions really shouldn't raise exceptions, - # but maybe there's some buggy application code somewhere - # making things difficult. - self._log.failure("") elif self.state != DONE: if self._everReceivedData: exceptionClass = ResponseFailed @@ -589,7 +578,7 @@ _VALID_METHOD = re.compile( b"~", b"\x30-\x39", b"\x41-\x5a", - b"\x61-\x7A", + b"\x61-\x7a", ), ), ), @@ -645,6 +634,77 @@ def _ensureValidURI(uri): raise ValueError(f"Invalid URI {uri!r}") +def _decint(data: bytes) -> int: + """ + Parse a decimal integer of the form C{1*DIGIT}, i.e. consisting only of + decimal digits. The integer may be embedded in whitespace (space and + horizontal tab). This differs from the built-in L{int()} function by + disallowing a leading C{+} character and various forms of whitespace + (note that we sanitize linear whitespace in header values in + L{twisted.web.http_headers.Headers}). + + @param data: Value to parse. + + @returns: A non-negative integer. + + @raises ValueError: When I{value} contains non-decimal characters. + """ + data = data.strip(b" \t") + if not data.isdigit(): + raise ValueError(f"Value contains non-decimal digits: {data!r}") + return int(data) + + +def _contentLength(connHeaders: Headers) -> Optional[int]: + """ + Parse the I{Content-Length} connection header. + + Two forms of duplicates are permitted. Header repetition: + + Content-Length: 42 + Content-Length: 42 + + And field value repetition: + + Content-Length: 42, 42 + + Duplicates are only permitted if they have the same decimal value + (so C{7, 007} are also permitted). + + @param connHeaders: Connection headers per L{HTTPParser.connHeaders} + + @returns: A non-negative number of octets, or L{None} when there is + no I{Content-Length} header. + + @raises ValueError: when there are conflicting headers, a header value + isn't an integer, or a header value is negative. + + @see: U{https://datatracker.ietf.org/doc/html/rfc9110#section-8.6} + """ + headers = connHeaders.getRawHeaders(b"content-length") + if headers is None: + return None + + if len(headers) > 1: + fieldValues = b",".join(headers) + else: + [fieldValues] = headers + + if b"," in fieldValues: + # Duplicates of the form b'42, 42' are allowed. + values = {_decint(v) for v in fieldValues.split(b",")} + if len(values) != 1: + # "HTTP Message Splitting" or "HTTP Response Smuggling" + # potentially happening. Or it's just a buggy server. + raise ValueError( + f"Invalid response: conflicting Content-Length headers: {fieldValues!r}" + ) + [value] = values + else: + value = _decint(fieldValues) + return value + + @implementer(IClientRequest) class Request: """ @@ -929,12 +989,13 @@ class Request: self._writeToEmptyBodyContentLength(transport) else: self._writeHeaders(transport, None) + return succeed(None) elif self.bodyProducer.length is UNKNOWN_LENGTH: return self._writeToBodyProducerChunked(transport) else: return self._writeToBodyProducerContentLength(transport) - def stopWriting(self): + def stopWriting(self) -> None: """ Stop writing this request to the transport. This can only be called after C{writeTo} and before the L{Deferred} returned by C{writeTo} @@ -944,7 +1005,8 @@ class Request: """ # If bodyProducer is None, then the Deferred returned by writeTo has # fired already and this method cannot be called. - _callAppFunction(self.bodyProducer.stopProducing) + with _ignoreStopProducerStopWriting: + self.bodyProducer.stopProducing() class LengthEnforcingConsumer: @@ -1001,7 +1063,8 @@ class LengthEnforcingConsumer: # we still have _finished which we can use to report the error to a # better place than the direct caller of this method (some # arbitrary application code). - _callAppFunction(self._producer.stopProducing) + with _ignoreStopProducerWrite: + self._producer.stopProducing() self._finished.errback(WrongBodyLength("too many bytes written")) self._allowNoMoreWrites() @@ -1034,9 +1097,10 @@ def makeStatefulDispatcher(name, template): @return: The dispatcher function. """ + pfx = f"_{name}_" def dispatcher(self, *args, **kwargs): - func = getattr(self, "_" + name + "_" + self._state, None) + func = getattr(self, f"{pfx}{self._state}", None) if func is None: raise RuntimeError(f"{self!r} has no {name} method in state {self._state}") return func(*args, **kwargs) @@ -1270,7 +1334,9 @@ class Response: """ self._state = "DEFERRED_CLOSE" if reason is None: - reason = Failure(ResponseDone("Response body fully received")) + reason = Failure._withoutTraceback( + ResponseDone("Response body fully received") + ) self._reason = reason def _bodyDataFinished_CONNECTED(self, reason=None): @@ -1278,7 +1344,9 @@ class Response: Disconnect the protocol and move to the C{'FINISHED'} state. """ if reason is None: - reason = Failure(ResponseDone("Response body fully received")) + reason = Failure._withoutTraceback( + ResponseDone("Response body fully received") + ) self._bodyProtocol.connectionLost(reason) self._bodyProtocol = None self._state = "FINISHED" @@ -1468,11 +1536,11 @@ class HTTP11ClientProtocol(Protocol): """ _state = "QUIESCENT" - _parser = None - _finishedRequest = None - _currentRequest = None + _parser: HTTPClientParser | None = None + _finishedRequest: Deferred[Response] | None = None + _currentRequest: Request | None = None _transportProxy = None - _responseDeferred = None + _responseDeferred: Deferred[Response] | None = None _log = Logger() def __init__(self, quiescentCallback=lambda c: None): @@ -1506,7 +1574,10 @@ class HTTP11ClientProtocol(Protocol): return fail(RequestNotSent()) self._state = "TRANSMITTING" - _requestDeferred = maybeDeferred(request.writeTo, self.transport) + try: + _requestDeferred = request.writeTo(self.transport) + except BaseException: + _requestDeferred = fail() def cancelRequest(ign): # Explicitly cancel the request's deferred if it's still trying to @@ -1550,7 +1621,7 @@ class HTTP11ClientProtocol(Protocol): return self._finishedRequest - def _finishResponse(self, rest): + def _finishResponse(self, rest: bytes) -> None: """ Called by an L{HTTPClientParser} to indicate that it has parsed a complete response. @@ -1562,10 +1633,16 @@ class HTTP11ClientProtocol(Protocol): _finishResponse = makeStatefulDispatcher("finishResponse", _finishResponse) - def _finishResponse_WAITING(self, rest): + def _finishResponse_WAITING(self, rest: bytes) -> None: # Currently the rest parameter is ignored. Don't forget to use it if # we ever add support for pipelining. And maybe check what trailers # mean. + if TYPE_CHECKING: + assert self._responseDeferred is not None + assert self._finishedRequest is not None + assert self._currentRequest is not None + assert self.transport is not None + if self._state == "WAITING": self._state = "QUIESCENT" else: @@ -1590,20 +1667,20 @@ class HTTP11ClientProtocol(Protocol): or self._state != "QUIESCENT" or not self._currentRequest.persistent ): - self._giveUp(Failure(reason)) + self._giveUp(Failure._withoutTraceback(reason)) else: # Just in case we had paused the transport, resume it before # considering it quiescent again. - self.transport.resumeProducing() + producer: IPushProducer = self.transport # type:ignore[assignment] + producer.resumeProducing() # We call the quiescent callback first, to ensure connection gets # added back to connection pool before we finish the request. - try: + with _moduleLog.failuresHandled("while invoking quiescent callback:") as op: self._quiescentCallback(self) - except BaseException: + if op.failed: # If callback throws exception, just log it and disconnect; # keeping persistent connections around is an optimisation: - self._log.failure("") self.transport.loseConnection() self._disconnectParser(reason) diff --git a/contrib/python/Twisted/py3/twisted/web/_responses.py b/contrib/python/Twisted/py3/twisted/web/_responses.py index 2b932293503..5d87fdc597b 100644 --- a/contrib/python/Twisted/py3/twisted/web/_responses.py +++ b/contrib/python/Twisted/py3/twisted/web/_responses.py @@ -46,6 +46,7 @@ REQUEST_URI_TOO_LONG = 414 UNSUPPORTED_MEDIA_TYPE = 415 REQUESTED_RANGE_NOT_SATISFIABLE = 416 EXPECTATION_FAILED = 417 +IM_A_TEAPOT = 418 INTERNAL_SERVER_ERROR = 500 NOT_IMPLEMENTED = 501 @@ -98,6 +99,7 @@ RESPONSES = { UNSUPPORTED_MEDIA_TYPE: b"Unsupported Media Type", REQUESTED_RANGE_NOT_SATISFIABLE: b"Requested Range not satisfiable", EXPECTATION_FAILED: b"Expectation Failed", + IM_A_TEAPOT: b"I'm a teapot", # 500 INTERNAL_SERVER_ERROR: b"Internal Server Error", NOT_IMPLEMENTED: b"Not Implemented", diff --git a/contrib/python/Twisted/py3/twisted/web/_template_util.py b/contrib/python/Twisted/py3/twisted/web/_template_util.py index 230c33f3e8f..501941ad121 100644 --- a/contrib/python/Twisted/py3/twisted/web/_template_util.py +++ b/contrib/python/Twisted/py3/twisted/web/_template_util.py @@ -92,7 +92,7 @@ def redirectTo(URL: bytes, request: IRequest) -> bytes: </body> </html> """ % { - b"url": URL + b"url": escape(URL.decode("utf-8")).encode("utf-8") } return content @@ -118,34 +118,6 @@ class Redirect(resource.Resource): return self -# FIXME: This is totally broken, see https://twistedmatrix.com/trac/ticket/9838 -class ChildRedirector(Redirect): - isLeaf = False - - def __init__(self, url): - # XXX is this enough? - if ( - (url.find("://") == -1) - and (not url.startswith("..")) - and (not url.startswith("/")) - ): - raise ValueError( - ( - "It seems you've given me a redirect (%s) that is a child of" - " myself! That's not good, it'll cause an infinite redirect." - ) - % url - ) - Redirect.__init__(self, url) - - def getChild(self, name, request): - newUrl = self.url - if not newUrl.endswith("/"): - newUrl += "/" - newUrl += name - return ChildRedirector(newUrl) - - class ParentRedirect(resource.Resource): """ Redirect to the nearest directory and strip any query string. diff --git a/contrib/python/Twisted/py3/twisted/web/client.py b/contrib/python/Twisted/py3/twisted/web/client.py index e66b0cf3177..b06f1bef286 100644 --- a/contrib/python/Twisted/py3/twisted/web/client.py +++ b/contrib/python/Twisted/py3/twisted/web/client.py @@ -1530,7 +1530,7 @@ class ContentDecoderAgent: return response -_canonicalHeaderName = Headers()._canonicalNameCaps +_canonicalHeaderName = Headers()._encodeName _defaultSensitiveHeaders = frozenset( [ b"Authorization", diff --git a/contrib/python/Twisted/py3/twisted/web/http.py b/contrib/python/Twisted/py3/twisted/web/http.py index 1c598380acc..e80f6cb365f 100644 --- a/contrib/python/Twisted/py3/twisted/web/http.py +++ b/contrib/python/Twisted/py3/twisted/web/http.py @@ -31,6 +31,7 @@ also useful for HTTP clients (such as the chunked encoding parser). it, as in the HTTP 1.1 chunked I{Transfer-Encoding} (RFC 7230 section 4.1). This limits how much data may be buffered when decoding the line. """ + from __future__ import annotations __all__ = [ @@ -69,6 +70,7 @@ __all__ = [ "UNSUPPORTED_MEDIA_TYPE", "REQUESTED_RANGE_NOT_SATISFIABLE", "EXPECTATION_FAILED", + "IM_A_TEAPOT", "INTERNAL_SERVER_ERROR", "NOT_IMPLEMENTED", "BAD_GATEWAY", @@ -108,9 +110,17 @@ import tempfile import time import warnings from email import message_from_bytes -from email.message import EmailMessage -from io import BytesIO -from typing import AnyStr, Callable, Dict, List, Optional, Tuple +from email.message import EmailMessage, Message +from io import BufferedIOBase, BytesIO, TextIOWrapper +from typing import ( + AnyStr, + Callable, + Dict, + List, + Optional, + Protocol as TypingProtocol, + Tuple, +) from urllib.parse import ( ParseResultBytes, unquote_to_bytes as unquote, @@ -124,13 +134,14 @@ from incremental import Version from twisted.internet import address, interfaces, protocol from twisted.internet._producer_helpers import _PullToPush from twisted.internet.defer import Deferred -from twisted.internet.interfaces import IProtocol +from twisted.internet.interfaces import IAddress, IDelayedCall, IProtocol, IReactorTime +from twisted.internet.protocol import Protocol from twisted.logger import Logger from twisted.protocols import basic, policies from twisted.python import log from twisted.python.compat import nativeString, networkString from twisted.python.components import proxyForInterface -from twisted.python.deprecate import deprecated +from twisted.python.deprecate import deprecated, deprecatedModuleAttribute from twisted.python.failure import Failure from twisted.web._responses import ( ACCEPTED, @@ -144,6 +155,7 @@ from twisted.web._responses import ( GATEWAY_TIMEOUT, GONE, HTTP_VERSION_NOT_SUPPORTED, + IM_A_TEAPOT, INSUFFICIENT_STORAGE_SPACE, INTERNAL_SERVER_ERROR, LENGTH_REQUIRED, @@ -224,6 +236,58 @@ weekdayname_lower = [name.lower() for name in weekdayname] monthname_lower = [name and name.lower() for name in monthname] +def _parseRequestLine(line: bytes) -> tuple[bytes, bytes, bytes]: + """ + Parse an HTTP request line, which looks like: + + GET /foo/bar HTTP/1.1 + + This function attempts to validate the well-formedness of + the line. RFC 9112 section 3 provides this ABNF: + + request-line = method SP request-target SP HTTP-version + + We allow any method that is a valid token: + + method = token + token = 1*tchar + tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" + / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" + / DIGIT / ALPHA + + We allow any non-empty request-target that contains only printable + ASCII characters (no whitespace). + + The RFC defines HTTP-version like this: + + HTTP-version = HTTP-name "/" DIGIT "." DIGIT + HTTP-name = %s"HTTP" + + However, this function is more strict than the RFC: we only allow + HTTP versions of 1.0 and 1.1, as later versions of HTTP don't use + a request line. + + @returns: C{(method, request, version)} three-tuple + + @raises: L{ValueError} when malformed + """ + method, request, version = line.split(b" ") + + if not _istoken(method): + raise ValueError("Invalid method") + + for c in request: + if c <= 32 or c > 176: + raise ValueError("Invalid request-target") + if request == b"": + raise ValueError("Empty request-target") + + if version != b"HTTP/1.1" and version != b"HTTP/1.0": + raise ValueError("Invalid version") + + return method, request, version + + def _parseContentType(line: bytes) -> bytes: """ Parse the Content-Type header. @@ -251,11 +315,16 @@ def _getMultiPartArgs(content: bytes, ctype: bytes) -> dict[bytes, list[bytes]]: if not msg.is_multipart(): raise _MultiPartParseException("Not a multipart.") - for part in msg.get_payload(): - name = part.get_param("name", header="content-disposition") + part: Message + # "per Python docs, a list of Message objects when is_multipart() is True, + # or a string when is_multipart() is False" + for part in msg.get_payload(): # type:ignore[assignment] + name: str | None = part.get_param( + "name", header="content-disposition" + ) # type:ignore[assignment] if not name: continue - payload = part.get_payload(decode=True) + payload: bytes = part.get_payload(decode=True) # type:ignore[assignment] result[name.encode("utf8")] = [payload] return result @@ -378,7 +447,7 @@ def stringToDatetime(dateString): @type dateString: C{bytes} """ - parts = nativeString(dateString).split() + parts = dateString.decode("ascii").split() if not parts[0][0:3].lower() in weekdayname_lower: # Weekday is stupid. Might have been omitted. @@ -438,6 +507,20 @@ def toChunk(data): return (networkString(f"{len(data):x}"), b"\r\n", data, b"\r\n") +def _istoken(b: bytes) -> bool: + """ + Is the string a token per RFC 9110 section 5.6.2? + """ + for c in b: + if c not in ( + b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" # ALPHA + b"0123456789" # DIGIT + b"!#$%^'*+-.^_`|~" + ): + return False + return b != b"" + + def _ishexdigits(b: bytes) -> bool: """ Is the string case-insensitively hexidecimal? @@ -764,6 +847,14 @@ class HTTPClient(basic.LineReceiver): self.setLineMode(rest) +deprecatedModuleAttribute( + Version("Twisted", 24, 7, 0), + "Use twisted.web.client.Agent instead.", + __name__, + HTTPClient.__name__, +) + + # response codes that must have empty bodies NO_BODY_CODES = (204, 304) @@ -1193,7 +1284,6 @@ class Request: version = self.clientproto code = b"%d" % (self.code,) reason = self.code_message - headers = [] # if we don't have a content length, we send data in # chunked mode, so that we can support pipelining in @@ -1204,7 +1294,7 @@ class Request: and self.method != b"HEAD" and self.code not in NO_BODY_CODES ): - headers.append((b"Transfer-Encoding", b"chunked")) + self.responseHeaders.setRawHeaders("Transfer-Encoding", [b"chunked"]) self.chunked = 1 if self.lastModified is not None: @@ -1221,14 +1311,10 @@ class Request: if self.etag is not None: self.responseHeaders.setRawHeaders(b"ETag", [self.etag]) - for name, values in self.responseHeaders.getAllRawHeaders(): - for value in values: - headers.append((name, value)) + if self.cookies: + self.responseHeaders.setRawHeaders(b"Set-Cookie", self.cookies) - for cookie in self.cookies: - headers.append((b"Set-Cookie", cookie)) - - self.channel.writeHeaders(version, code, reason, headers) + self.channel.writeHeaders(version, code, reason, self.responseHeaders) # if this is a "HEAD" request, we shouldn't return any data if self.method == b"HEAD": @@ -1356,19 +1442,15 @@ class Request: cookie += b"; SameSite=" + sameSite self.cookies.append(cookie) - def setResponseCode(self, code, message=None): + def setResponseCode(self, code: int, message: Optional[bytes] = None) -> None: """ Set the HTTP response code. @type code: L{int} @type message: L{bytes} """ - if not isinstance(code, int): - raise TypeError("HTTP response code must be int or long") self.code = code - if message: - if not isinstance(message, bytes): - raise TypeError("HTTP response status message must be bytes") + if message is not None: self.code_message = message else: self.code_message = RESPONSES.get(code, b"Unknown Status") @@ -2000,16 +2082,21 @@ class _ChunkedTransferDecoder: @returns: C{False}, as there is either insufficient data to continue, or no data remains. """ - if ( - self._receivedTrailerHeadersSize + len(self._buffer) - > self._maxTrailerHeadersSize - ): - raise _MalformedChunkedDataError("Trailer headers data is too long.") - eolIndex = self._buffer.find(b"\r\n", self._start) if eolIndex == -1: # Still no end of network line marker found. + # + # Check if we've run up against the trailer size limit: if the next + # read contains the terminating CRLF then we'll have this many bytes + # of trailers (including the CRLFs). + minTrailerSize = ( + self._receivedTrailerHeadersSize + + len(self._buffer) + + (1 if self._buffer.endswith(b"\r") else 2) + ) + if minTrailerSize > self._maxTrailerHeadersSize: + raise _MalformedChunkedDataError("Trailer headers data is too long.") # Continue processing more data. return False @@ -2019,6 +2106,8 @@ class _ChunkedTransferDecoder: del self._buffer[0 : eolIndex + 2] self._start = 0 self._receivedTrailerHeadersSize += eolIndex + 2 + if self._receivedTrailerHeadersSize > self._maxTrailerHeadersSize: + raise _MalformedChunkedDataError("Trailer headers data is too long.") return True # eolIndex in this part of code is equal to 0 @@ -2268,13 +2357,15 @@ class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin): ) self._networkProducer.registerProducer(self, True) + def dataReceived(self, data): + self.resetTimeout() + basic.LineReceiver.dataReceived(self, data) + def lineReceived(self, line): """ Called for each line from request until the end of headers when it enters binary mode. """ - self.resetTimeout() - self._receivedHeaderSize += len(line) if self._receivedHeaderSize > self.totalHeadersSize: self._respondToBadRequestAndDisconnect() @@ -2302,14 +2393,9 @@ class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin): self.__first_line = 0 - parts = line.split() - if len(parts) != 3: - self._respondToBadRequestAndDisconnect() - return - command, request, version = parts try: - command.decode("ascii") - except UnicodeDecodeError: + command, request, version = _parseRequestLine(line) + except ValueError: self._respondToBadRequestAndDisconnect() return @@ -2342,8 +2428,8 @@ class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin): self.__header = line def _finishRequestBody(self, data): - self.allContentReceived() self._dataBuffer.append(data) + self.allContentReceived() def _maybeChooseTransferDecoder(self, header, data): """ @@ -2410,7 +2496,8 @@ class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin): self._respondToBadRequestAndDisconnect() return False - if not header or header[-1:].isspace(): + # Header names must be tokens, per RFC 9110 section 5.1. + if not _istoken(header): self._respondToBadRequestAndDisconnect() return False @@ -2420,12 +2507,7 @@ class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin): if not self._maybeChooseTransferDecoder(header, data): return False - reqHeaders = self.requests[-1].requestHeaders - values = reqHeaders.getRawHeaders(header) - if values is not None: - values.append(data) - else: - reqHeaders.setRawHeaders(header, [data]) + self.requests[-1].requestHeaders.addRawHeader(header, data) self._receivedHeaderCount += 1 if self._receivedHeaderCount > self.maxHeaders: @@ -2498,8 +2580,6 @@ class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin): self._networkProducer.pauseProducing() return - self.resetTimeout() - try: self._transferDecoder.dataReceived(data) except _MalformedChunkedDataError: @@ -2638,8 +2718,7 @@ class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin): return False def writeHeaders(self, version, code, reason, headers): - """ - Called by L{Request} objects to write a complete set of HTTP headers to + """Called by L{Request} objects to write a complete set of HTTP headers to a transport. @param version: The HTTP version in use. @@ -2652,19 +2731,25 @@ class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin): @type reason: L{bytes} @param headers: The headers to write to the transport. - @type headers: L{twisted.web.http_headers.Headers} - """ - sanitizedHeaders = Headers() - for name, value in headers: - sanitizedHeaders.addRawHeader(name, value) - - responseLine = version + b" " + code + b" " + reason + b"\r\n" - headerSequence = [responseLine] - headerSequence.extend( - name + b": " + value + b"\r\n" - for name, values in sanitizedHeaders.getAllRawHeaders() - for value in values - ) + @type headers: L{twisted.web.http_headers.Headers}, or (for backwards + compatibility purposes only) any iterable of two-tuples of + L{bytes}, representing header names and header values. The latter + option is not actually used by Twisted. + + """ + if not isinstance(headers, Headers): + # Turn into Headers instance for security reasons, to make sure we + # quite and sanitize everything. This variant should be removed + # eventually, it's only here for backwards compatibility. + sanitizedHeaders = Headers() + for name, value in headers: + sanitizedHeaders.addRawHeader(name, value) + headers = sanitizedHeaders + + headerSequence = [version, b" ", code, b" ", reason, b"\r\n"] + for name, values in headers.getAllRawHeaders(): + for value in values: + headerSequence.extend((name, b": ", value, b"\r\n")) headerSequence.append(b"\r\n") self.transport.writeSequence(headerSequence) @@ -3138,11 +3223,9 @@ class _GenericHTTPChannelProtocol(proxyForInterface(IProtocol, "_channel")): # using. """ if self._negotiatedProtocol is None: - try: - negotiatedProtocol = self._channel.transport.negotiatedProtocol - except AttributeError: - # Plaintext HTTP, always HTTP/1.1 - negotiatedProtocol = b"http/1.1" + negotiatedProtocol = getattr( + self._channel.transport, "negotiatedProtocol", b"http/1.1" + ) if negotiatedProtocol is None: negotiatedProtocol = b"http/1.1" @@ -3191,6 +3274,21 @@ def _genericHTTPChannelProtocolFactory(self): return _GenericHTTPChannelProtocol(HTTPChannel()) +class _MinimalLogFile(TypingProtocol): + def write(self, data: str, /) -> object: + """ + Write some data. + """ + + def close(self) -> None: + """ + Close the file. + """ + + +value: type[_MinimalLogFile] = TextIOWrapper + + class HTTPFactory(protocol.ServerFactory): """ Factory for HTTP server. @@ -3221,11 +3319,16 @@ class HTTPFactory(protocol.ServerFactory): protocol = _genericHTTPChannelProtocolFactory # type: ignore[assignment] logPath = None + _logFile: _MinimalLogFile | None = None - timeOut = _REQUEST_TIMEOUT + timeOut: int | float | None = _REQUEST_TIMEOUT def __init__( - self, logPath=None, timeout=_REQUEST_TIMEOUT, logFormatter=None, reactor=None + self, + logPath: str | bytes | None = None, + timeout: int | float = _REQUEST_TIMEOUT, + logFormatter: IAccessLogFormatter | None = None, + reactor: IReactorTime | None = None, ): """ @param logPath: File path to which access log messages will be written @@ -3245,9 +3348,9 @@ class HTTPFactory(protocol.ServerFactory): timeouts and compute logging timestamps. Defaults to the global reactor. """ - if not reactor: - from twisted.internet import reactor - self.reactor = reactor + if reactor is None: + from twisted.internet import reactor # type:ignore[assignment] + self.reactor: IReactorTime = reactor # type:ignore[assignment] if logPath is not None: logPath = os.path.abspath(logPath) @@ -3258,17 +3361,48 @@ class HTTPFactory(protocol.ServerFactory): self._logFormatter = logFormatter # For storing the cached log datetime and the callback to update it - self._logDateTime = None - self._logDateTimeCall = None + self._logDateTime: str | None = None + self._logDateTimeCall: IDelayedCall | None = None + + logFile = property() + """ + A file (object with C{write(data: str)} and C{close()} methods) that will + be used for logging HTTP requests and responses in the standard U{Combined + Log Format <https://en.wikipedia.org/wiki/Common_Log_Format>} . + + @note: for backwards compatibility purposes, this may be I{set} to an + object with a C{write(data: bytes)} method, but these will be detected + (by checking if it's an instance of L{BufferedIOBase}) and replaced + with a L{TextIOWrapper} when retrieved by getting the attribute again. + """ + + @logFile.getter + def _get_logFile(self) -> _MinimalLogFile: + if self._logFile is None: + raise AttributeError("no log file present") + return self._logFile + + @_get_logFile.setter + def _set_logFile(self, newLogFile: BufferedIOBase | _MinimalLogFile) -> None: + if isinstance(newLogFile, BufferedIOBase): + newLogFile = TextIOWrapper( + newLogFile, # type:ignore[arg-type] + "utf-8", + write_through=True, + newline="\n", + ) + self._logFile = newLogFile + + logFile = _set_logFile - def _updateLogDateTime(self): + def _updateLogDateTime(self) -> None: """ Update log datetime periodically, so we aren't always recalculating it. """ self._logDateTime = datetimeToLogString(self.reactor.seconds()) self._logDateTimeCall = self.reactor.callLater(1, self._updateLogDateTime) - def buildProtocol(self, addr): + def buildProtocol(self, addr: IAddress) -> Protocol | None: p = protocol.ServerFactory.buildProtocol(self, addr) # This is a bit of a hack to ensure that the HTTPChannel timeouts @@ -3276,53 +3410,45 @@ class HTTPFactory(protocol.ServerFactory): # ideally be resolved by passing the reactor more generally to the # HTTPChannel, but that won't work for the TimeoutMixin until we fix # https://twistedmatrix.com/trac/ticket/8488 - p.callLater = self.reactor.callLater + p.callLater = self.reactor.callLater # type:ignore[union-attr] # timeOut needs to be on the Protocol instance cause # TimeoutMixin expects it there - p.timeOut = self.timeOut + p.timeOut = self.timeOut # type:ignore[union-attr] return p - def startFactory(self): + def startFactory(self) -> None: """ Set up request logging if necessary. """ if self._logDateTimeCall is None: self._updateLogDateTime() - if self.logPath: - self.logFile = self._openLogFile(self.logPath) - else: - self.logFile = log.logfile + self._logFile = self._openLogFile(self.logPath) if self.logPath else log.logfile - def stopFactory(self): - if hasattr(self, "logFile"): - if self.logFile != log.logfile: - self.logFile.close() - del self.logFile + def stopFactory(self) -> None: + if self._logFile is not None: + if self._logFile != log.logfile: + self._logFile.close() + self._logFile = None if self._logDateTimeCall is not None and self._logDateTimeCall.active(): self._logDateTimeCall.cancel() self._logDateTimeCall = None - def _openLogFile(self, path): + def _openLogFile(self, path: str | bytes) -> _MinimalLogFile: """ Override in subclasses, e.g. to use L{twisted.python.logfile}. """ - f = open(path, "ab", 1) - return f + return open(path, "a", 1, newline="\n") - def log(self, request): + def log(self, request: Request) -> None: """ Write a line representing C{request} to the access log file. @param request: The request object about which to log. - @type request: L{Request} """ - try: - logFile = self.logFile - except AttributeError: - pass - else: + logFile = self._logFile + if logFile is not None: line = self._logFormatter(self._logDateTime, request) + "\n" - logFile.write(line.encode("utf8")) + logFile.write(line) diff --git a/contrib/python/Twisted/py3/twisted/web/http_headers.py b/contrib/python/Twisted/py3/twisted/web/http_headers.py index f810f4bc2c4..8b1d41adb64 100644 --- a/contrib/python/Twisted/py3/twisted/web/http_headers.py +++ b/contrib/python/Twisted/py3/twisted/web/http_headers.py @@ -6,9 +6,9 @@ An API for storing HTTP header names and values. """ -from collections.abc import Sequence as _Sequence from typing import ( AnyStr, + ClassVar, Dict, Iterator, List, @@ -26,17 +26,6 @@ from twisted.python.compat import cmp, comparable _T = TypeVar("_T") -def _dashCapitalize(name: bytes) -> bytes: - """ - Return a byte string which is capitalized using '-' as a word separator. - - @param name: The name of the header to capitalize. - - @return: The given header capitalized using '-' as a word separator. - """ - return b"-".join([word.capitalize() for word in name.split(b"-")]) - - def _sanitizeLinearWhitespace(headerComponent: bytes) -> bytes: r""" Replace linear whitespace (C{\n}, C{\r\n}, C{\r}) in a header key @@ -65,13 +54,17 @@ class Headers: and values as opaque byte strings. @cvar _caseMappings: A L{dict} that maps lowercase header names - to their canonicalized representation. + to their canonicalized representation, for headers with unconventional + capitalization. + + @cvar _canonicalHeaderCache: A L{dict} that maps header names to their + canonicalized representation. @ivar _rawHeaders: A L{dict} mapping header names as L{bytes} to L{list}s of header values as L{bytes}. """ - _caseMappings = { + _caseMappings: ClassVar[Dict[bytes, bytes]] = { b"content-md5": b"Content-MD5", b"dnt": b"DNT", b"etag": b"ETag", @@ -81,6 +74,12 @@ class Headers: b"x-xss-protection": b"X-XSS-Protection", } + _canonicalHeaderCache: ClassVar[Dict[Union[bytes, str], bytes]] = {} + + _MAX_CACHED_HEADERS: ClassVar[int] = 10_000 + + __slots__ = ["_rawHeaders"] + def __init__( self, rawHeaders: Optional[Mapping[AnyStr, Sequence[AnyStr]]] = None, @@ -112,16 +111,36 @@ class Headers: def _encodeName(self, name: Union[str, bytes]) -> bytes: """ - Encode the name of a header (eg 'Content-Type') to an ISO-8859-1 encoded - bytestring if required. + Encode the name of a header (eg 'Content-Type') to an ISO-8859-1 + encoded bytestring if required. It will be canonicalized and + whitespace-sanitized. @param name: A HTTP header name @return: C{name}, encoded if required, lowercased """ - if isinstance(name, str): - return name.lower().encode("iso-8859-1") - return name.lower() + if canonicalName := self._canonicalHeaderCache.get(name, None): + return canonicalName + + bytes_name = name.encode("iso-8859-1") if isinstance(name, str) else name + + if bytes_name.lower() in self._caseMappings: + # Some headers have special capitalization: + result = self._caseMappings[bytes_name.lower()] + else: + result = _sanitizeLinearWhitespace( + b"-".join([word.capitalize() for word in bytes_name.split(b"-")]) + ) + + # In general, we should only see a very small number of header + # variations in the real world, so caching them is fine. However, an + # attacker could generate infinite header variations to fill up RAM, so + # we cap how many we cache. The performance degradation from lack of + # caching won't be that bad, and legit traffic won't hit it. + if len(self._canonicalHeaderCache) < self._MAX_CACHED_HEADERS: + self._canonicalHeaderCache[name] = result + + return result def copy(self): """ @@ -151,21 +170,9 @@ class Headers: """ self._rawHeaders.pop(self._encodeName(name), None) - @overload - def setRawHeaders(self, name: Union[str, bytes], values: Sequence[bytes]) -> None: - ... - - @overload - def setRawHeaders(self, name: Union[str, bytes], values: Sequence[str]) -> None: - ... - - @overload def setRawHeaders( self, name: Union[str, bytes], values: Sequence[Union[str, bytes]] ) -> None: - ... - - def setRawHeaders(self, name: Union[str, bytes], values: object) -> None: """ Sets the raw representation of the given header. @@ -179,29 +186,7 @@ class Headers: @return: L{None} """ - if not isinstance(values, _Sequence): - raise TypeError( - "Header entry %r should be sequence but found " - "instance of %r instead" % (name, type(values)) - ) - - if not isinstance(name, (bytes, str)): - raise TypeError( - f"Header name is an instance of {type(name)!r}, not bytes or str" - ) - - for count, value in enumerate(values): - if not isinstance(value, (bytes, str)): - raise TypeError( - "Header value at position %s is an instance of %r, not " - "bytes or str" - % ( - count, - type(value), - ) - ) - - _name = _sanitizeLinearWhitespace(self._encodeName(name)) + _name = self._encodeName(name) encodedValues: List[bytes] = [] for v in values: if isinstance(v, str): @@ -220,20 +205,7 @@ class Headers: @param value: The value to set for the named header. """ - if not isinstance(name, (bytes, str)): - raise TypeError( - f"Header name is an instance of {type(name)!r}, not bytes or str" - ) - - if not isinstance(value, (bytes, str)): - raise TypeError( - "Header value is an instance of %r, not " - "bytes or str" % (type(value),) - ) - - self._rawHeaders.setdefault( - _sanitizeLinearWhitespace(self._encodeName(name)), [] - ).append( + self._rawHeaders.setdefault(self._encodeName(name), []).append( _sanitizeLinearWhitespace( value.encode("utf8") if isinstance(value, str) else value ) @@ -277,19 +249,7 @@ class Headers: object, as L{bytes}. The keys are capitalized in canonical capitalization. """ - for k, v in self._rawHeaders.items(): - yield self._canonicalNameCaps(k), v - - def _canonicalNameCaps(self, name: bytes) -> bytes: - """ - Return the canonical name for the given header. - - @param name: The all-lowercase header name to capitalize in its - canonical form. - - @return: The canonical name of the header. - """ - return self._caseMappings.get(name, _dashCapitalize(name)) + return iter(self._rawHeaders.items()) __all__ = ["Headers"] diff --git a/contrib/python/Twisted/py3/twisted/web/iweb.py b/contrib/python/Twisted/py3/twisted/web/iweb.py index 1aeb152fd9f..040b916c738 100644 --- a/contrib/python/Twisted/py3/twisted/web/iweb.py +++ b/contrib/python/Twisted/py3/twisted/web/iweb.py @@ -9,6 +9,7 @@ Interface definitions for L{twisted.web}. L{IBodyProducer.length} to indicate that the length of the entity body is not known in advance. """ + from typing import TYPE_CHECKING, Callable, List, Optional from zope.interface import Attribute, Interface @@ -595,15 +596,15 @@ class IResponse(Interface): L{IPushProducer}. The protocol's C{connectionLost} method will be called with: - - ResponseDone, which indicates that all bytes from the response + - L{ResponseDone}, which indicates that all bytes from the response have been successfully delivered. - - PotentialDataLoss, which indicates that it cannot be determined + - L{PotentialDataLoss}, which indicates that it cannot be determined if the entire response body has been delivered. This only occurs when making requests to HTTP servers which do not set I{Content-Length} or a I{Transfer-Encoding} in the response. - - ResponseFailed, which indicates that some bytes from the response + - L{ResponseFailed}, which indicates that some bytes from the response were lost. The C{reasons} attribute of the exception may provide more specific indications as to why. """ diff --git a/contrib/python/Twisted/py3/twisted/web/resource.py b/contrib/python/Twisted/py3/twisted/web/resource.py index 456db72d120..56595d2995b 100644 --- a/contrib/python/Twisted/py3/twisted/web/resource.py +++ b/contrib/python/Twisted/py3/twisted/web/resource.py @@ -28,7 +28,7 @@ from incremental import Version from twisted.python.compat import nativeString from twisted.python.components import proxyForInterface -from twisted.python.deprecate import deprecatedModuleAttribute +from twisted.python.deprecate import deprecated from twisted.python.reflect import prefixedMethodNames from twisted.web._responses import FORBIDDEN, NOT_FOUND from twisted.web.error import UnsupportedMethod @@ -294,15 +294,9 @@ def _computeAllowedMethods(resource): return allowedMethods -class _UnsafeErrorPage(Resource): +class _UnsafeErrorPageBase(Resource): """ - L{_UnsafeErrorPage}, publicly available via the deprecated alias - C{ErrorPage}, is a resource which responds with a particular - (parameterized) status and a body consisting of HTML containing some - descriptive text. This is useful for rendering simple error pages. - - Deprecated in Twisted 22.10.0 because it permits HTML injection; use - L{twisted.web.pages.errorPage} instead. + Base class for deprecated error page resources. @ivar template: A native string which will have a dictionary interpolated into it to generate the response body. The dictionary has the following @@ -355,7 +349,26 @@ class _UnsafeErrorPage(Resource): return self -class _UnsafeNoResource(_UnsafeErrorPage): +class _UnsafeErrorPage(_UnsafeErrorPageBase): + """ + L{_UnsafeErrorPage}, publicly available via the deprecated alias + C{ErrorPage}, is a resource which responds with a particular + (parameterized) status and a body consisting of HTML containing some + descriptive text. This is useful for rendering simple error pages. + + Deprecated in Twisted 22.10.0 because it permits HTML injection; use + L{twisted.web.pages.errorPage} instead. + """ + + @deprecated( + Version("Twisted", 22, 10, 0), + "Use twisted.web.pages.errorPage instead, which properly escapes HTML.", + ) + def __init__(self, status, brief, detail): + _UnsafeErrorPageBase.__init__(self, status, brief, detail) + + +class _UnsafeNoResource(_UnsafeErrorPageBase): """ L{_UnsafeNoResource}, publicly available via the deprecated alias C{NoResource}, is a specialization of L{_UnsafeErrorPage} which @@ -365,11 +378,15 @@ class _UnsafeNoResource(_UnsafeErrorPage): L{twisted.web.pages.notFound} instead. """ + @deprecated( + Version("Twisted", 22, 10, 0), + "Use twisted.web.pages.notFound instead, which properly escapes HTML.", + ) def __init__(self, message="Sorry. No luck finding that resource."): - _UnsafeErrorPage.__init__(self, NOT_FOUND, "No Such Resource", message) + _UnsafeErrorPageBase.__init__(self, NOT_FOUND, "No Such Resource", message) -class _UnsafeForbiddenResource(_UnsafeErrorPage): +class _UnsafeForbiddenResource(_UnsafeErrorPageBase): """ L{_UnsafeForbiddenResource}, publicly available via the deprecated alias C{ForbiddenResource} is a specialization of L{_UnsafeErrorPage} which @@ -379,8 +396,12 @@ class _UnsafeForbiddenResource(_UnsafeErrorPage): L{twisted.web.pages.forbidden} instead. """ + @deprecated( + Version("Twisted", 22, 10, 0), + "Use twisted.web.pages.forbidden instead, which properly escapes HTML.", + ) def __init__(self, message="Sorry, resource is forbidden."): - _UnsafeErrorPage.__init__(self, FORBIDDEN, "Forbidden Resource", message) + _UnsafeErrorPageBase.__init__(self, FORBIDDEN, "Forbidden Resource", message) # Deliberately undocumented public aliases. See GHSA-vg46-2rrj-3647. @@ -388,27 +409,6 @@ ErrorPage = _UnsafeErrorPage NoResource = _UnsafeNoResource ForbiddenResource = _UnsafeForbiddenResource -deprecatedModuleAttribute( - Version("Twisted", 22, 10, 0), - "Use twisted.web.pages.errorPage instead, which properly escapes HTML.", - __name__, - "ErrorPage", -) - -deprecatedModuleAttribute( - Version("Twisted", 22, 10, 0), - "Use twisted.web.pages.notFound instead, which properly escapes HTML.", - __name__, - "NoResource", -) - -deprecatedModuleAttribute( - Version("Twisted", 22, 10, 0), - "Use twisted.web.pages.forbidden instead, which properly escapes HTML.", - __name__, - "ForbiddenResource", -) - class _IEncodingResource(Interface): """ diff --git a/contrib/python/Twisted/py3/twisted/web/server.py b/contrib/python/Twisted/py3/twisted/web/server.py index 6392a3168ae..cfcefad7f36 100644 --- a/contrib/python/Twisted/py3/twisted/web/server.py +++ b/contrib/python/Twisted/py3/twisted/web/server.py @@ -25,19 +25,23 @@ from urllib.parse import quote as _quote from zope.interface import implementer -from incremental import Version - from twisted import copyright from twisted.internet import address, interfaces from twisted.internet.error import AlreadyCalled, AlreadyCancelled from twisted.logger import Logger from twisted.python import components, failure, reflect from twisted.python.compat import nativeString, networkString -from twisted.python.deprecate import deprecatedModuleAttribute from twisted.spread.pb import Copyable, ViewPoint from twisted.web import http, iweb, resource, util from twisted.web.error import UnsupportedMethod -from twisted.web.http import unquote +from twisted.web.http import ( + NO_CONTENT, + NOT_MODIFIED, + HTTPFactory, + Request as _HTTPRequest, + datetimeToString, + unquote, +) NOT_DONE_YET = 1 @@ -51,23 +55,6 @@ __all__ = [ "GzipEncoderFactory", ] - -# backwards compatibility -deprecatedModuleAttribute( - Version("Twisted", 12, 1, 0), - "Please use twisted.web.http.datetimeToString instead", - "twisted.web.server", - "date_time_string", -) -deprecatedModuleAttribute( - Version("Twisted", 12, 1, 0), - "Please use twisted.web.http.stringToDatetime instead", - "twisted.web.server", - "string_date_time", -) -date_time_string = http.datetimeToString -string_date_time = http.stringToDatetime - # Support for other methods may be implemented on a per-resource basis. supportedMethods = (b"GET", b"HEAD", b"POST") @@ -112,7 +99,7 @@ class Request(Copyable, http.Request, components.Componentized): _log = Logger() def __init__(self, *args, **kw): - http.Request.__init__(self, *args, **kw) + _HTTPRequest.__init__(self, *args, **kw) components.Componentized.__init__(self) def getStateToCopyFor(self, issuer): @@ -187,7 +174,7 @@ class Request(Copyable, http.Request, components.Componentized): try: getContentFile = self.channel.site.getContentFile except AttributeError: - http.Request.gotLength(self, length) + _HTTPRequest.gotLength(self, length) else: self.content = getContentFile(length) @@ -206,7 +193,7 @@ class Request(Copyable, http.Request, components.Componentized): # set various default headers self.setHeader(b"server", version) - self.setHeader(b"date", http.datetimeToString()) + self.setHeader(b"date", datetimeToString()) # Resource Identification self.prepath = [] @@ -240,7 +227,7 @@ class Request(Copyable, http.Request, components.Componentized): # NOT_MODIFIED and NO_CONTENT responses. We also omit it if there # is a Content-Length header set to 0, as empty bodies don't need # a content-type. - needsCT = self.code not in (http.NOT_MODIFIED, http.NO_CONTENT) + needsCT = self.code not in (NOT_MODIFIED, NO_CONTENT) contentType = self.responseHeaders.getRawHeaders(b"content-type") contentLength = self.responseHeaders.getRawHeaders(b"content-length") contentLengthZero = contentLength and (contentLength[0] == b"0") @@ -263,17 +250,17 @@ class Request(Copyable, http.Request, components.Componentized): if not self._inFakeHead: if self._encoder: data = self._encoder.encode(data) - http.Request.write(self, data) + _HTTPRequest.write(self, data) def finish(self): """ - Override C{http.Request.finish} for possible encoding. + Override L{twisted.web.http.Request.finish} for possible encoding. """ if self._encoder: data = self._encoder.finish() if data: - http.Request.write(self, data) - return http.Request.finish(self) + _HTTPRequest.write(self, data) + return _HTTPRequest.finish(self) def render(self, resrc): """ @@ -768,7 +755,7 @@ version = networkString(f"TwistedWeb/{copyright.version}") @implementer(interfaces.IProtocolNegotiationFactory) -class Site(http.HTTPFactory): +class Site(HTTPFactory): """ A web site: manage log, sessions, and resources. diff --git a/contrib/python/Twisted/py3/twisted/web/soap.py b/contrib/python/Twisted/py3/twisted/web/soap.py deleted file mode 100644 index cc44b41e2ac..00000000000 --- a/contrib/python/Twisted/py3/twisted/web/soap.py +++ /dev/null @@ -1,166 +0,0 @@ -# -*- test-case-name: twisted.web.test.test_soap -*- -# Copyright (c) Twisted Matrix Laboratories. -# See LICENSE for details. - - -""" -SOAP support for twisted.web. - -Requires SOAPpy 0.10.1 or later. - -Maintainer: Itamar Shtull-Trauring - -Future plans: -SOAPContext support of some kind. -Pluggable method lookup policies. -""" - -# SOAPpy -import SOAPpy - -from twisted.internet import defer - -# twisted imports -from twisted.web import client, resource, server - - -class SOAPPublisher(resource.Resource): - """Publish SOAP methods. - - By default, publish methods beginning with 'soap_'. If the method - has an attribute 'useKeywords', it well get the arguments passed - as keyword args. - """ - - isLeaf = 1 - - # override to change the encoding used for responses - encoding = "UTF-8" - - def lookupFunction(self, functionName): - """Lookup published SOAP function. - - Override in subclasses. Default behaviour - publish methods - starting with soap_. - - @return: callable or None if not found. - """ - return getattr(self, "soap_%s" % functionName, None) - - def render(self, request): - """Handle a SOAP command.""" - data = request.content.read() - - p, header, body, attrs = SOAPpy.parseSOAPRPC(data, 1, 1, 1) - - methodName, args, kwargs = p._name, p._aslist, p._asdict - - # deal with changes in SOAPpy 0.11 - if callable(args): - args = args() - if callable(kwargs): - kwargs = kwargs() - - function = self.lookupFunction(methodName) - - if not function: - self._methodNotFound(request, methodName) - return server.NOT_DONE_YET - else: - if hasattr(function, "useKeywords"): - keywords = {} - for k, v in kwargs.items(): - keywords[str(k)] = v - d = defer.maybeDeferred(function, **keywords) - else: - d = defer.maybeDeferred(function, *args) - - d.addCallback(self._gotResult, request, methodName) - d.addErrback(self._gotError, request, methodName) - return server.NOT_DONE_YET - - def _methodNotFound(self, request, methodName): - response = SOAPpy.buildSOAP( - SOAPpy.faultType( - "%s:Client" % SOAPpy.NS.ENV_T, "Method %s not found" % methodName - ), - encoding=self.encoding, - ) - self._sendResponse(request, response, status=500) - - def _gotResult(self, result, request, methodName): - if not isinstance(result, SOAPpy.voidType): - result = {"Result": result} - response = SOAPpy.buildSOAP( - kw={"%sResponse" % methodName: result}, encoding=self.encoding - ) - self._sendResponse(request, response) - - def _gotError(self, failure, request, methodName): - e = failure.value - if isinstance(e, SOAPpy.faultType): - fault = e - else: - fault = SOAPpy.faultType( - "%s:Server" % SOAPpy.NS.ENV_T, "Method %s failed." % methodName - ) - response = SOAPpy.buildSOAP(fault, encoding=self.encoding) - self._sendResponse(request, response, status=500) - - def _sendResponse(self, request, response, status=200): - request.setResponseCode(status) - - if self.encoding is not None: - mimeType = 'text/xml; charset="%s"' % self.encoding - else: - mimeType = "text/xml" - request.setHeader("Content-type", mimeType) - request.setHeader("Content-length", str(len(response))) - request.write(response) - request.finish() - - -class Proxy: - """A Proxy for making remote SOAP calls. - - Pass the URL of the remote SOAP server to the constructor. - - Use proxy.callRemote('foobar', 1, 2) to call remote method - 'foobar' with args 1 and 2, proxy.callRemote('foobar', x=1) - will call foobar with named argument 'x'. - """ - - # at some point this should have encoding etc. kwargs - def __init__(self, url, namespace=None, header=None): - self.url = url - self.namespace = namespace - self.header = header - - def _cbGotResult(self, result): - result = SOAPpy.parseSOAPRPC(result) - if hasattr(result, "Result"): - return result.Result - elif len(result) == 1: - ## SOAPpy 0.11.6 wraps the return results in a containing structure. - ## This check added to make Proxy behaviour emulate SOAPProxy, which - ## flattens the structure by default. - ## This behaviour is OK because even singleton lists are wrapped in - ## another singleton structType, which is almost always useless. - return result[0] - else: - return result - - def callRemote(self, method, *args, **kwargs): - payload = SOAPpy.buildSOAP( - args=args, - kw=kwargs, - method=method, - header=self.header, - namespace=self.namespace, - ) - return client.getPage( - self.url, - postdata=payload, - method="POST", - headers={"content-type": "text/xml", "SOAPAction": method}, - ).addCallback(self._cbGotResult) diff --git a/contrib/python/Twisted/py3/twisted/web/test/requesthelper.py b/contrib/python/Twisted/py3/twisted/web/test/requesthelper.py index a3b0904427e..d5c8918b302 100644 --- a/contrib/python/Twisted/py3/twisted/web/test/requesthelper.py +++ b/contrib/python/Twisted/py3/twisted/web/test/requesthelper.py @@ -118,6 +118,10 @@ class DummyChannel: pass def writeHeaders(self, version, code, reason, headers): + if isinstance(headers, Headers): + headers = [ + (k, v) for (k, values) in headers.getAllRawHeaders() for v in values + ] response_line = version + b" " + code + b" " + reason + b"\r\n" headerSequence = [response_line] headerSequence.extend(name + b": " + value + b"\r\n" for name, value in headers) diff --git a/contrib/python/Twisted/py3/twisted/web/util.py b/contrib/python/Twisted/py3/twisted/web/util.py index 3135f05cd96..756c870480e 100644 --- a/contrib/python/Twisted/py3/twisted/web/util.py +++ b/contrib/python/Twisted/py3/twisted/web/util.py @@ -9,7 +9,6 @@ An assortment of web server-related utilities. __all__ = [ "redirectTo", "Redirect", - "ChildRedirector", "ParentRedirect", "DeferredResource", "FailureElement", @@ -24,7 +23,6 @@ __all__ = [ from ._template_util import ( _PRE, - ChildRedirector, DeferredResource, FailureElement, ParentRedirect, diff --git a/contrib/python/Twisted/py3/twisted/web/wsgi.py b/contrib/python/Twisted/py3/twisted/web/wsgi.py index 43227f40e32..e979d30416e 100644 --- a/contrib/python/Twisted/py3/twisted/web/wsgi.py +++ b/contrib/python/Twisted/py3/twisted/web/wsgi.py @@ -8,6 +8,7 @@ U{Python Web Server Gateway Interface v1.0.1<http://www.python.org/dev/peps/pep- from collections.abc import Sequence from sys import exc_info +from typing import List, Union from warnings import warn from zope.interface import implementer @@ -19,79 +20,49 @@ from twisted.web.http import INTERNAL_SERVER_ERROR from twisted.web.resource import IResource from twisted.web.server import NOT_DONE_YET -# PEP-3333 -- which has superseded PEP-333 -- states that, in both Python 2 -# and Python 3, text strings MUST be represented using the platform's native -# string type, limited to characters defined in ISO-8859-1. Byte strings are -# used only for values read from wsgi.input, passed to write() or yielded by -# the application. + +# PEP-3333 -- which has superseded PEP-333 -- states that text strings MUST +# be represented using the platform's native string type, limited to +# characters defined in ISO-8859-1. Byte strings are used only for values +# read from wsgi.input, passed to write() or yielded by the application. # # Put another way: # -# - In Python 2, all text strings and binary data are of type str/bytes and -# NEVER of type unicode. Whether the strings contain binary data or -# ISO-8859-1 text depends on context. -# -# - In Python 3, all text strings are of type str, and all binary data are of +# - All text strings are of type str, and all binary data are of # type bytes. Text MUST always be limited to that which can be encoded as # ISO-8859-1, U+0000 to U+00FF inclusive. # # The following pair of functions -- _wsgiString() and _wsgiStringToBytes() -- # are used to make Twisted's WSGI support compliant with the standard. -if str is bytes: - - def _wsgiString(string): # Python 2. - """ - Convert C{string} to an ISO-8859-1 byte string, if it is not already. - - @type string: C{str}/C{bytes} or C{unicode} - @rtype: C{str}/C{bytes} - - @raise UnicodeEncodeError: If C{string} contains non-ISO-8859-1 chars. - """ - if isinstance(string, str): - return string - else: - return string.encode("iso-8859-1") - - def _wsgiStringToBytes(string): # Python 2. - """ - Return C{string} as is; a WSGI string is a byte string in Python 2. - - @type string: C{str}/C{bytes} - @rtype: C{str}/C{bytes} - """ - return string +def _wsgiString(string: Union[str, bytes]) -> str: + """ + Convert C{string} to a WSGI "bytes-as-unicode" string. -else: + If it's a byte string, decode as ISO-8859-1. If it's a Unicode string, + round-trip it to bytes and back using ISO-8859-1 as the encoding. - def _wsgiString(string): # Python 3. - """ - Convert C{string} to a WSGI "bytes-as-unicode" string. + @type string: C{str} or C{bytes} + @rtype: C{str} - If it's a byte string, decode as ISO-8859-1. If it's a Unicode string, - round-trip it to bytes and back using ISO-8859-1 as the encoding. + @raise UnicodeEncodeError: If C{string} contains non-ISO-8859-1 chars. + """ + if isinstance(string, str): + return string.encode("iso-8859-1").decode("iso-8859-1") + else: + return string.decode("iso-8859-1") - @type string: C{str} or C{bytes} - @rtype: C{str} - @raise UnicodeEncodeError: If C{string} contains non-ISO-8859-1 chars. - """ - if isinstance(string, str): - return string.encode("iso-8859-1").decode("iso-8859-1") - else: - return string.decode("iso-8859-1") - - def _wsgiStringToBytes(string): # Python 3. - """ - Convert C{string} from a WSGI "bytes-as-unicode" string to an - ISO-8859-1 byte string. +def _wsgiStringToBytes(string: str) -> bytes: + """ + Convert C{string} from a WSGI "bytes-as-unicode" string to an + ISO-8859-1 byte string. - @type string: C{str} - @rtype: C{bytes} + @type string: C{str} + @rtype: C{bytes} - @raise UnicodeEncodeError: If C{string} contains non-ISO-8859-1 chars. - """ - return string.encode("iso-8859-1") + @raise UnicodeEncodeError: If C{string} contains non-ISO-8859-1 chars. + """ + return string.encode("iso-8859-1") class _ErrorStream: @@ -108,7 +79,7 @@ class _ErrorStream: _log = Logger() - def write(self, data): + def write(self, data: str) -> None: """ Generate an event for the logging system with the given bytes as the message. @@ -117,27 +88,19 @@ class _ErrorStream: @type data: str - @raise TypeError: On Python 3, if C{data} is not a native string. On - Python 2 a warning will be issued. + @raise TypeError: if C{data} is not a native string. """ if not isinstance(data, str): - if str is bytes: - warn( - "write() argument should be str, not %r (%s)" - % (data, type(data).__name__), - category=UnicodeWarning, - ) - else: - raise TypeError( - "write() argument must be str, not %r (%s)" - % (data, type(data).__name__) - ) + raise TypeError( + "write() argument must be str, not %r (%s)" + % (data, type(data).__name__) + ) # Note that in old style, message was a tuple. logger._legacy # will overwrite this value if it is not properly formatted here. self._log.error(data, system="wsgi", isError=True, message=(data,)) - def writelines(self, iovec): + def writelines(self, iovec: List[str]) -> None: """ Join the given lines and pass them to C{write} to be handled in the usual way. @@ -147,8 +110,7 @@ class _ErrorStream: @param iovec: A C{list} of C{'\\n'}-terminated C{str} which will be logged. - @raise TypeError: On Python 3, if C{iovec} contains any non-native - strings. On Python 2 a warning will be issued. + @raise TypeError: if C{iovec} contains any non-native strings. """ self.write("".join(iovec)) @@ -287,9 +249,11 @@ class _WSGIResponse: # All keys and values need to be native strings, i.e. of type str in # *both* Python 2 and Python 3, so says PEP-3333. + remotePeer = request.getClientAddress() self.environ = { "REQUEST_METHOD": _wsgiString(request.method), - "REMOTE_ADDR": _wsgiString(request.getClientAddress().host), + "REMOTE_ADDR": _wsgiString(remotePeer.host), + "REMOTE_PORT": _wsgiString(str(remotePeer.port)), "SCRIPT_NAME": _wsgiString(scriptName), "PATH_INFO": _wsgiString(pathInfo), "QUERY_STRING": _wsgiString(queryString), @@ -357,8 +321,7 @@ class _WSGIResponse: raise excInfo[1].with_traceback(excInfo[2]) # PEP-3333 mandates that status should be a native string. In practice - # this is mandated by Twisted's HTTP implementation too, so we enforce - # on both Python 2 and Python 3. + # this is mandated by Twisted's HTTP implementation too. if not isinstance(status, str): raise TypeError( "status must be str, not {!r} ({})".format( @@ -535,6 +498,9 @@ class WSGIResource: An L{IResource} implementation which delegates responsibility for all resources hierarchically inferior to it to a WSGI application. + The C{environ} argument passed to the application, includes the + C{REMOTE_PORT} key to complement the C{REMOTE_ADDR} key. + @ivar _reactor: An L{IReactorThreads} provider which will be passed on to L{_WSGIResponse} to schedule calls in the I/O thread. |
