author | shmel1k <shmel1k@ydb.tech> | 2023-11-26 18:16:14 +0300
---|---|---
committer | shmel1k <shmel1k@ydb.tech> | 2023-11-26 18:43:30 +0300
commit | b8cf9e88f4c5c64d9406af533d8948deb050d695 (patch) |
tree | 218eb61fb3c3b96ec08b4d8cdfef383104a87d63 /contrib/python/Twisted/py3/twisted/web |
parent | 523f645a83a0ec97a0332dbc3863bb354c92a328 (diff) |
download | ydb-b8cf9e88f4c5c64d9406af533d8948deb050d695.tar.gz |
add kikimr_configure
Diffstat (limited to 'contrib/python/Twisted/py3/twisted/web')
41 files changed, 20898 insertions, 0 deletions
diff --git a/contrib/python/Twisted/py3/twisted/web/__init__.py b/contrib/python/Twisted/py3/twisted/web/__init__.py new file mode 100644 index 0000000000..806dc4a2a4 --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/__init__.py @@ -0,0 +1,12 @@ +# -*- test-case-name: twisted.web.test -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +Twisted Web: HTTP clients and servers, plus tools for implementing them. + +Contains a L{web server<twisted.web.server>} (including an +L{HTTP implementation<twisted.web.http>}, a +L{resource model<twisted.web.resource>}), and +a L{web client<twisted.web.client>}. +""" diff --git a/contrib/python/Twisted/py3/twisted/web/_auth/__init__.py b/contrib/python/Twisted/py3/twisted/web/_auth/__init__.py new file mode 100644 index 0000000000..6a58870091 --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/_auth/__init__.py @@ -0,0 +1,7 @@ +# -*- test-case-name: twisted.web.test.test_httpauth -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +HTTP header-based authentication migrated from web2 +""" diff --git a/contrib/python/Twisted/py3/twisted/web/_auth/basic.py b/contrib/python/Twisted/py3/twisted/web/_auth/basic.py new file mode 100644 index 0000000000..9eed46928f --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/_auth/basic.py @@ -0,0 +1,58 @@ +# -*- test-case-name: twisted.web.test.test_httpauth -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +HTTP BASIC authentication. + +@see: U{http://tools.ietf.org/html/rfc1945} +@see: U{http://tools.ietf.org/html/rfc2616} +@see: U{http://tools.ietf.org/html/rfc2617} +""" + + +import binascii + +from zope.interface import implementer + +from twisted.cred import credentials, error +from twisted.web.iweb import ICredentialFactory + + +@implementer(ICredentialFactory) +class BasicCredentialFactory: + """ + Credential Factory for HTTP Basic Authentication + + @type authenticationRealm: L{bytes} + @ivar authenticationRealm: The HTTP authentication realm which will be issued in + challenges. + """ + + scheme = b"basic" + + def __init__(self, authenticationRealm): + self.authenticationRealm = authenticationRealm + + def getChallenge(self, request): + """ + Return a challenge including the HTTP authentication realm with which + this factory was created. + """ + return {"realm": self.authenticationRealm} + + def decode(self, response, request): + """ + Parse the base64-encoded, colon-separated username and password into a + L{credentials.UsernamePassword} instance. + """ + try: + creds = binascii.a2b_base64(response + b"===") + except binascii.Error: + raise error.LoginFailed("Invalid credentials") + + creds = creds.split(b":", 1) + if len(creds) == 2: + return credentials.UsernamePassword(*creds) + else: + raise error.LoginFailed("Invalid credentials") diff --git a/contrib/python/Twisted/py3/twisted/web/_auth/digest.py b/contrib/python/Twisted/py3/twisted/web/_auth/digest.py new file mode 100644 index 0000000000..e77f337905 --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/_auth/digest.py @@ -0,0 +1,56 @@ +# -*- test-case-name: twisted.web.test.test_httpauth -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. 
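The `BasicCredentialFactory` added above is normally reached through `twisted.web.guard` rather than the private `twisted.web._auth` package. As a rough illustration of what `getChallenge()` and `decode()` produce (the realm, username and password below are made up for the example):

```python
import base64

from twisted.web._auth.basic import BasicCredentialFactory

factory = BasicCredentialFactory(b"example realm")
print(factory.getChallenge(request=None))        # {'realm': b'example realm'}

# A client answers the challenge with "Authorization: Basic <base64(user:pass)>";
# decode() receives only the part after the scheme name.
response = base64.b64encode(b"alice:secret")
creds = factory.decode(response, request=None)   # request is not used by Basic decode
print(creds.username, creds.password)            # b'alice' b'secret'
```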
+ +""" +Implementation of RFC2617: HTTP Digest Authentication + +@see: U{http://www.faqs.org/rfcs/rfc2617.html} +""" + + +from zope.interface import implementer + +from twisted.cred import credentials +from twisted.web.iweb import ICredentialFactory + + +@implementer(ICredentialFactory) +class DigestCredentialFactory: + """ + Wrapper for L{digest.DigestCredentialFactory} that implements the + L{ICredentialFactory} interface. + """ + + scheme = b"digest" + + def __init__(self, algorithm, authenticationRealm): + """ + Create the digest credential factory that this object wraps. + """ + self.digest = credentials.DigestCredentialFactory( + algorithm, authenticationRealm + ) + + def getChallenge(self, request): + """ + Generate the challenge for use in the WWW-Authenticate header + + @param request: The L{IRequest} to with access was denied and for the + response to which this challenge is being generated. + + @return: The L{dict} that can be used to generate a WWW-Authenticate + header. + """ + return self.digest.getChallenge(request.getClientAddress().host) + + def decode(self, response, request): + """ + Create a L{twisted.cred.credentials.DigestedCredentials} object + from the given response and request. + + @see: L{ICredentialFactory.decode} + """ + return self.digest.decode( + response, request.method, request.getClientAddress().host + ) diff --git a/contrib/python/Twisted/py3/twisted/web/_auth/wrapper.py b/contrib/python/Twisted/py3/twisted/web/_auth/wrapper.py new file mode 100644 index 0000000000..cffdcff66c --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/_auth/wrapper.py @@ -0,0 +1,236 @@ +# -*- test-case-name: twisted.web.test.test_httpauth -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +A guard implementation which supports HTTP header-based authentication +schemes. + +If no I{Authorization} header is supplied, an anonymous login will be +attempted by using a L{Anonymous} credentials object. If such a header is +supplied and does not contain allowed credentials, or if anonymous login is +denied, a 401 will be sent in the response along with I{WWW-Authenticate} +headers for each of the allowed authentication schemes. 
+""" + + +from zope.interface import implementer + +from twisted.cred import error +from twisted.cred.credentials import Anonymous +from twisted.logger import Logger +from twisted.python.components import proxyForInterface +from twisted.web import util +from twisted.web.resource import IResource, _UnsafeErrorPage + + +@implementer(IResource) +class UnauthorizedResource: + """ + Simple IResource to escape Resource dispatch + """ + + isLeaf = True + + def __init__(self, factories): + self._credentialFactories = factories + + def render(self, request): + """ + Send www-authenticate headers to the client + """ + + def ensureBytes(s): + return s.encode("ascii") if isinstance(s, str) else s + + def generateWWWAuthenticate(scheme, challenge): + lst = [] + for k, v in challenge.items(): + k = ensureBytes(k) + v = ensureBytes(v) + lst.append(k + b"=" + quoteString(v)) + return b" ".join([scheme, b", ".join(lst)]) + + def quoteString(s): + return b'"' + s.replace(b"\\", rb"\\").replace(b'"', rb"\"") + b'"' + + request.setResponseCode(401) + for fact in self._credentialFactories: + challenge = fact.getChallenge(request) + request.responseHeaders.addRawHeader( + b"www-authenticate", generateWWWAuthenticate(fact.scheme, challenge) + ) + if request.method == b"HEAD": + return b"" + return b"Unauthorized" + + def getChildWithDefault(self, path, request): + """ + Disable resource dispatch + """ + return self + + def putChild(self, path, child): + # IResource.putChild + raise NotImplementedError() + + +@implementer(IResource) +class HTTPAuthSessionWrapper: + """ + Wrap a portal, enforcing supported header-based authentication schemes. + + @ivar _portal: The L{Portal} which will be used to retrieve L{IResource} + avatars. + + @ivar _credentialFactories: A list of L{ICredentialFactory} providers which + will be used to decode I{Authorization} headers into L{ICredentials} + providers. + """ + + isLeaf = False + _log = Logger() + + def __init__(self, portal, credentialFactories): + """ + Initialize a session wrapper + + @type portal: C{Portal} + @param portal: The portal that will authenticate the remote client + + @type credentialFactories: C{Iterable} + @param credentialFactories: The portal that will authenticate the + remote client based on one submitted C{ICredentialFactory} + """ + self._portal = portal + self._credentialFactories = credentialFactories + + def _authorizedResource(self, request): + """ + Get the L{IResource} which the given request is authorized to receive. + If the proper authorization headers are present, the resource will be + requested from the portal. If not, an anonymous login attempt will be + made. + """ + authheader = request.getHeader(b"authorization") + if not authheader: + return util.DeferredResource(self._login(Anonymous())) + + factory, respString = self._selectParseHeader(authheader) + if factory is None: + return UnauthorizedResource(self._credentialFactories) + try: + credentials = factory.decode(respString, request) + except error.LoginFailed: + return UnauthorizedResource(self._credentialFactories) + except BaseException: + self._log.failure("Unexpected failure from credentials factory") + return _UnsafeErrorPage(500, "Internal Error", "") + else: + return util.DeferredResource(self._login(credentials)) + + def render(self, request): + """ + Find the L{IResource} avatar suitable for the given request, if + possible, and render it. Otherwise, perhaps render an error page + requiring authorization or describing an internal server failure. 
+ """ + return self._authorizedResource(request).render(request) + + def getChildWithDefault(self, path, request): + """ + Inspect the Authorization HTTP header, and return a deferred which, + when fired after successful authentication, will return an authorized + C{Avatar}. On authentication failure, an C{UnauthorizedResource} will + be returned, essentially halting further dispatch on the wrapped + resource and all children + """ + # Don't consume any segments of the request - this class should be + # transparent! + request.postpath.insert(0, request.prepath.pop()) + return self._authorizedResource(request) + + def _login(self, credentials): + """ + Get the L{IResource} avatar for the given credentials. + + @return: A L{Deferred} which will be called back with an L{IResource} + avatar or which will errback if authentication fails. + """ + d = self._portal.login(credentials, None, IResource) + d.addCallbacks(self._loginSucceeded, self._loginFailed) + return d + + def _loginSucceeded(self, args): + """ + Handle login success by wrapping the resulting L{IResource} avatar + so that the C{logout} callback will be invoked when rendering is + complete. + """ + interface, avatar, logout = args + + class ResourceWrapper(proxyForInterface(IResource, "resource")): + """ + Wrap an L{IResource} so that whenever it or a child of it + completes rendering, the cred logout hook will be invoked. + + An assumption is made here that exactly one L{IResource} from + among C{avatar} and all of its children will be rendered. If + more than one is rendered, C{logout} will be invoked multiple + times and probably earlier than desired. + """ + + def getChildWithDefault(self, name, request): + """ + Pass through the lookup to the wrapped resource, wrapping + the result in L{ResourceWrapper} to ensure C{logout} is + called when rendering of the child is complete. + """ + return ResourceWrapper(self.resource.getChildWithDefault(name, request)) + + def render(self, request): + """ + Hook into response generation so that when rendering has + finished completely (with or without error), C{logout} is + called. + """ + request.notifyFinish().addBoth(lambda ign: logout()) + return super().render(request) + + return ResourceWrapper(avatar) + + def _loginFailed(self, result): + """ + Handle login failure by presenting either another challenge (for + expected authentication/authorization-related failures) or a server + error page (for anything else). + """ + if result.check(error.Unauthorized, error.LoginFailed): + return UnauthorizedResource(self._credentialFactories) + else: + self._log.failure( + "HTTPAuthSessionWrapper.getChildWithDefault encountered " + "unexpected error", + failure=result, + ) + return _UnsafeErrorPage(500, "Internal Error", "") + + def _selectParseHeader(self, header): + """ + Choose an C{ICredentialFactory} from C{_credentialFactories} + suitable to use to decode the given I{Authenticate} header. + + @return: A two-tuple of a factory and the remaining portion of the + header value to be decoded or a two-tuple of L{None} if no + factory can decode the header value. 
+ """ + elements = header.split(b" ") + scheme = elements[0].lower() + for fact in self._credentialFactories: + if fact.scheme == scheme: + return (fact, b" ".join(elements[1:])) + return (None, None) + + def putChild(self, path, child): + # IResource.putChild + raise NotImplementedError() diff --git a/contrib/python/Twisted/py3/twisted/web/_element.py b/contrib/python/Twisted/py3/twisted/web/_element.py new file mode 100644 index 0000000000..81d724071e --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/_element.py @@ -0,0 +1,200 @@ +# -*- test-case-name: twisted.web.test.test_template -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +import itertools +from typing import ( + TYPE_CHECKING, + Any, + Callable, + List, + Optional, + TypeVar, + Union, + overload, +) + +from zope.interface import implementer + +from twisted.web.error import ( + MissingRenderMethod, + MissingTemplateLoader, + UnexposedMethodError, +) +from twisted.web.iweb import IRenderable, IRequest, ITemplateLoader + +if TYPE_CHECKING: + from twisted.web.template import Flattenable, Tag + + +T = TypeVar("T") +_Tc = TypeVar("_Tc", bound=Callable[..., object]) + + +class Expose: + """ + Helper for exposing methods for various uses using a simple decorator-style + callable. + + Instances of this class can be called with one or more functions as + positional arguments. The names of these functions will be added to a list + on the class object of which they are methods. + """ + + def __call__(self, f: _Tc, /, *funcObjs: Callable[..., object]) -> _Tc: + """ + Add one or more functions to the set of exposed functions. + + This is a way to declare something about a class definition, similar to + L{zope.interface.implementer}. Use it like this:: + + magic = Expose('perform extra magic') + class Foo(Bar): + def twiddle(self, x, y): + ... + def frob(self, a, b): + ... + magic(twiddle, frob) + + Later you can query the object:: + + aFoo = Foo() + magic.get(aFoo, 'twiddle')(x=1, y=2) + + The call to C{get} will fail if the name it is given has not been + exposed using C{magic}. + + @param funcObjs: One or more function objects which will be exposed to + the client. + + @return: The first of C{funcObjs}. + """ + for fObj in itertools.chain([f], funcObjs): + exposedThrough: List[Expose] = getattr(fObj, "exposedThrough", []) + exposedThrough.append(self) + setattr(fObj, "exposedThrough", exposedThrough) + return f + + _nodefault = object() + + @overload + def get(self, instance: object, methodName: str) -> Callable[..., Any]: + ... + + @overload + def get( + self, instance: object, methodName: str, default: T + ) -> Union[Callable[..., Any], T]: + ... + + def get( + self, instance: object, methodName: str, default: object = _nodefault + ) -> object: + """ + Retrieve an exposed method with the given name from the given instance. + + @raise UnexposedMethodError: Raised if C{default} is not specified and + there is no exposed method with the given name. + + @return: A callable object for the named method assigned to the given + instance. 
+ """ + method = getattr(instance, methodName, None) + exposedThrough = getattr(method, "exposedThrough", []) + if self not in exposedThrough: + if default is self._nodefault: + raise UnexposedMethodError(self, methodName) + return default + return method + + +def exposer(thunk: Callable[..., object]) -> Expose: + expose = Expose() + expose.__doc__ = thunk.__doc__ + return expose + + +@exposer +def renderer() -> None: + """ + Decorate with L{renderer} to use methods as template render directives. + + For example:: + + class Foo(Element): + @renderer + def twiddle(self, request, tag): + return tag('Hello, world.') + + <div xmlns:t="http://twistedmatrix.com/ns/twisted.web.template/0.1"> + <span t:render="twiddle" /> + </div> + + Will result in this final output:: + + <div> + <span>Hello, world.</span> + </div> + """ + + +@implementer(IRenderable) +class Element: + """ + Base for classes which can render part of a page. + + An Element is a renderer that can be embedded in a stan document and can + hook its template (from the loader) up to render methods. + + An Element might be used to encapsulate the rendering of a complex piece of + data which is to be displayed in multiple different contexts. The Element + allows the rendering logic to be easily re-used in different ways. + + Element returns render methods which are registered using + L{twisted.web._element.renderer}. For example:: + + class Menu(Element): + @renderer + def items(self, request, tag): + .... + + Render methods are invoked with two arguments: first, the + L{twisted.web.http.Request} being served and second, the tag object which + "invoked" the render method. + + @ivar loader: The factory which will be used to load documents to + return from C{render}. + """ + + loader: Optional[ITemplateLoader] = None + + def __init__(self, loader: Optional[ITemplateLoader] = None): + if loader is not None: + self.loader = loader + + def lookupRenderMethod( + self, name: str + ) -> Callable[[Optional[IRequest], "Tag"], "Flattenable"]: + """ + Look up and return the named render method. + """ + method = renderer.get(self, name, None) + if method is None: + raise MissingRenderMethod(self, name) + return method + + def render(self, request: Optional[IRequest]) -> "Flattenable": + """ + Implement L{IRenderable} to allow one L{Element} to be embedded in + another's template or rendering output. + + (This will simply load the template from the C{loader}; when used in a + template, the flattening engine will keep track of this object + separately as the object to lookup renderers on and call + L{Element.renderer} to look them up. The resulting object from this + method is not directly associated with this L{Element}.) + """ + loader = self.loader + if loader is None: + raise MissingTemplateLoader(self) + return loader.load() diff --git a/contrib/python/Twisted/py3/twisted/web/_flatten.py b/contrib/python/Twisted/py3/twisted/web/_flatten.py new file mode 100644 index 0000000000..87a8bf2dfb --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/_flatten.py @@ -0,0 +1,487 @@ +# -*- test-case-name: twisted.web.test.test_flatten,twisted.web.test.test_template -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +Context-free flattener/serializer for rendering Python objects, possibly +complex or arbitrarily nested, as strings. 
+""" +from __future__ import annotations + +from inspect import iscoroutine +from io import BytesIO +from sys import exc_info +from traceback import extract_tb +from types import GeneratorType +from typing import ( + Any, + Callable, + Coroutine, + Generator, + List, + Mapping, + Optional, + Sequence, + Tuple, + TypeVar, + Union, + cast, +) + +from twisted.internet.defer import Deferred, ensureDeferred +from twisted.python.compat import nativeString +from twisted.python.failure import Failure +from twisted.web._stan import CDATA, CharRef, Comment, Tag, slot, voidElements +from twisted.web.error import FlattenerError, UnfilledSlot, UnsupportedType +from twisted.web.iweb import IRenderable, IRequest + +T = TypeVar("T") + +FlattenableRecursive = Any +""" +For documentation purposes, read C{FlattenableRecursive} as L{Flattenable}. +However, since mypy doesn't support recursive type definitions (yet?), +we'll put Any in the actual definition. +""" + +Flattenable = Union[ + bytes, + str, + slot, + CDATA, + Comment, + Tag, + Tuple[FlattenableRecursive, ...], + List[FlattenableRecursive], + Generator[FlattenableRecursive, None, None], + CharRef, + Deferred[FlattenableRecursive], + Coroutine[Deferred[FlattenableRecursive], object, FlattenableRecursive], + IRenderable, +] +""" +Type alias containing all types that can be flattened by L{flatten()}. +""" + +# The maximum number of bytes to synchronously accumulate in the flattener +# buffer before delivering them onwards. +BUFFER_SIZE = 2**16 + + +def escapeForContent(data: Union[bytes, str]) -> bytes: + """ + Escape some character or UTF-8 byte data for inclusion in an HTML or XML + document, by replacing metacharacters (C{&<>}) with their entity + equivalents (C{&<>}). + + This is used as an input to L{_flattenElement}'s C{dataEscaper} parameter. + + @param data: The string to escape. + + @return: The quoted form of C{data}. If C{data} is L{str}, return a utf-8 + encoded string. + """ + if isinstance(data, str): + data = data.encode("utf-8") + data = data.replace(b"&", b"&").replace(b"<", b"<").replace(b">", b">") + return data + + +def attributeEscapingDoneOutside(data: Union[bytes, str]) -> bytes: + """ + Escape some character or UTF-8 byte data for inclusion in the top level of + an attribute. L{attributeEscapingDoneOutside} actually passes the data + through unchanged, because L{writeWithAttributeEscaping} handles the + quoting of the text within attributes outside the generator returned by + L{_flattenElement}; this is used as the C{dataEscaper} argument to that + L{_flattenElement} call so that that generator does not redundantly escape + its text output. + + @param data: The string to escape. + + @return: The string, unchanged, except for encoding. + """ + if isinstance(data, str): + return data.encode("utf-8") + return data + + +def writeWithAttributeEscaping( + write: Callable[[bytes], object] +) -> Callable[[bytes], None]: + """ + Decorate a C{write} callable so that all output written is properly quoted + for inclusion within an XML attribute value. + + If a L{Tag <twisted.web.template.Tag>} C{x} is flattened within the context + of the contents of another L{Tag <twisted.web.template.Tag>} C{y}, the + metacharacters (C{<>&"}) delimiting C{x} should be passed through + unchanged, but the textual content of C{x} should still be quoted, as + usual. For example: C{<y><x>&</x></y>}. That is the default behavior + of L{_flattenElement} when L{escapeForContent} is passed as the + C{dataEscaper}. 
+ + However, when a L{Tag <twisted.web.template.Tag>} C{x} is flattened within + the context of an I{attribute} of another L{Tag <twisted.web.template.Tag>} + C{y}, then the metacharacters delimiting C{x} should be quoted so that it + can be parsed from the attribute's value. In the DOM itself, this is not a + valid thing to do, but given that renderers and slots may be freely moved + around in a L{twisted.web.template} template, it is a condition which may + arise in a document and must be handled in a way which produces valid + output. So, for example, you should be able to get C{<y attr="<x />" + />}. This should also be true for other XML/HTML meta-constructs such as + comments and CDATA, so if you were to serialize a L{comment + <twisted.web.template.Comment>} in an attribute you should get C{<y + attr="<-- comment -->" />}. Therefore in order to capture these + meta-characters, flattening is done with C{write} callable that is wrapped + with L{writeWithAttributeEscaping}. + + The final case, and hopefully the much more common one as compared to + serializing L{Tag <twisted.web.template.Tag>} and arbitrary L{IRenderable} + objects within an attribute, is to serialize a simple string, and those + should be passed through for L{writeWithAttributeEscaping} to quote + without applying a second, redundant level of quoting. + + @param write: A callable which will be invoked with the escaped L{bytes}. + + @return: A callable that writes data with escaping. + """ + + def _write(data: bytes) -> None: + write(escapeForContent(data).replace(b'"', b""")) + + return _write + + +def escapedCDATA(data: Union[bytes, str]) -> bytes: + """ + Escape CDATA for inclusion in a document. + + @param data: The string to escape. + + @return: The quoted form of C{data}. If C{data} is unicode, return a utf-8 + encoded string. + """ + if isinstance(data, str): + data = data.encode("utf-8") + return data.replace(b"]]>", b"]]]]><![CDATA[>") + + +def escapedComment(data: Union[bytes, str]) -> bytes: + """ + Within comments the sequence C{-->} can be mistaken as the end of the comment. + To ensure consistent parsing and valid output the sequence is replaced with C{-->}. + Furthermore, whitespace is added when a comment ends in a dash. This is done to break + the connection of the ending C{-} with the closing C{-->}. + + @param data: The string to escape. + + @return: The quoted form of C{data}. If C{data} is unicode, return a utf-8 + encoded string. + """ + if isinstance(data, str): + data = data.encode("utf-8") + data = data.replace(b"-->", b"-->") + if data and data[-1:] == b"-": + data += b" " + return data + + +def _getSlotValue( + name: str, + slotData: Sequence[Optional[Mapping[str, Flattenable]]], + default: Optional[Flattenable] = None, +) -> Flattenable: + """ + Find the value of the named slot in the given stack of slot data. + """ + for slotFrame in reversed(slotData): + if slotFrame is not None and name in slotFrame: + return slotFrame[name] + else: + if default is not None: + return default + raise UnfilledSlot(name) + + +def _fork(d: Deferred[T]) -> Deferred[T]: + """ + Create a new L{Deferred} based on C{d} that will fire and fail with C{d}'s + result or error, but will not modify C{d}'s callback type. 
+ """ + d2: Deferred[T] = Deferred(lambda _: d.cancel()) + + def callback(result: T) -> T: + d2.callback(result) + return result + + def errback(failure: Failure) -> Failure: + d2.errback(failure) + return failure + + d.addCallbacks(callback, errback) + return d2 + + +def _flattenElement( + request: Optional[IRequest], + root: Flattenable, + write: Callable[[bytes], object], + slotData: List[Optional[Mapping[str, Flattenable]]], + renderFactory: Optional[IRenderable], + dataEscaper: Callable[[Union[bytes, str]], bytes], + # This is annotated as Generator[T, None, None] instead of Iterator[T] + # because mypy does not consider an Iterator to be an instance of + # GeneratorType. +) -> Generator[Union[Generator[Any, Any, Any], Deferred[Flattenable]], None, None]: + """ + Make C{root} slightly more flat by yielding all its immediate contents as + strings, deferreds or generators that are recursive calls to itself. + + @param request: A request object which will be passed to + L{IRenderable.render}. + + @param root: An object to be made flatter. This may be of type C{unicode}, + L{str}, L{slot}, L{Tag <twisted.web.template.Tag>}, L{tuple}, L{list}, + L{types.GeneratorType}, L{Deferred}, or an object that implements + L{IRenderable}. + + @param write: A callable which will be invoked with each L{bytes} produced + by flattening C{root}. + + @param slotData: A L{list} of L{dict} mapping L{str} slot names to data + with which those slots will be replaced. + + @param renderFactory: If not L{None}, an object that provides + L{IRenderable}. + + @param dataEscaper: A 1-argument callable which takes L{bytes} or + L{unicode} and returns L{bytes}, quoted as appropriate for the + rendering context. This is really only one of two values: + L{attributeEscapingDoneOutside} or L{escapeForContent}, depending on + whether the rendering context is within an attribute or not. See the + explanation in L{writeWithAttributeEscaping}. + + @return: An iterator that eventually writes L{bytes} to C{write}. + It can yield other iterators or L{Deferred}s; if it yields another + iterator, the caller will iterate it; if it yields a L{Deferred}, + the result of that L{Deferred} will be another generator, in which + case it is iterated. See L{_flattenTree} for the trampoline that + consumes said values. 
+ """ + + def keepGoing( + newRoot: Flattenable, + dataEscaper: Callable[[Union[bytes, str]], bytes] = dataEscaper, + renderFactory: Optional[IRenderable] = renderFactory, + write: Callable[[bytes], object] = write, + ) -> Generator[Union[Flattenable, Deferred[Flattenable]], None, None]: + return _flattenElement( + request, newRoot, write, slotData, renderFactory, dataEscaper + ) + + def keepGoingAsync(result: Deferred[Flattenable]) -> Deferred[Flattenable]: + return result.addCallback(keepGoing) + + if isinstance(root, (bytes, str)): + write(dataEscaper(root)) + elif isinstance(root, slot): + slotValue = _getSlotValue(root.name, slotData, root.default) + yield keepGoing(slotValue) + elif isinstance(root, CDATA): + write(b"<![CDATA[") + write(escapedCDATA(root.data)) + write(b"]]>") + elif isinstance(root, Comment): + write(b"<!--") + write(escapedComment(root.data)) + write(b"-->") + elif isinstance(root, Tag): + slotData.append(root.slotData) + rendererName = root.render + if rendererName is not None: + if renderFactory is None: + raise ValueError( + f'Tag wants to be rendered by method "{rendererName}" ' + f"but is not contained in any IRenderable" + ) + rootClone = root.clone(False) + rootClone.render = None + renderMethod = renderFactory.lookupRenderMethod(rendererName) + result = renderMethod(request, rootClone) + yield keepGoing(result) + slotData.pop() + return + + if not root.tagName: + yield keepGoing(root.children) + return + + write(b"<") + if isinstance(root.tagName, str): + tagName = root.tagName.encode("ascii") + else: + tagName = root.tagName + write(tagName) + for k, v in root.attributes.items(): + if isinstance(k, str): + k = k.encode("ascii") + write(b" " + k + b'="') + # Serialize the contents of the attribute, wrapping the results of + # that serialization so that _everything_ is quoted. + yield keepGoing( + v, attributeEscapingDoneOutside, write=writeWithAttributeEscaping(write) + ) + write(b'"') + if root.children or nativeString(tagName) not in voidElements: + write(b">") + # Regardless of whether we're in an attribute or not, switch back + # to the escapeForContent dataEscaper. The contents of a tag must + # be quoted no matter what; in the top-level document, just so + # they're valid, and if they're within an attribute, they have to + # be quoted so that after applying the *un*-quoting required to re- + # parse the tag within the attribute, all the quoting is still + # correct. + yield keepGoing(root.children, escapeForContent) + write(b"</" + tagName + b">") + else: + write(b" />") + + elif isinstance(root, (tuple, list, GeneratorType)): + for element in root: + yield keepGoing(element) + elif isinstance(root, CharRef): + escaped = "&#%d;" % (root.ordinal,) + write(escaped.encode("ascii")) + elif isinstance(root, Deferred): + yield keepGoingAsync(_fork(root)) + elif iscoroutine(root): + yield keepGoingAsync( + Deferred.fromCoroutine( + cast(Coroutine[Deferred[Flattenable], object, Flattenable], root) + ) + ) + elif IRenderable.providedBy(root): + result = root.render(request) + yield keepGoing(result, renderFactory=root) + else: + raise UnsupportedType(root) + + +async def _flattenTree( + request: Optional[IRequest], root: Flattenable, write: Callable[[bytes], object] +) -> None: + """ + Make C{root} into an iterable of L{bytes} and L{Deferred} by doing a depth + first traversal of the tree. + + @param request: A request object which will be passed to + L{IRenderable.render}. + + @param root: An object to be made flatter. 
This may be of type C{unicode}, + L{bytes}, L{slot}, L{Tag <twisted.web.template.Tag>}, L{tuple}, + L{list}, L{types.GeneratorType}, L{Deferred}, or something providing + L{IRenderable}. + + @param write: A callable which will be invoked with each L{bytes} produced + by flattening C{root}. + + @return: A C{Deferred}-returning coroutine that resolves to C{None}. + """ + buf = [] + bufSize = 0 + + # Accumulate some bytes up to the buffer size so that we don't annoy the + # upstream writer with a million tiny string. + def bufferedWrite(bs: bytes) -> None: + nonlocal bufSize + buf.append(bs) + bufSize += len(bs) + if bufSize >= BUFFER_SIZE: + flushBuffer() + + # Deliver the buffered content to the upstream writer as a single string. + # This is how a "big enough" buffer gets delivered, how a buffer of any + # size is delivered before execution is suspended to wait for an + # asynchronous value, and how anything left in the buffer when we're + # finished is delivered. + def flushBuffer() -> None: + nonlocal bufSize + if bufSize > 0: + write(b"".join(buf)) + del buf[:] + bufSize = 0 + + stack: List[Generator[Any, Any, Any]] = [ + _flattenElement(request, root, bufferedWrite, [], None, escapeForContent) + ] + + while stack: + try: + frame = stack[-1].gi_frame + element = next(stack[-1]) + if isinstance(element, Deferred): + # Before suspending flattening for an unknown amount of time, + # flush whatever data we have collected so far. + flushBuffer() + element = await element + except StopIteration: + stack.pop() + except Exception as e: + stack.pop() + roots = [] + for generator in stack: + roots.append(generator.gi_frame.f_locals["root"]) + roots.append(frame.f_locals["root"]) + raise FlattenerError(e, roots, extract_tb(exc_info()[2])) + else: + stack.append(element) + + # Flush any data that remains in the buffer before finishing. + flushBuffer() + + +def flatten( + request: Optional[IRequest], root: Flattenable, write: Callable[[bytes], object] +) -> Deferred[None]: + """ + Incrementally write out a string representation of C{root} using C{write}. + + In order to create a string representation, C{root} will be decomposed into + simpler objects which will themselves be decomposed and so on until strings + or objects which can easily be converted to strings are encountered. + + @param request: A request object which will be passed to the C{render} + method of any L{IRenderable} provider which is encountered. + + @param root: An object to be made flatter. This may be of type L{str}, + L{bytes}, L{slot}, L{Tag <twisted.web.template.Tag>}, L{tuple}, + L{list}, L{types.GeneratorType}, L{Deferred}, or something that + provides L{IRenderable}. + + @param write: A callable which will be invoked with each L{bytes} produced + by flattening C{root}. + + @return: A L{Deferred} which will be called back with C{None} when C{root} + has been completely flattened into C{write} or which will be errbacked + if an unexpected exception occurs. + """ + return ensureDeferred(_flattenTree(request, root, write)) + + +def flattenString(request: Optional[IRequest], root: Flattenable) -> Deferred[bytes]: + """ + Collate a string representation of C{root} into a single string. + + This is basically gluing L{flatten} to an L{io.BytesIO} and returning + the results. See L{flatten} for the exact meanings of C{request} and + C{root}. 
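As a concrete illustration of what `flatten()`/`flattenString()` accept (strings, Tags, slots, Deferreds) and of the content escaping described above; the slot name and values are made up, and the expected output is shown as a comment:

```python
from twisted.internet.defer import succeed
from twisted.web.template import flattenString, slot, tags

doc = tags.p("count: ", slot("n")).fillSlots(n=succeed("42 & counting"))
flattenString(None, doc).addCallback(print)
# b'<p>count: 42 &amp; counting</p>'
```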
+ + @return: A L{Deferred} which will be called back with a single UTF-8 encoded + string as its result when C{root} has been completely flattened or which + will be errbacked if an unexpected exception occurs. + """ + io = BytesIO() + d = flatten(request, root, io.write) + d.addCallback(lambda _: io.getvalue()) + return cast(Deferred[bytes], d) diff --git a/contrib/python/Twisted/py3/twisted/web/_http2.py b/contrib/python/Twisted/py3/twisted/web/_http2.py new file mode 100644 index 0000000000..57762e1805 --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/_http2.py @@ -0,0 +1,1283 @@ +# -*- test-case-name: twisted.web.test.test_http2 -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +HTTP2 Implementation + +This is the basic server-side protocol implementation used by the Twisted +Web server for HTTP2. This functionality is intended to be combined with the +HTTP/1.1 and HTTP/1.0 functionality in twisted.web.http to provide complete +protocol support for HTTP-type protocols. + +This API is currently considered private because it's in early draft form. When +it has stabilised, it'll be made public. +""" + + +import io +from collections import deque +from typing import List + +from zope.interface import implementer + +import h2.config # type: ignore[import] +import h2.connection # type: ignore[import] +import h2.errors # type: ignore[import] +import h2.events # type: ignore[import] +import h2.exceptions # type: ignore[import] +import priority # type: ignore[import] + +from twisted.internet._producer_helpers import _PullToPush +from twisted.internet.defer import Deferred +from twisted.internet.error import ConnectionLost +from twisted.internet.interfaces import ( + IConsumer, + IProtocol, + IPushProducer, + ISSLTransport, + ITransport, +) +from twisted.internet.protocol import Protocol +from twisted.logger import Logger +from twisted.protocols.policies import TimeoutMixin +from twisted.python.failure import Failure +from twisted.web.error import ExcessiveBufferingError + +# This API is currently considered private. +__all__: List[str] = [] + + +_END_STREAM_SENTINEL = object() + + +@implementer(IProtocol, IPushProducer) +class H2Connection(Protocol, TimeoutMixin): + """ + A class representing a single HTTP/2 connection. + + This implementation of L{IProtocol} works hand in hand with L{H2Stream}. + This is because we have the requirement to register multiple producers for + a single HTTP/2 connection, one for each stream. The standard Twisted + interfaces don't really allow for this, so instead there's a custom + interface between the two objects that allows them to work hand-in-hand here. + + @ivar conn: The HTTP/2 connection state machine. + @type conn: L{h2.connection.H2Connection} + + @ivar streams: A mapping of stream IDs to L{H2Stream} objects, used to call + specific methods on streams when events occur. + @type streams: L{dict}, mapping L{int} stream IDs to L{H2Stream} objects. + + @ivar priority: A HTTP/2 priority tree used to ensure that responses are + prioritised appropriately. + @type priority: L{priority.PriorityTree} + + @ivar _consumerBlocked: A flag tracking whether or not the L{IConsumer} + that is consuming this data has asked us to stop producing. + @type _consumerBlocked: L{bool} + + @ivar _sendingDeferred: A L{Deferred} used to restart the data-sending loop + when more response data has been produced. Will not be present if there + is outstanding data still to send. 
+ @type _consumerBlocked: A L{twisted.internet.defer.Deferred}, or L{None} + + @ivar _outboundStreamQueues: A map of stream IDs to queues, used to store + data blocks that are yet to be sent on the connection. These are used + both to handle producers that do not respect L{IConsumer} but also to + allow priority to multiplex data appropriately. + @type _outboundStreamQueues: A L{dict} mapping L{int} stream IDs to + L{collections.deque} queues, which contain either L{bytes} objects or + C{_END_STREAM_SENTINEL}. + + @ivar _sender: A handle to the data-sending loop, allowing it to be + terminated if needed. + @type _sender: L{twisted.internet.task.LoopingCall} + + @ivar abortTimeout: The number of seconds to wait after we attempt to shut + the transport down cleanly to give up and forcibly terminate it. This + is only used when we time a connection out, to prevent errors causing + the FD to get leaked. If this is L{None}, we will wait forever. + @type abortTimeout: L{int} + + @ivar _abortingCall: The L{twisted.internet.base.DelayedCall} that will be + used to forcibly close the transport if it doesn't close cleanly. + @type _abortingCall: L{twisted.internet.base.DelayedCall} + """ + + factory = None + site = None + abortTimeout = 15 + + _log = Logger() + _abortingCall = None + + def __init__(self, reactor=None): + config = h2.config.H2Configuration(client_side=False, header_encoding=None) + self.conn = h2.connection.H2Connection(config=config) + self.streams = {} + + self.priority = priority.PriorityTree() + self._consumerBlocked = None + self._sendingDeferred = None + self._outboundStreamQueues = {} + self._streamCleanupCallbacks = {} + self._stillProducing = True + + # Limit the number of buffered control frame (e.g. PING and + # SETTINGS) bytes. + self._maxBufferedControlFrameBytes = 1024 * 17 + self._bufferedControlFrames = deque() + self._bufferedControlFrameBytes = 0 + + if reactor is None: + from twisted.internet import reactor + self._reactor = reactor + + # Start the data sending function. + self._reactor.callLater(0, self._sendPrioritisedData) + + # Implementation of IProtocol + def connectionMade(self): + """ + Called by the reactor when a connection is received. May also be called + by the L{twisted.web.http._GenericHTTPChannelProtocol} during upgrade + to HTTP/2. + """ + self.setTimeout(self.timeOut) + self.conn.initiate_connection() + self.transport.write(self.conn.data_to_send()) + + def dataReceived(self, data): + """ + Called whenever a chunk of data is received from the transport. + + @param data: The data received from the transport. 
+ @type data: L{bytes} + """ + try: + events = self.conn.receive_data(data) + except h2.exceptions.ProtocolError: + stillActive = self._tryToWriteControlData() + if stillActive: + self.transport.loseConnection() + self.connectionLost(Failure(), _cancelTimeouts=False) + return + + # Only reset the timeout if we've received an actual H2 + # protocol message + self.resetTimeout() + + for event in events: + if isinstance(event, h2.events.RequestReceived): + self._requestReceived(event) + elif isinstance(event, h2.events.DataReceived): + self._requestDataReceived(event) + elif isinstance(event, h2.events.StreamEnded): + self._requestEnded(event) + elif isinstance(event, h2.events.StreamReset): + self._requestAborted(event) + elif isinstance(event, h2.events.WindowUpdated): + self._handleWindowUpdate(event) + elif isinstance(event, h2.events.PriorityUpdated): + self._handlePriorityUpdate(event) + elif isinstance(event, h2.events.ConnectionTerminated): + self.transport.loseConnection() + self.connectionLost( + Failure(ConnectionLost("Remote peer sent GOAWAY")), + _cancelTimeouts=False, + ) + + self._tryToWriteControlData() + + def timeoutConnection(self): + """ + Called when the connection has been inactive for + L{self.timeOut<twisted.protocols.policies.TimeoutMixin.timeOut>} + seconds. Cleanly tears the connection down, attempting to notify the + peer if needed. + + We override this method to add two extra bits of functionality: + + - We want to log the timeout. + - We want to send a GOAWAY frame indicating that the connection is + being terminated, and whether it was clean or not. We have to do this + before the connection is torn down. + """ + self._log.info("Timing out client {client}", client=self.transport.getPeer()) + + # Check whether there are open streams. If there are, we're going to + # want to use the error code PROTOCOL_ERROR. If there aren't, use + # NO_ERROR. + if self.conn.open_outbound_streams > 0 or self.conn.open_inbound_streams > 0: + error_code = h2.errors.ErrorCodes.PROTOCOL_ERROR + else: + error_code = h2.errors.ErrorCodes.NO_ERROR + + self.conn.close_connection(error_code=error_code) + self.transport.write(self.conn.data_to_send()) + + # Don't let the client hold this connection open too long. + if self.abortTimeout is not None: + # We use self.callLater because that's what TimeoutMixin does, even + # though we have a perfectly good reactor sitting around. See + # https://twistedmatrix.com/trac/ticket/8488. + self._abortingCall = self.callLater( + self.abortTimeout, self.forceAbortClient + ) + + # We're done, throw the connection away. + self.transport.loseConnection() + + def forceAbortClient(self): + """ + Called if C{abortTimeout} seconds have passed since the timeout fired, + and the connection still hasn't gone away. This can really only happen + on extremely bad connections or when clients are maliciously attempting + to keep connections open. + """ + self._log.info( + "Forcibly timing out client: {client}", client=self.transport.getPeer() + ) + # We want to lose track of the _abortingCall so that no-one tries to + # cancel it. + self._abortingCall = None + self.transport.abortConnection() + + def connectionLost(self, reason, _cancelTimeouts=True): + """ + Called when the transport connection is lost. + + Informs all outstanding response handlers that the connection + has been lost, and cleans up all internal state. 
+ + @param reason: See L{IProtocol.connectionLost} + + @param _cancelTimeouts: Propagate the C{reason} to this + connection's streams but don't cancel any timers, so that + peers who never read the data we've written are eventually + timed out. + """ + self._stillProducing = False + if _cancelTimeouts: + self.setTimeout(None) + + for stream in self.streams.values(): + stream.connectionLost(reason) + + for streamID in list(self.streams.keys()): + self._requestDone(streamID) + + # If we were going to force-close the transport, we don't have to now. + if _cancelTimeouts and self._abortingCall is not None: + self._abortingCall.cancel() + self._abortingCall = None + + # Implementation of IPushProducer + # + # Here's how we handle IPushProducer. We have multiple outstanding + # H2Streams. Each of these exposes an IConsumer interface to the response + # handler that allows it to push data into the H2Stream. The H2Stream then + # writes the data into the H2Connection object. + # + # The H2Connection needs to manage these writes to account for: + # + # - flow control + # - priority + # + # We manage each of these in different ways. + # + # For flow control, we simply use the equivalent of the IPushProducer + # interface. We simply tell the H2Stream: "Hey, you can't send any data + # right now, sorry!". When that stream becomes unblocked, we free it up + # again. This allows the H2Stream to propagate this backpressure up the + # chain. + # + # For priority, we need to keep a backlog of data frames that we can send, + # and interleave them appropriately. This backlog is most sensibly kept in + # the H2Connection object itself. We keep one queue per stream, which is + # where the writes go, and then we have a loop that manages popping these + # streams off in priority order. + # + # Logically then, we go as follows: + # + # 1. Stream calls writeDataToStream(). This causes a DataFrame to be placed + # on the queue for that stream. It also informs the priority + # implementation that this stream is unblocked. + # 2. The _sendPrioritisedData() function spins in a tight loop. Each + # iteration it asks the priority implementation which stream should send + # next, and pops a data frame off that stream's queue. If, after sending + # that frame, there is no data left on that stream's queue, the function + # informs the priority implementation that the stream is blocked. + # + # If all streams are blocked, or if there are no outstanding streams, the + # _sendPrioritisedData function waits to be awoken when more data is ready + # to send. + # + # Note that all of this only applies to *data*. Headers and other control + # frames deliberately skip this processing as they are not subject to flow + # control or priority constraints. Instead, they are stored in their own buffer + # which is used primarily to detect excessive buffering. + def stopProducing(self): + """ + Stop producing data. + + This tells the L{H2Connection} that its consumer has died, so it must + stop producing data for good. + """ + self.connectionLost(Failure(ConnectionLost("Producing stopped"))) + + def pauseProducing(self): + """ + Pause producing data. + + Tells the L{H2Connection} that it has produced too much data to process + for the time being, and to stop until resumeProducing() is called. + """ + self._consumerBlocked = Deferred() + # Ensure pending control data (if any) are sent first. + self._consumerBlocked.addCallback(self._flushBufferedControlData) + + def resumeProducing(self): + """ + Resume producing data. 
+ + This tells the L{H2Connection} to re-add itself to the main loop and + produce more data for the consumer. + """ + if self._consumerBlocked is not None: + d = self._consumerBlocked + self._consumerBlocked = None + d.callback(None) + + def _sendPrioritisedData(self, *args): + """ + The data sending loop. This function repeatedly calls itself, either + from L{Deferred}s or from + L{reactor.callLater<twisted.internet.interfaces.IReactorTime.callLater>} + + This function sends data on streams according to the rules of HTTP/2 + priority. It ensures that the data from each stream is interleved + according to the priority signalled by the client, making sure that the + connection is used with maximal efficiency. + + This function will execute if data is available: if all data is + exhausted, the function will place a deferred onto the L{H2Connection} + object and wait until it is called to resume executing. + """ + # If producing has stopped, we're done. Don't reschedule ourselves + if not self._stillProducing: + return + + stream = None + + while stream is None: + try: + stream = next(self.priority) + except priority.DeadlockError: + # All streams are currently blocked or not progressing. Wait + # until a new one becomes available. + assert self._sendingDeferred is None + self._sendingDeferred = Deferred() + self._sendingDeferred.addCallback(self._sendPrioritisedData) + return + + # Wait behind the transport. + if self._consumerBlocked is not None: + self._consumerBlocked.addCallback(self._sendPrioritisedData) + return + + self.resetTimeout() + + remainingWindow = self.conn.local_flow_control_window(stream) + frameData = self._outboundStreamQueues[stream].popleft() + maxFrameSize = min(self.conn.max_outbound_frame_size, remainingWindow) + + if frameData is _END_STREAM_SENTINEL: + # There's no error handling here even though this can throw + # ProtocolError because we really shouldn't encounter this problem. + # If we do, that's a nasty bug. + self.conn.end_stream(stream) + self.transport.write(self.conn.data_to_send()) + + # Clean up the stream + self._requestDone(stream) + else: + # Respect the max frame size. + if len(frameData) > maxFrameSize: + excessData = frameData[maxFrameSize:] + frameData = frameData[:maxFrameSize] + self._outboundStreamQueues[stream].appendleft(excessData) + + # There's deliberately no error handling here, because this just + # absolutely should not happen. + # If for whatever reason the max frame length is zero and so we + # have no frame data to send, don't send any. + if frameData: + self.conn.send_data(stream, frameData) + self.transport.write(self.conn.data_to_send()) + + # If there's no data left, this stream is now blocked. + if not self._outboundStreamQueues[stream]: + self.priority.block(stream) + + # Also, if the stream's flow control window is exhausted, tell it + # to stop. + if self.remainingOutboundWindow(stream) <= 0: + self.streams[stream].flowControlBlocked() + + self._reactor.callLater(0, self._sendPrioritisedData) + + # Internal functions. + def _requestReceived(self, event): + """ + Internal handler for when a request has been received. + + @param event: The Hyper-h2 event that encodes information about the + received request. 
+ @type event: L{h2.events.RequestReceived} + """ + stream = H2Stream( + event.stream_id, + self, + event.headers, + self.requestFactory, + self.site, + self.factory, + ) + self.streams[event.stream_id] = stream + self._streamCleanupCallbacks[event.stream_id] = Deferred() + self._outboundStreamQueues[event.stream_id] = deque() + + # Add the stream to the priority tree but immediately block it. + try: + self.priority.insert_stream(event.stream_id) + except priority.DuplicateStreamError: + # Stream already in the tree. This can happen if we received a + # PRIORITY frame before a HEADERS frame. Just move on: we set the + # stream up properly in _handlePriorityUpdate. + pass + else: + self.priority.block(event.stream_id) + + def _requestDataReceived(self, event): + """ + Internal handler for when a chunk of data is received for a given + request. + + @param event: The Hyper-h2 event that encodes information about the + received data. + @type event: L{h2.events.DataReceived} + """ + stream = self.streams[event.stream_id] + stream.receiveDataChunk(event.data, event.flow_controlled_length) + + def _requestEnded(self, event): + """ + Internal handler for when a request is complete, and we expect no + further data for that request. + + @param event: The Hyper-h2 event that encodes information about the + completed stream. + @type event: L{h2.events.StreamEnded} + """ + stream = self.streams[event.stream_id] + stream.requestComplete() + + def _requestAborted(self, event): + """ + Internal handler for when a request is aborted by a remote peer. + + @param event: The Hyper-h2 event that encodes information about the + reset stream. + @type event: L{h2.events.StreamReset} + """ + stream = self.streams[event.stream_id] + stream.connectionLost( + Failure(ConnectionLost("Stream reset with code %s" % event.error_code)) + ) + self._requestDone(event.stream_id) + + def _handlePriorityUpdate(self, event): + """ + Internal handler for when a stream priority is updated. + + @param event: The Hyper-h2 event that encodes information about the + stream reprioritization. + @type event: L{h2.events.PriorityUpdated} + """ + try: + self.priority.reprioritize( + stream_id=event.stream_id, + depends_on=event.depends_on or None, + weight=event.weight, + exclusive=event.exclusive, + ) + except priority.MissingStreamError: + # A PRIORITY frame arrived before the HEADERS frame that would + # trigger us to insert the stream into the tree. That's fine: we + # can create the stream here and mark it as blocked. + self.priority.insert_stream( + stream_id=event.stream_id, + depends_on=event.depends_on or None, + weight=event.weight, + exclusive=event.exclusive, + ) + self.priority.block(event.stream_id) + + def writeHeaders(self, version, code, reason, headers, streamID): + """ + Called by L{twisted.web.http.Request} objects to write a complete set + of HTTP headers to a stream. + + @param version: The HTTP version in use. Unused in HTTP/2. + @type version: L{bytes} + + @param code: The HTTP status code to write. + @type code: L{bytes} + + @param reason: The HTTP reason phrase to write. Unused in HTTP/2. + @type reason: L{bytes} + + @param headers: The headers to write to the stream. + @type headers: L{twisted.web.http_headers.Headers} + + @param streamID: The ID of the stream to write the headers to. + @type streamID: L{int} + """ + headers.insert(0, (b":status", code)) + + try: + self.conn.send_headers(streamID, headers) + except h2.exceptions.StreamClosedError: + # Stream was closed by the client at some point. 
We need to not + # explode here: just swallow the error. That's what write() does + # when a connection is lost, so that's what we do too. + return + else: + self._tryToWriteControlData() + + def writeDataToStream(self, streamID, data): + """ + May be called by L{H2Stream} objects to write response data to a given + stream. Writes a single data frame. + + @param streamID: The ID of the stream to write the data to. + @type streamID: L{int} + + @param data: The data chunk to write to the stream. + @type data: L{bytes} + """ + self._outboundStreamQueues[streamID].append(data) + + # There's obviously no point unblocking this stream and the sending + # loop if the data can't actually be sent, so confirm that there's + # some room to send data. + if self.conn.local_flow_control_window(streamID) > 0: + self.priority.unblock(streamID) + if self._sendingDeferred is not None: + d = self._sendingDeferred + self._sendingDeferred = None + d.callback(streamID) + + if self.remainingOutboundWindow(streamID) <= 0: + self.streams[streamID].flowControlBlocked() + + def endRequest(self, streamID): + """ + Called by L{H2Stream} objects to signal completion of a response. + + @param streamID: The ID of the stream to write the data to. + @type streamID: L{int} + """ + self._outboundStreamQueues[streamID].append(_END_STREAM_SENTINEL) + self.priority.unblock(streamID) + if self._sendingDeferred is not None: + d = self._sendingDeferred + self._sendingDeferred = None + d.callback(streamID) + + def abortRequest(self, streamID): + """ + Called by L{H2Stream} objects to request early termination of a stream. + This emits a RstStream frame and then removes all stream state. + + @param streamID: The ID of the stream to write the data to. + @type streamID: L{int} + """ + self.conn.reset_stream(streamID) + stillActive = self._tryToWriteControlData() + if stillActive: + self._requestDone(streamID) + + def _requestDone(self, streamID): + """ + Called internally by the data sending loop to clean up state that was + being used for the stream. Called when the stream is complete. + + @param streamID: The ID of the stream to clean up state for. + @type streamID: L{int} + """ + del self._outboundStreamQueues[streamID] + self.priority.remove_stream(streamID) + del self.streams[streamID] + cleanupCallback = self._streamCleanupCallbacks.pop(streamID) + cleanupCallback.callback(streamID) + + def remainingOutboundWindow(self, streamID): + """ + Called to determine how much room is left in the send window for a + given stream. Allows us to handle blocking and unblocking producers. + + @param streamID: The ID of the stream whose flow control window we'll + check. + @type streamID: L{int} + + @return: The amount of room remaining in the send window for the given + stream, including the data queued to be sent. + @rtype: L{int} + """ + # TODO: This involves a fair bit of looping and computation for + # something that is called a lot. Consider caching values somewhere. + windowSize = self.conn.local_flow_control_window(streamID) + sendQueue = self._outboundStreamQueues[streamID] + alreadyConsumed = sum( + len(chunk) for chunk in sendQueue if chunk is not _END_STREAM_SENTINEL + ) + + return windowSize - alreadyConsumed + + def _handleWindowUpdate(self, event): + """ + Manage flow control windows. + + Streams that are blocked on flow control will register themselves with + the connection. This will fire deferreds that wake those streams up and + allow them to continue processing. 
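The producer/consumer machinery described in the comments above is what ultimately paces application code: a resource that streams a large body registers a producer with the request, and the stream's flow-control window drives it. A minimal sketch using a pull producer; the class name and chunk sizes are illustrative, not part of this diff:

```python
from zope.interface import implementer

from twisted.internet.interfaces import IPullProducer
from twisted.web import resource, server


@implementer(IPullProducer)
class ChunkProducer:
    """Write one chunk each time the transport asks for more data."""

    def __init__(self, request, chunks):
        self._request = request
        self._chunks = iter(chunks)

    def resumeProducing(self):
        try:
            chunk = next(self._chunks)
        except StopIteration:
            self._request.unregisterProducer()
            self._request.finish()
        else:
            self._request.write(chunk)

    def stopProducing(self):
        self._chunks = iter(())


class BigResource(resource.Resource):
    isLeaf = True

    def render_GET(self, request):
        # streaming=False registers a pull producer; HTTP/2 wraps it for backpressure.
        request.registerProducer(ChunkProducer(request, [b"x" * 16384] * 64), False)
        return server.NOT_DONE_YET
```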
+ + @param event: The Hyper-h2 event that encodes information about the + flow control window change. + @type event: L{h2.events.WindowUpdated} + """ + streamID = event.stream_id + + if streamID: + if not self._streamIsActive(streamID): + # We may have already cleaned up our stream state, making this + # a late WINDOW_UPDATE frame. That's fine: the update is + # unnecessary but benign. We'll ignore it. + return + + # If we haven't got any data to send, don't unblock the stream. If + # we do, we'll eventually get an exception inside the + # _sendPrioritisedData loop some time later. + if self._outboundStreamQueues.get(streamID): + self.priority.unblock(streamID) + self.streams[streamID].windowUpdated() + else: + # Update strictly applies to all streams. + for stream in self.streams.values(): + stream.windowUpdated() + + # If we still have data to send for this stream, unblock it. + if self._outboundStreamQueues.get(stream.streamID): + self.priority.unblock(stream.streamID) + + def getPeer(self): + """ + Get the remote address of this connection. + + Treat this method with caution. It is the unfortunate result of the + CGI and Jabber standards, but should not be considered reliable for + the usual host of reasons; port forwarding, proxying, firewalls, IP + masquerading, etc. + + @return: An L{IAddress} provider. + """ + return self.transport.getPeer() + + def getHost(self): + """ + Similar to getPeer, but returns an address describing this side of the + connection. + + @return: An L{IAddress} provider. + """ + return self.transport.getHost() + + def openStreamWindow(self, streamID, increment): + """ + Open the stream window by a given increment. + + @param streamID: The ID of the stream whose window needs to be opened. + @type streamID: L{int} + + @param increment: The amount by which the stream window must be + incremented. + @type increment: L{int} + """ + self.conn.acknowledge_received_data(increment, streamID) + self._tryToWriteControlData() + + def _isSecure(self): + """ + Returns L{True} if this channel is using a secure transport. + + @returns: L{True} if this channel is secure. + @rtype: L{bool} + """ + # A channel is secure if its transport is ISSLTransport. + return ISSLTransport(self.transport, None) is not None + + def _send100Continue(self, streamID): + """ + Sends a 100 Continue response, used to signal to clients that further + processing will be performed. + + @param streamID: The ID of the stream that needs the 100 Continue + response + @type streamID: L{int} + """ + headers = [(b":status", b"100")] + self.conn.send_headers(headers=headers, stream_id=streamID) + self._tryToWriteControlData() + + def _respondToBadRequestAndDisconnect(self, streamID): + """ + This is a quick and dirty way of responding to bad requests. + + As described by HTTP standard we should be patient and accept the + whole request from the client before sending a polite bad request + response, even in the case when clients send tons of data. + + Unlike in the HTTP/1.1 case, this does not actually disconnect the + underlying transport: there's no need. This instead just sends a 400 + response and terminates the stream. 
+ + @param streamID: The ID of the stream that needs the 100 Continue + response + @type streamID: L{int} + """ + headers = [(b":status", b"400")] + self.conn.send_headers(headers=headers, stream_id=streamID, end_stream=True) + stillActive = self._tryToWriteControlData() + if stillActive: + stream = self.streams[streamID] + stream.connectionLost(Failure(ConnectionLost("Invalid request"))) + self._requestDone(streamID) + + def _streamIsActive(self, streamID): + """ + Checks whether Twisted has still got state for a given stream and so + can process events for that stream. + + @param streamID: The ID of the stream that needs processing. + @type streamID: L{int} + + @return: Whether the stream still has state allocated. + @rtype: L{bool} + """ + return streamID in self.streams + + def _tryToWriteControlData(self): + """ + Checks whether the connection is blocked on flow control and, + if it isn't, writes any buffered control data. + + @return: L{True} if the connection is still active and + L{False} if it was aborted because too many bytes have + been written but not consumed by the other end. + """ + bufferedBytes = self.conn.data_to_send() + if not bufferedBytes: + return True + + if self._consumerBlocked is None and not self._bufferedControlFrames: + # The consumer isn't blocked, and we don't have any buffered frames: + # write this directly. + self.transport.write(bufferedBytes) + return True + else: + # Either the consumer is blocked or we have buffered frames. If the + # consumer is blocked, we'll write this when we unblock. If we have + # buffered frames, we have presumably been re-entered from + # transport.write, and so to avoid reordering issues we'll buffer anyway. + self._bufferedControlFrames.append(bufferedBytes) + self._bufferedControlFrameBytes += len(bufferedBytes) + + if self._bufferedControlFrameBytes >= self._maxBufferedControlFrameBytes: + maxBuffCtrlFrameBytes = self._maxBufferedControlFrameBytes + self._log.error( + "Maximum number of control frame bytes buffered: " + "{bufferedControlFrameBytes} > = " + "{maxBufferedControlFrameBytes}. " + "Aborting connection to client: {client} ", + bufferedControlFrameBytes=self._bufferedControlFrameBytes, + maxBufferedControlFrameBytes=maxBuffCtrlFrameBytes, + client=self.transport.getPeer(), + ) + # We've exceeded a reasonable buffer size for max buffered + # control frames. This is a denial of service risk, so we're + # going to drop this connection. + self.transport.abortConnection() + self.connectionLost(Failure(ExcessiveBufferingError())) + return False + return True + + def _flushBufferedControlData(self, *args): + """ + Called when the connection is marked writable again after being marked unwritable. + Attempts to flush buffered control data if there is any. + """ + # To respect backpressure here we send each write in order, paying attention to whether + # we got blocked + while self._consumerBlocked is None and self._bufferedControlFrames: + nextWrite = self._bufferedControlFrames.popleft() + self._bufferedControlFrameBytes -= len(nextWrite) + self.transport.write(nextWrite) + + +@implementer(ITransport, IConsumer, IPushProducer) +class H2Stream: + """ + A class representing a single HTTP/2 stream. + + This class works hand-in-hand with L{H2Connection}. It acts to provide an + implementation of L{ITransport}, L{IConsumer}, and L{IProducer} that work + for a single HTTP/2 connection, while tightly cleaving to the interface + provided by those interfaces. 
It does this by having a tight coupling to + L{H2Connection}, which allows associating many of the functions of + L{ITransport}, L{IConsumer}, and L{IProducer} to objects on a + stream-specific level. + + @ivar streamID: The numerical stream ID that this object corresponds to. + @type streamID: L{int} + + @ivar producing: Whether this stream is currently allowed to produce data + to its consumer. + @type producing: L{bool} + + @ivar command: The HTTP verb used on the request. + @type command: L{unicode} + + @ivar path: The HTTP path used on the request. + @type path: L{unicode} + + @ivar producer: The object producing the response, if any. + @type producer: L{IProducer} + + @ivar site: The L{twisted.web.server.Site} object this stream belongs to, + if any. + @type site: L{twisted.web.server.Site} + + @ivar factory: The L{twisted.web.http.HTTPFactory} object that constructed + this stream's parent connection. + @type factory: L{twisted.web.http.HTTPFactory} + + @ivar _producerProducing: Whether the producer stored in producer is + currently producing data. + @type _producerProducing: L{bool} + + @ivar _inboundDataBuffer: Any data that has been received from the network + but has not yet been received by the consumer. + @type _inboundDataBuffer: A L{collections.deque} containing L{bytes} + + @ivar _conn: A reference to the connection this stream belongs to. + @type _conn: L{H2Connection} + + @ivar _request: A request object that this stream corresponds to. + @type _request: L{twisted.web.iweb.IRequest} + + @ivar _buffer: A buffer containing data produced by the producer that could + not be sent on the network at this time. + @type _buffer: L{io.BytesIO} + """ + + # We need a transport property for t.w.h.Request, but HTTP/2 doesn't want + # to expose it. So we just set it to None. + transport = None + + def __init__(self, streamID, connection, headers, requestFactory, site, factory): + """ + Initialize this HTTP/2 stream. + + @param streamID: The numerical stream ID that this object corresponds + to. + @type streamID: L{int} + + @param connection: The HTTP/2 connection this stream belongs to. + @type connection: L{H2Connection} + + @param headers: The HTTP/2 request headers. + @type headers: A L{list} of L{tuple}s of header name and header value, + both as L{bytes}. + + @param requestFactory: A function that builds appropriate request + request objects. + @type requestFactory: A callable that returns a + L{twisted.web.iweb.IRequest}. + + @param site: The L{twisted.web.server.Site} object this stream belongs + to, if any. + @type site: L{twisted.web.server.Site} + + @param factory: The L{twisted.web.http.HTTPFactory} object that + constructed this stream's parent connection. + @type factory: L{twisted.web.http.HTTPFactory} + """ + + self.streamID = streamID + self.site = site + self.factory = factory + self.producing = True + self.command = None + self.path = None + self.producer = None + self._producerProducing = False + self._hasStreamingProducer = None + self._inboundDataBuffer = deque() + self._conn = connection + self._request = requestFactory(self, queued=False) + self._buffer = io.BytesIO() + + self._convertHeaders(headers) + + def _convertHeaders(self, headers): + """ + This method converts the HTTP/2 header set into something that looks + like HTTP/1.1. In particular, it strips the 'special' headers and adds + a Host: header. + + @param headers: The HTTP/2 header set. + @type headers: A L{list} of L{tuple}s of header name and header value, + both as L{bytes}. 
+ """ + gotLength = False + + for header in headers: + if not header[0].startswith(b":"): + gotLength = _addHeaderToRequest(self._request, header) or gotLength + elif header[0] == b":method": + self.command = header[1] + elif header[0] == b":path": + self.path = header[1] + elif header[0] == b":authority": + # This is essentially the Host: header from HTTP/1.1 + _addHeaderToRequest(self._request, (b"host", header[1])) + + if not gotLength: + if self.command in (b"GET", b"HEAD"): + self._request.gotLength(0) + else: + self._request.gotLength(None) + + self._request.parseCookies() + expectContinue = self._request.requestHeaders.getRawHeaders(b"expect") + if expectContinue and expectContinue[0].lower() == b"100-continue": + self._send100Continue() + + # Methods called by the H2Connection + def receiveDataChunk(self, data, flowControlledLength): + """ + Called when the connection has received a chunk of data from the + underlying transport. If the stream has been registered with a + consumer, and is currently able to push data, immediately passes it + through. Otherwise, buffers the chunk until we can start producing. + + @param data: The chunk of data that was received. + @type data: L{bytes} + + @param flowControlledLength: The total flow controlled length of this + chunk, which is used when we want to re-open the window. May be + different to C{len(data)}. + @type flowControlledLength: L{int} + """ + if not self.producing: + # Buffer data. + self._inboundDataBuffer.append((data, flowControlledLength)) + else: + self._request.handleContentChunk(data) + self._conn.openStreamWindow(self.streamID, flowControlledLength) + + def requestComplete(self): + """ + Called by the L{H2Connection} when the all data for a request has been + received. Currently, with the legacy L{twisted.web.http.Request} + object, just calls requestReceived unless the producer wants us to be + quiet. + """ + if self.producing: + self._request.requestReceived(self.command, self.path, b"HTTP/2") + else: + self._inboundDataBuffer.append((_END_STREAM_SENTINEL, None)) + + def connectionLost(self, reason): + """ + Called by the L{H2Connection} when a connection is lost or a stream is + reset. + + @param reason: The reason the connection was lost. + @type reason: L{str} + """ + self._request.connectionLost(reason) + + def windowUpdated(self): + """ + Called by the L{H2Connection} when this stream's flow control window + has been opened. + """ + # If we don't have a producer, we have no-one to tell. + if not self.producer: + return + + # If we're not blocked on flow control, we don't care. + if self._producerProducing: + return + + # We check whether the stream's flow control window is actually above + # 0, and then, if a producer is registered and we still have space in + # the window, we unblock it. + remainingWindow = self._conn.remainingOutboundWindow(self.streamID) + if not remainingWindow > 0: + return + + # We have a producer and space in the window, so that producer can + # start producing again! + self._producerProducing = True + self.producer.resumeProducing() + + def flowControlBlocked(self): + """ + Called by the L{H2Connection} when this stream's flow control window + has been exhausted. + """ + if not self.producer: + return + + if self._producerProducing: + self.producer.pauseProducing() + self._producerProducing = False + + # Methods called by the consumer (usually an IRequest). + def writeHeaders(self, version, code, reason, headers): + """ + Called by the consumer to write headers to the stream. 
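# A minimal illustrative example (hypothetical data, not from this module) of
# the header list the _convertHeaders logic above receives for a request such
# as "GET /index" addressed to example.com:
h2RequestHeaders = [
    (b":method", b"GET"),
    (b":path", b"/index"),
    (b":scheme", b"https"),
    (b":authority", b"example.com"),
    (b"user-agent", b"demo"),
]
# After conversion the request object sees ordinary HTTP/1.1-style headers,
#     Host: example.com
#     User-Agent: demo
# with command == b"GET", path == b"/index", and gotLength(0), since a GET
# carrying no content-length header is treated as having an empty body.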
+ + @param version: The HTTP version. + @type version: L{bytes} + + @param code: The status code. + @type code: L{int} + + @param reason: The reason phrase. Ignored in HTTP/2. + @type reason: L{bytes} + + @param headers: The HTTP response headers. + @type headers: Any iterable of two-tuples of L{bytes}, representing header + names and header values. + """ + self._conn.writeHeaders(version, code, reason, headers, self.streamID) + + def requestDone(self, request): + """ + Called by a consumer to clean up whatever permanent state is in use. + + @param request: The request calling the method. + @type request: L{twisted.web.iweb.IRequest} + """ + self._conn.endRequest(self.streamID) + + def _send100Continue(self): + """ + Sends a 100 Continue response, used to signal to clients that further + processing will be performed. + """ + self._conn._send100Continue(self.streamID) + + def _respondToBadRequestAndDisconnect(self): + """ + This is a quick and dirty way of responding to bad requests. + + As described by HTTP standard we should be patient and accept the + whole request from the client before sending a polite bad request + response, even in the case when clients send tons of data. + + Unlike in the HTTP/1.1 case, this does not actually disconnect the + underlying transport: there's no need. This instead just sends a 400 + response and terminates the stream. + """ + self._conn._respondToBadRequestAndDisconnect(self.streamID) + + # Implementation: ITransport + def write(self, data): + """ + Write a single chunk of data into a data frame. + + @param data: The data chunk to send. + @type data: L{bytes} + """ + self._conn.writeDataToStream(self.streamID, data) + return + + def writeSequence(self, iovec): + """ + Write a sequence of chunks of data into data frames. + + @param iovec: A sequence of chunks to send. + @type iovec: An iterable of L{bytes} chunks. + """ + for chunk in iovec: + self.write(chunk) + + def loseConnection(self): + """ + Close the connection after writing all pending data. + """ + self._conn.endRequest(self.streamID) + + def abortConnection(self): + """ + Forcefully abort the connection by sending a RstStream frame. + """ + self._conn.abortRequest(self.streamID) + + def getPeer(self): + """ + Get information about the peer. + """ + return self._conn.getPeer() + + def getHost(self): + """ + Similar to getPeer, but for this side of the connection. + """ + return self._conn.getHost() + + def isSecure(self): + """ + Returns L{True} if this channel is using a secure transport. + + @returns: L{True} if this channel is secure. + @rtype: L{bool} + """ + return self._conn._isSecure() + + # Implementation: IConsumer + def registerProducer(self, producer, streaming): + """ + Register to receive data from a producer. + + This sets self to be a consumer for a producer. When this object runs + out of data (as when a send(2) call on a socket succeeds in moving the + last data from a userspace buffer into a kernelspace buffer), it will + ask the producer to resumeProducing(). + + For L{IPullProducer} providers, C{resumeProducing} will be called once + each time data is required. + + For L{IPushProducer} providers, C{pauseProducing} will be called + whenever the write buffer fills up and C{resumeProducing} will only be + called when it empties. + + @param producer: The producer to register. + @type producer: L{IProducer} provider + + @param streaming: L{True} if C{producer} provides L{IPushProducer}, + L{False} if C{producer} provides L{IPullProducer}. 
+ @type streaming: L{bool} + + @raise RuntimeError: If a producer is already registered. + + @return: L{None} + """ + if self.producer: + raise ValueError( + "registering producer %s before previous one (%s) was " + "unregistered" % (producer, self.producer) + ) + + if not streaming: + self.hasStreamingProducer = False + producer = _PullToPush(producer, self) + producer.startStreaming() + else: + self.hasStreamingProducer = True + + self.producer = producer + self._producerProducing = True + + def unregisterProducer(self): + """ + @see: L{IConsumer.unregisterProducer} + """ + # When the producer is unregistered, we're done. + if self.producer is not None and not self.hasStreamingProducer: + self.producer.stopStreaming() + + self._producerProducing = False + self.producer = None + self.hasStreamingProducer = None + + # Implementation: IPushProducer + def stopProducing(self): + """ + @see: L{IProducer.stopProducing} + """ + self.producing = False + self.abortConnection() + + def pauseProducing(self): + """ + @see: L{IPushProducer.pauseProducing} + """ + self.producing = False + + def resumeProducing(self): + """ + @see: L{IPushProducer.resumeProducing} + """ + self.producing = True + consumedLength = 0 + + while self.producing and self._inboundDataBuffer: + # Allow for pauseProducing to be called in response to a call to + # resumeProducing. + chunk, flowControlledLength = self._inboundDataBuffer.popleft() + + if chunk is _END_STREAM_SENTINEL: + self.requestComplete() + else: + consumedLength += flowControlledLength + self._request.handleContentChunk(chunk) + + self._conn.openStreamWindow(self.streamID, consumedLength) + + +def _addHeaderToRequest(request, header): + """ + Add a header tuple to a request header object. + + @param request: The request to add the header tuple to. + @type request: L{twisted.web.http.Request} + + @param header: The header tuple to add to the request. + @type header: A L{tuple} with two elements, the header name and header + value, both as L{bytes}. + + @return: If the header being added was the C{Content-Length} header. + @rtype: L{bool} + """ + requestHeaders = request.requestHeaders + name, value = header + values = requestHeaders.getRawHeaders(name) + + if values is not None: + values.append(value) + else: + requestHeaders.setRawHeaders(name, [value]) + + if name == b"content-length": + request.gotLength(int(value)) + return True + + return False diff --git a/contrib/python/Twisted/py3/twisted/web/_newclient.py b/contrib/python/Twisted/py3/twisted/web/_newclient.py new file mode 100644 index 0000000000..6fd1ac21ba --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/_newclient.py @@ -0,0 +1,1727 @@ +# -*- test-case-name: twisted.web.test.test_newclient -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +An U{HTTP 1.1<http://www.w3.org/Protocols/rfc2616/rfc2616.html>} client. + +The way to use the functionality provided by this module is to: + + - Connect a L{HTTP11ClientProtocol} to an HTTP server + - Create a L{Request} with the appropriate data + - Pass the request to L{HTTP11ClientProtocol.request} + - The returned Deferred will fire with a L{Response} object + - Create a L{IProtocol} provider which can handle the response body + - Connect it to the response with L{Response.deliverBody} + - When the protocol's C{connectionLost} method is called, the response is + complete. See L{Response.deliverBody} for details. + +Various other classes in this module support this usage: + + - HTTPParser is the basic HTTP parser. 
It can handle the parts of HTTP which + are symmetric between requests and responses. + + - HTTPClientParser extends HTTPParser to handle response-specific parts of + HTTP. One instance is created for each request to parse the corresponding + response. +""" + +import re + +from zope.interface import implementer + +from twisted.internet.defer import ( + CancelledError, + Deferred, + fail, + maybeDeferred, + succeed, +) +from twisted.internet.error import ConnectionDone +from twisted.internet.interfaces import IConsumer, IPushProducer +from twisted.internet.protocol import Protocol +from twisted.logger import Logger +from twisted.protocols.basic import LineReceiver +from twisted.python.compat import networkString +from twisted.python.components import proxyForInterface +from twisted.python.failure import Failure +from twisted.python.reflect import fullyQualifiedName +from twisted.web.http import ( + NO_CONTENT, + NOT_MODIFIED, + PotentialDataLoss, + _ChunkedTransferDecoder, + _DataLoss, + _IdentityTransferDecoder, +) +from twisted.web.http_headers import Headers +from twisted.web.iweb import UNKNOWN_LENGTH, IClientRequest, IResponse + +# States HTTPParser can be in +STATUS = "STATUS" +HEADER = "HEADER" +BODY = "BODY" +DONE = "DONE" +_moduleLog = Logger() + + +class BadHeaders(Exception): + """ + Headers passed to L{Request} were in some way invalid. + """ + + +class ExcessWrite(Exception): + """ + The body L{IBodyProducer} for a request tried to write data after + indicating it had finished writing data. + """ + + +class ParseError(Exception): + """ + Some received data could not be parsed. + + @ivar data: The string which could not be parsed. + """ + + def __init__(self, reason, data): + Exception.__init__(self, reason, data) + self.data = data + + +class BadResponseVersion(ParseError): + """ + The version string in a status line was unparsable. + """ + + +class _WrapperException(Exception): + """ + L{_WrapperException} is the base exception type for exceptions which + include one or more other exceptions as the low-level causes. + + @ivar reasons: A L{list} of one or more L{Failure} instances encountered + during an HTTP request. See subclass documentation for more details. + """ + + def __init__(self, reasons): + Exception.__init__(self, reasons) + self.reasons = reasons + + +class RequestGenerationFailed(_WrapperException): + """ + There was an error while creating the bytes which make up a request. + + @ivar reasons: A C{list} of one or more L{Failure} instances giving the + reasons the request generation was considered to have failed. + """ + + +class RequestTransmissionFailed(_WrapperException): + """ + There was an error while sending the bytes which make up a request. + + @ivar reasons: A C{list} of one or more L{Failure} instances giving the + reasons the request transmission was considered to have failed. + """ + + +class ConnectionAborted(Exception): + """ + The connection was explicitly aborted by application code. + """ + + +class WrongBodyLength(Exception): + """ + An L{IBodyProducer} declared the number of bytes it was going to + produce (via its C{length} attribute) and then produced a different number + of bytes. + """ + + +class ResponseDone(Exception): + """ + L{ResponseDone} may be passed to L{IProtocol.connectionLost} on the + protocol passed to L{Response.deliverBody} and indicates that the entire + response has been delivered. 
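# A minimal illustrative sketch of how ResponseDone is consumed: a protocol
# handed to Response.deliverBody() learns that the body is complete when its
# connectionLost() is called with ResponseDone.  BodyCollector is a
# hypothetical example; the import assumes the public re-export of
# ResponseDone from twisted.web.client.
from twisted.internet.defer import Deferred
from twisted.internet.protocol import Protocol
from twisted.web.client import ResponseDone


class BodyCollector(Protocol):
    def __init__(self, finished):
        self._buffer = []
        self._finished = finished  # Deferred fired with the complete body

    def dataReceived(self, data):
        self._buffer.append(data)

    def connectionLost(self, reason):
        if reason.check(ResponseDone):
            # Clean end of body: hand the accumulated bytes to the caller.
            self._finished.callback(b"".join(self._buffer))
        else:
            # e.g. ResponseFailed or PotentialDataLoss.
            self._finished.errback(reason)


# Typical use once a request Deferred has fired with a Response object:
#     finished = Deferred()
#     response.deliverBody(BodyCollector(finished))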
+ """ + + +class ResponseFailed(_WrapperException): + """ + L{ResponseFailed} indicates that all of the response to a request was not + received for some reason. + + @ivar reasons: A C{list} of one or more L{Failure} instances giving the + reasons the response was considered to have failed. + + @ivar response: If specified, the L{Response} received from the server (and + in particular the status code and the headers). + """ + + def __init__(self, reasons, response=None): + _WrapperException.__init__(self, reasons) + self.response = response + + +class ResponseNeverReceived(ResponseFailed): + """ + A L{ResponseFailed} that knows no response bytes at all have been received. + """ + + +class RequestNotSent(Exception): + """ + L{RequestNotSent} indicates that an attempt was made to issue a request but + for reasons unrelated to the details of the request itself, the request + could not be sent. For example, this may indicate that an attempt was made + to send a request using a protocol which is no longer connected to a + server. + """ + + +def _callAppFunction(function): + """ + Call C{function}. If it raises an exception, log it with a minimal + description of the source. + + @return: L{None} + """ + try: + function() + except BaseException: + _moduleLog.failure( + "Unexpected exception from {name}", name=fullyQualifiedName(function) + ) + + +class HTTPParser(LineReceiver): + """ + L{HTTPParser} handles the parsing side of HTTP processing. With a suitable + subclass, it can parse either the client side or the server side of the + connection. + + @ivar headers: All of the non-connection control message headers yet + received. + + @ivar state: State indicator for the response parsing state machine. One + of C{STATUS}, C{HEADER}, C{BODY}, C{DONE}. + + @ivar _partialHeader: L{None} or a C{list} of the lines of a multiline + header while that header is being received. + """ + + # NOTE: According to HTTP spec, we're supposed to eat the + # 'Proxy-Authenticate' and 'Proxy-Authorization' headers also, but that + # doesn't sound like a good idea to me, because it makes it impossible to + # have a non-authenticating transparent proxy in front of an authenticating + # proxy. An authenticating proxy can eat them itself. -jknight + # + # Further, quoting + # http://homepages.tesco.net/J.deBoynePollard/FGA/web-proxy-connection-header.html + # regarding the 'Proxy-Connection' header: + # + # The Proxy-Connection: header is a mistake in how some web browsers + # use HTTP. Its name is the result of a false analogy. It is not a + # standard part of the protocol. There is a different standard + # protocol mechanism for doing what it does. And its existence + # imposes a requirement upon HTTP servers such that no proxy HTTP + # server can be standards-conforming in practice. + # + # -exarkun + + # Some servers (like http://news.ycombinator.com/) return status lines and + # HTTP headers delimited by \n instead of \r\n. + delimiter = b"\n" + + CONNECTION_CONTROL_HEADERS = { + b"content-length", + b"connection", + b"keep-alive", + b"te", + b"trailers", + b"transfer-encoding", + b"upgrade", + b"proxy-connection", + } + + def connectionMade(self): + self.headers = Headers() + self.connHeaders = Headers() + self.state = STATUS + self._partialHeader = None + + def switchToBodyMode(self, decoder): + """ + Switch to body parsing mode - interpret any more bytes delivered as + part of the message body and deliver them to the given decoder. 
+ """ + if self.state == BODY: + raise RuntimeError("already in body mode") + + self.bodyDecoder = decoder + self.state = BODY + self.setRawMode() + + def lineReceived(self, line): + """ + Handle one line from a response. + """ + # Handle the normal CR LF case. + if line[-1:] == b"\r": + line = line[:-1] + + if self.state == STATUS: + self.statusReceived(line) + self.state = HEADER + elif self.state == HEADER: + if not line or line[0] not in b" \t": + if self._partialHeader is not None: + header = b"".join(self._partialHeader) + name, value = header.split(b":", 1) + value = value.strip() + self.headerReceived(name, value) + if not line: + # Empty line means the header section is over. + self.allHeadersReceived() + else: + # Line not beginning with LWS is another header. + self._partialHeader = [line] + else: + # A line beginning with LWS is a continuation of a header + # begun on a previous line. + self._partialHeader.append(line) + + def rawDataReceived(self, data): + """ + Pass data from the message body to the body decoder object. + """ + self.bodyDecoder.dataReceived(data) + + def isConnectionControlHeader(self, name): + """ + Return C{True} if the given lower-cased name is the name of a + connection control header (rather than an entity header). + + According to RFC 2616, section 14.10, the tokens in the Connection + header are probably relevant here. However, I am not sure what the + practical consequences of either implementing or ignoring that are. + So I leave it unimplemented for the time being. + """ + return name in self.CONNECTION_CONTROL_HEADERS + + def statusReceived(self, status): + """ + Callback invoked whenever the first line of a new message is received. + Override this. + + @param status: The first line of an HTTP request or response message + without trailing I{CR LF}. + @type status: C{bytes} + """ + + def headerReceived(self, name, value): + """ + Store the given header in C{self.headers}. + """ + name = name.lower() + if self.isConnectionControlHeader(name): + headers = self.connHeaders + else: + headers = self.headers + headers.addRawHeader(name, value) + + def allHeadersReceived(self): + """ + Callback invoked after the last header is passed to C{headerReceived}. + Override this to change to the C{BODY} or C{DONE} state. + """ + self.switchToBodyMode(None) + + +class HTTPClientParser(HTTPParser): + """ + An HTTP parser which only handles HTTP responses. + + @ivar request: The request with which the expected response is associated. + @type request: L{Request} + + @ivar NO_BODY_CODES: A C{set} of response codes which B{MUST NOT} have a + body. + + @ivar finisher: A callable to invoke when this response is fully parsed. + + @ivar _responseDeferred: A L{Deferred} which will be called back with the + response when all headers in the response have been received. + Thereafter, L{None}. + + @ivar _everReceivedData: C{True} if any bytes have been received. + """ + + NO_BODY_CODES = {NO_CONTENT, NOT_MODIFIED} + + _transferDecoders = { + b"chunked": _ChunkedTransferDecoder, + } + + bodyDecoder = None + _log = Logger() + + def __init__(self, request, finisher): + self.request = request + self.finisher = finisher + self._responseDeferred = Deferred() + self._everReceivedData = False + + def dataReceived(self, data): + """ + Override so that we know if any response has been received. + """ + self._everReceivedData = True + HTTPParser.dataReceived(self, data) + + def parseVersion(self, strversion): + """ + Parse version strings of the form Protocol '/' Major '.' Minor. E.g. 
+ b'HTTP/1.1'. Returns (protocol, major, minor). Will raise ValueError + on bad syntax. + """ + try: + proto, strnumber = strversion.split(b"/") + major, minor = strnumber.split(b".") + major, minor = int(major), int(minor) + except ValueError as e: + raise BadResponseVersion(str(e), strversion) + if major < 0 or minor < 0: + raise BadResponseVersion("version may not be negative", strversion) + return (proto, major, minor) + + def statusReceived(self, status): + """ + Parse the status line into its components and create a response object + to keep track of this response's state. + """ + parts = status.split(b" ", 2) + if len(parts) == 2: + # Some broken servers omit the required `phrase` portion of + # `status-line`. One such server identified as + # "cloudflare-nginx". Others fail to identify themselves + # entirely. Fill in an empty phrase for such cases. + version, codeBytes = parts + phrase = b"" + elif len(parts) == 3: + version, codeBytes, phrase = parts + else: + raise ParseError("wrong number of parts", status) + + try: + statusCode = int(codeBytes) + except ValueError: + raise ParseError("non-integer status code", status) + + self.response = Response._construct( + self.parseVersion(version), + statusCode, + phrase, + self.headers, + self.transport, + self.request, + ) + + def _finished(self, rest): + """ + Called to indicate that an entire response has been received. No more + bytes will be interpreted by this L{HTTPClientParser}. Extra bytes are + passed up and the state of this L{HTTPClientParser} is set to I{DONE}. + + @param rest: A C{bytes} giving any extra bytes delivered to this + L{HTTPClientParser} which are not part of the response being + parsed. + """ + self.state = DONE + self.finisher(rest) + + def isConnectionControlHeader(self, name): + """ + Content-Length in the response to a HEAD request is an entity header, + not a connection control header. + """ + if self.request.method == b"HEAD" and name == b"content-length": + return False + return HTTPParser.isConnectionControlHeader(self, name) + + def allHeadersReceived(self): + """ + Figure out how long the response body is going to be by examining + headers and stuff. + """ + if 100 <= self.response.code < 200: + # RFC 7231 Section 6.2 says that if we receive a 1XX status code + # and aren't expecting it, we MAY ignore it. That's what we're + # going to do. We reset the parser here, but we leave + # _everReceivedData in its True state because we have, in fact, + # received data. + self._log.info( + "Ignoring unexpected {code} response", code=self.response.code + ) + self.connectionMade() + del self.response + return + + if self.response.code in self.NO_BODY_CODES or self.request.method == b"HEAD": + self.response.length = 0 + # The order of the next two lines might be of interest when adding + # support for pipelining. + self._finished(self.clearLineBuffer()) + self.response._bodyDataFinished() + else: + transferEncodingHeaders = self.connHeaders.getRawHeaders( + b"transfer-encoding" + ) + if transferEncodingHeaders: + # This could be a KeyError. However, that would mean we do not + # know how to decode the response body, so failing the request + # is as good a behavior as any. Perhaps someday we will want + # to normalize/document/test this specifically, but failing + # seems fine to me for now. 
+ transferDecoder = self._transferDecoders[ + transferEncodingHeaders[0].lower() + ] + + # If anyone ever invents a transfer encoding other than + # chunked (yea right), and that transfer encoding can predict + # the length of the response body, it might be sensible to + # allow the transfer decoder to set the response object's + # length attribute. + else: + contentLengthHeaders = self.connHeaders.getRawHeaders(b"content-length") + if contentLengthHeaders is None: + contentLength = None + elif len(contentLengthHeaders) == 1: + contentLength = int(contentLengthHeaders[0]) + self.response.length = contentLength + else: + # "HTTP Message Splitting" or "HTTP Response Smuggling" + # potentially happening. Or it's just a buggy server. + raise ValueError( + "Too many Content-Length headers; " "response is invalid" + ) + + if contentLength == 0: + self._finished(self.clearLineBuffer()) + transferDecoder = None + else: + transferDecoder = lambda x, y: _IdentityTransferDecoder( + contentLength, x, y + ) + + if transferDecoder is None: + self.response._bodyDataFinished() + else: + # Make sure as little data as possible from the response body + # gets delivered to the response object until the response + # object actually indicates it is ready to handle bytes + # (probably because an application gave it a way to interpret + # them). + self.transport.pauseProducing() + self.switchToBodyMode( + transferDecoder(self.response._bodyDataReceived, self._finished) + ) + + # This must be last. If it were first, then application code might + # change some state (for example, registering a protocol to receive the + # response body). Then the pauseProducing above would be wrong since + # the response is ready for bytes and nothing else would ever resume + # the transport. + self._responseDeferred.callback(self.response) + del self._responseDeferred + + def connectionLost(self, reason): + if self.bodyDecoder is not None: + try: + try: + self.bodyDecoder.noMoreData() + except PotentialDataLoss: + self.response._bodyDataFinished(Failure()) + except _DataLoss: + self.response._bodyDataFinished( + Failure(ResponseFailed([reason, Failure()], self.response)) + ) + else: + self.response._bodyDataFinished() + except BaseException: + # Handle exceptions from both the except suites and the else + # suite. Those functions really shouldn't raise exceptions, + # but maybe there's some buggy application code somewhere + # making things difficult. + self._log.failure("") + elif self.state != DONE: + if self._everReceivedData: + exceptionClass = ResponseFailed + else: + exceptionClass = ResponseNeverReceived + self._responseDeferred.errback(Failure(exceptionClass([reason]))) + del self._responseDeferred + + +_VALID_METHOD = re.compile( + rb"\A[%s]+\Z" + % ( + bytes().join( + ( + b"!", + b"#", + b"$", + b"%", + b"&", + b"'", + b"*", + b"+", + b"-", + b".", + b"^", + b"_", + b"`", + b"|", + b"~", + b"\x30-\x39", + b"\x41-\x5a", + b"\x61-\x7A", + ), + ), + ), +) + + +def _ensureValidMethod(method): + """ + An HTTP method is an HTTP token, which consists of any visible + ASCII character that is not a delimiter (i.e. one of + C{"(),/:;<=>?@[\\]{}}.) 
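# A minimal illustrative sketch of the expected behaviour, assuming this
# module's _ensureValidMethod and _ensureValidURI (defined here and just
# below); the byte strings are arbitrary examples:
assert _ensureValidMethod(b"GET") == b"GET"
assert _ensureValidURI(b"/index.html?q=1") == b"/index.html?q=1"
try:
    _ensureValidMethod(b"GE T")  # a space is a delimiter, so this is rejected
except ValueError:
    pass
try:
    _ensureValidURI(b"/with space")  # 0x20 falls outside the \x21-\x7e range
except ValueError:
    pass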
+ + @param method: the method to check + @type method: L{bytes} + + @return: the method if it is valid + @rtype: L{bytes} + + @raise ValueError: if the method is not valid + + @see: U{https://tools.ietf.org/html/rfc7230#section-3.1.1}, + U{https://tools.ietf.org/html/rfc7230#section-3.2.6}, + U{https://tools.ietf.org/html/rfc5234#appendix-B.1} + """ + if _VALID_METHOD.match(method): + return method + raise ValueError(f"Invalid method {method!r}") + + +_VALID_URI = re.compile(rb"\A[\x21-\x7e]+\Z") + + +def _ensureValidURI(uri): + """ + A valid URI cannot contain control characters (i.e., characters + between 0-32, inclusive and 127) or non-ASCII characters (i.e., + characters with values between 128-255, inclusive). + + @param uri: the URI to check + @type uri: L{bytes} + + @return: the URI if it is valid + @rtype: L{bytes} + + @raise ValueError: if the URI is not valid + + @see: U{https://tools.ietf.org/html/rfc3986#section-3.3}, + U{https://tools.ietf.org/html/rfc3986#appendix-A}, + U{https://tools.ietf.org/html/rfc5234#appendix-B.1} + """ + if _VALID_URI.match(uri): + return uri + raise ValueError(f"Invalid URI {uri!r}") + + +@implementer(IClientRequest) +class Request: + """ + A L{Request} instance describes an HTTP request to be sent to an HTTP + server. + + @ivar method: See L{__init__}. + @ivar uri: See L{__init__}. + @ivar headers: See L{__init__}. + @ivar bodyProducer: See L{__init__}. + @ivar persistent: See L{__init__}. + + @ivar _parsedURI: Parsed I{URI} for the request, or L{None}. + @type _parsedURI: L{twisted.web.client.URI} or L{None} + """ + + _log = Logger() + + def __init__(self, method, uri, headers, bodyProducer, persistent=False): + """ + @param method: The HTTP method for this request, ex: b'GET', b'HEAD', + b'POST', etc. + @type method: L{bytes} + + @param uri: The relative URI of the resource to request. For example, + C{b'/foo/bar?baz=quux'}. + @type uri: L{bytes} + + @param headers: Headers to be sent to the server. It is important to + note that this object does not create any implicit headers. So it + is up to the HTTP Client to add required headers such as 'Host'. + @type headers: L{twisted.web.http_headers.Headers} + + @param bodyProducer: L{None} or an L{IBodyProducer} provider which + produces the content body to send to the remote HTTP server. + + @param persistent: Set to C{True} when you use HTTP persistent + connection, defaults to C{False}. + @type persistent: L{bool} + """ + self.method = _ensureValidMethod(method) + self.uri = _ensureValidURI(uri) + self.headers = headers + self.bodyProducer = bodyProducer + self.persistent = persistent + self._parsedURI = None + + @classmethod + def _construct( + cls, method, uri, headers, bodyProducer, persistent=False, parsedURI=None + ): + """ + Private constructor. + + @param method: See L{__init__}. + @param uri: See L{__init__}. + @param headers: See L{__init__}. + @param bodyProducer: See L{__init__}. + @param persistent: See L{__init__}. + @param parsedURI: See L{Request._parsedURI}. + + @return: L{Request} instance. + """ + request = cls(method, uri, headers, bodyProducer, persistent) + request._parsedURI = parsedURI + return request + + @property + def absoluteURI(self): + """ + The absolute URI of the request as C{bytes}, or L{None} if the + absolute URI cannot be determined. 
+ """ + return getattr(self._parsedURI, "toBytes", lambda: None)() + + def _writeHeaders(self, transport, TEorCL): + hosts = self.headers.getRawHeaders(b"host", ()) + if len(hosts) != 1: + raise BadHeaders("Exactly one Host header required") + + # In the future, having the protocol version be a parameter to this + # method would probably be good. It would be nice if this method + # weren't limited to issuing HTTP/1.1 requests. + requestLines = [] + requestLines.append( + b" ".join( + [ + _ensureValidMethod(self.method), + _ensureValidURI(self.uri), + b"HTTP/1.1\r\n", + ] + ), + ) + if not self.persistent: + requestLines.append(b"Connection: close\r\n") + if TEorCL is not None: + requestLines.append(TEorCL) + for name, values in self.headers.getAllRawHeaders(): + requestLines.extend([name + b": " + v + b"\r\n" for v in values]) + requestLines.append(b"\r\n") + transport.writeSequence(requestLines) + + def _writeToBodyProducerChunked(self, transport): + """ + Write this request to the given transport using chunked + transfer-encoding to frame the body. + + @param transport: See L{writeTo}. + @return: See L{writeTo}. + """ + self._writeHeaders(transport, b"Transfer-Encoding: chunked\r\n") + encoder = ChunkedEncoder(transport) + encoder.registerProducer(self.bodyProducer, True) + d = self.bodyProducer.startProducing(encoder) + + def cbProduced(ignored): + encoder.unregisterProducer() + + def ebProduced(err): + encoder._allowNoMoreWrites() + # Don't call the encoder's unregisterProducer because it will write + # a zero-length chunk. This would indicate to the server that the + # request body is complete. There was an error, though, so we + # don't want to do that. + transport.unregisterProducer() + return err + + d.addCallbacks(cbProduced, ebProduced) + return d + + def _writeToBodyProducerContentLength(self, transport): + """ + Write this request to the given transport using content-length to frame + the body. + + @param transport: See L{writeTo}. + @return: See L{writeTo}. + """ + self._writeHeaders( + transport, + networkString("Content-Length: %d\r\n" % (self.bodyProducer.length,)), + ) + + # This Deferred is used to signal an error in the data written to the + # encoder below. It can only errback and it will only do so before too + # many bytes have been written to the encoder and before the producer + # Deferred fires. + finishedConsuming = Deferred() + + # This makes sure the producer writes the correct number of bytes for + # the request body. + encoder = LengthEnforcingConsumer( + self.bodyProducer, transport, finishedConsuming + ) + + transport.registerProducer(self.bodyProducer, True) + + finishedProducing = self.bodyProducer.startProducing(encoder) + + def combine(consuming, producing): + # This Deferred is returned and will be fired when the first of + # consuming or producing fires. If it's cancelled, forward that + # cancellation to the producer. + def cancelConsuming(ign): + finishedProducing.cancel() + + ultimate = Deferred(cancelConsuming) + + # Keep track of what has happened so far. This initially + # contains None, then an integer uniquely identifying what + # sequence of events happened. See the callbacks and errbacks + # defined below for the meaning of each value. + state = [None] + + def ebConsuming(err): + if state == [None]: + # The consuming Deferred failed first. This means the + # overall writeTo Deferred is going to errback now. 
The + # producing Deferred should not fire later (because the + # consumer should have called stopProducing on the + # producer), but if it does, a callback will be ignored + # and an errback will be logged. + state[0] = 1 + ultimate.errback(err) + else: + # The consuming Deferred errbacked after the producing + # Deferred fired. This really shouldn't ever happen. + # If it does, I goofed. Log the error anyway, just so + # there's a chance someone might notice and complain. + self._log.failure( + "Buggy state machine in {request}/[{state}]: " + "ebConsuming called", + failure=err, + request=repr(self), + state=state[0], + ) + + def cbProducing(result): + if state == [None]: + # The producing Deferred succeeded first. Nothing will + # ever happen to the consuming Deferred. Tell the + # encoder we're done so it can check what the producer + # wrote and make sure it was right. + state[0] = 2 + try: + encoder._noMoreWritesExpected() + except BaseException: + # Fail the overall writeTo Deferred - something the + # producer did was wrong. + ultimate.errback() + else: + # Success - succeed the overall writeTo Deferred. + ultimate.callback(None) + # Otherwise, the consuming Deferred already errbacked. The + # producing Deferred wasn't supposed to fire, but it did + # anyway. It's buggy, but there's not really anything to be + # done about it. Just ignore this result. + + def ebProducing(err): + if state == [None]: + # The producing Deferred failed first. This means the + # overall writeTo Deferred is going to errback now. + # Tell the encoder that we're done so it knows to reject + # further writes from the producer (which should not + # happen, but the producer may be buggy). + state[0] = 3 + encoder._allowNoMoreWrites() + ultimate.errback(err) + else: + # The producing Deferred failed after the consuming + # Deferred failed. It shouldn't have, so it's buggy. + # Log the exception in case anyone who can fix the code + # is watching. + self._log.failure("Producer is buggy", failure=err) + + consuming.addErrback(ebConsuming) + producing.addCallbacks(cbProducing, ebProducing) + + return ultimate + + d = combine(finishedConsuming, finishedProducing) + + def f(passthrough): + # Regardless of what happens with the overall Deferred, once it + # fires, the producer registered way up above the definition of + # combine should be unregistered. + transport.unregisterProducer() + return passthrough + + d.addBoth(f) + return d + + def _writeToEmptyBodyContentLength(self, transport): + """ + Write this request to the given transport using content-length to frame + the (empty) body. + + @param transport: See L{writeTo}. + @return: See L{writeTo}. + """ + self._writeHeaders(transport, b"Content-Length: 0\r\n") + return succeed(None) + + def writeTo(self, transport): + """ + Format this L{Request} as an HTTP/1.1 request and write it to the given + transport. If bodyProducer is not None, it will be associated with an + L{IConsumer}. + + @param transport: The transport to which to write. + @type transport: L{twisted.internet.interfaces.ITransport} provider + + @return: A L{Deferred} which fires with L{None} when the request has + been completely written to the transport or with a L{Failure} if + there is any problem generating the request bytes. + """ + if self.bodyProducer is None: + # If the method semantics anticipate a body, include a + # Content-Length even if it is 0. 
+ # https://tools.ietf.org/html/rfc7230#section-3.3.2 + if self.method in (b"PUT", b"POST"): + self._writeToEmptyBodyContentLength(transport) + else: + self._writeHeaders(transport, None) + elif self.bodyProducer.length is UNKNOWN_LENGTH: + return self._writeToBodyProducerChunked(transport) + else: + return self._writeToBodyProducerContentLength(transport) + + def stopWriting(self): + """ + Stop writing this request to the transport. This can only be called + after C{writeTo} and before the L{Deferred} returned by C{writeTo} + fires. It should cancel any asynchronous task started by C{writeTo}. + The L{Deferred} returned by C{writeTo} need not be fired if this method + is called. + """ + # If bodyProducer is None, then the Deferred returned by writeTo has + # fired already and this method cannot be called. + _callAppFunction(self.bodyProducer.stopProducing) + + +class LengthEnforcingConsumer: + """ + An L{IConsumer} proxy which enforces an exact length requirement on the + total data written to it. + + @ivar _length: The number of bytes remaining to be written. + + @ivar _producer: The L{IBodyProducer} which is writing to this + consumer. + + @ivar _consumer: The consumer to which at most C{_length} bytes will be + forwarded. + + @ivar _finished: A L{Deferred} which will be fired with a L{Failure} if too + many bytes are written to this consumer. + """ + + def __init__(self, producer, consumer, finished): + self._length = producer.length + self._producer = producer + self._consumer = consumer + self._finished = finished + + def _allowNoMoreWrites(self): + """ + Indicate that no additional writes are allowed. Attempts to write + after calling this method will be met with an exception. + """ + self._finished = None + + def write(self, bytes): + """ + Write C{bytes} to the underlying consumer unless + C{_noMoreWritesExpected} has been called or there are/have been too + many bytes. + """ + if self._finished is None: + # No writes are supposed to happen any more. Try to convince the + # calling code to stop calling this method by calling its + # stopProducing method and then throwing an exception at it. This + # exception isn't documented as part of the API because you're + # never supposed to expect it: only buggy code will ever receive + # it. + self._producer.stopProducing() + raise ExcessWrite() + + if len(bytes) <= self._length: + self._length -= len(bytes) + self._consumer.write(bytes) + else: + # No synchronous exception is raised in *this* error path because + # we still have _finished which we can use to report the error to a + # better place than the direct caller of this method (some + # arbitrary application code). + _callAppFunction(self._producer.stopProducing) + self._finished.errback(WrongBodyLength("too many bytes written")) + self._allowNoMoreWrites() + + def _noMoreWritesExpected(self): + """ + Called to indicate no more bytes will be written to this consumer. + Check to see that the correct number have been written. + + @raise WrongBodyLength: If not enough bytes have been written. + """ + if self._finished is not None: + self._allowNoMoreWrites() + if self._length: + raise WrongBodyLength("too few bytes written") + + +def makeStatefulDispatcher(name, template): + """ + Given a I{dispatch} name and a function, return a function which can be + used as a method and which, when called, will call another method defined + on the instance and return the result. The other method which is called is + determined by the value of the C{_state} attribute of the instance. 
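# A minimal illustrative sketch of the dispatch pattern this helper provides,
# shown on a hypothetical two-state class (the Response class below uses it
# the same way for its body-delivery state machine):
class _Door:
    _state = "CLOSED"

    def knock(self):
        """React to a knock according to the current state."""

    knock = makeStatefulDispatcher("knock", knock)

    def _knock_CLOSED(self):
        self._state = "OPEN"
        return "opening"

    def _knock_OPEN(self):
        return "already open"


# _Door().knock() dispatches to _knock_CLOSED the first time and to
# _knock_OPEN afterwards; a state with no matching method raises RuntimeError.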
+ + @param name: A string which is used to construct the name of the subsidiary + method to invoke. The subsidiary method is named like C{'_%s_%s' % + (name, _state)}. + + @param template: A function object which is used to give the returned + function a docstring. + + @return: The dispatcher function. + """ + + def dispatcher(self, *args, **kwargs): + func = getattr(self, "_" + name + "_" + self._state, None) + if func is None: + raise RuntimeError(f"{self!r} has no {name} method in state {self._state}") + return func(*args, **kwargs) + + dispatcher.__doc__ = template.__doc__ + return dispatcher + + +# This proxy class is used only in the private constructor of the Response +# class below, in order to prevent users relying on any property of the +# concrete request object: they can only use what is provided by +# IClientRequest. +_ClientRequestProxy = proxyForInterface(IClientRequest) + + +@implementer(IResponse) +class Response: + """ + A L{Response} instance describes an HTTP response received from an HTTP + server. + + L{Response} should not be subclassed or instantiated. + + @ivar _transport: See L{__init__}. + + @ivar _bodyProtocol: The L{IProtocol} provider to which the body is + delivered. L{None} before one has been registered with + C{deliverBody}. + + @ivar _bodyBuffer: A C{list} of the strings passed to C{bodyDataReceived} + before C{deliverBody} is called. L{None} afterwards. + + @ivar _state: Indicates what state this L{Response} instance is in, + particularly with respect to delivering bytes from the response body + to an application-supplied protocol object. This may be one of + C{'INITIAL'}, C{'CONNECTED'}, C{'DEFERRED_CLOSE'}, or C{'FINISHED'}, + with the following meanings: + + - INITIAL: This is the state L{Response} objects start in. No + protocol has yet been provided and the underlying transport may + still have bytes to deliver to it. + + - DEFERRED_CLOSE: If the underlying transport indicates all bytes + have been delivered but no application-provided protocol is yet + available, the L{Response} moves to this state. Data is + buffered and waiting for a protocol to be delivered to. + + - CONNECTED: If a protocol is provided when the state is INITIAL, + the L{Response} moves to this state. Any buffered data is + delivered and any data which arrives from the transport + subsequently is given directly to the protocol. + + - FINISHED: If a protocol is provided in the DEFERRED_CLOSE state, + the L{Response} moves to this state after delivering all + buffered data to the protocol. Otherwise, if the L{Response} is + in the CONNECTED state, if the transport indicates there is no + more data, the L{Response} moves to this state. Nothing else + can happen once the L{Response} is in this state. + @type _state: C{str} + """ + + length = UNKNOWN_LENGTH + + _bodyProtocol = None + _bodyFinished = False + + def __init__(self, version, code, phrase, headers, _transport): + """ + @param version: HTTP version components protocol, major, minor. E.g. + C{(b'HTTP', 1, 1)} to mean C{b'HTTP/1.1'}. + + @param code: HTTP status code. + @type code: L{int} + + @param phrase: HTTP reason phrase, intended to give a short description + of the HTTP status code. + + @param headers: HTTP response headers. + @type headers: L{twisted.web.http_headers.Headers} + + @param _transport: The transport which is delivering this response. 
+ """ + self.version = version + self.code = code + self.phrase = phrase + self.headers = headers + self._transport = _transport + self._bodyBuffer = [] + self._state = "INITIAL" + self.request = None + self.previousResponse = None + + @classmethod + def _construct(cls, version, code, phrase, headers, _transport, request): + """ + Private constructor. + + @param version: See L{__init__}. + @param code: See L{__init__}. + @param phrase: See L{__init__}. + @param headers: See L{__init__}. + @param _transport: See L{__init__}. + @param request: See L{IResponse.request}. + + @return: L{Response} instance. + """ + response = Response(version, code, phrase, headers, _transport) + response.request = _ClientRequestProxy(request) + return response + + def setPreviousResponse(self, previousResponse): + self.previousResponse = previousResponse + + def deliverBody(self, protocol): + """ + Dispatch the given L{IProtocol} depending of the current state of the + response. + """ + + deliverBody = makeStatefulDispatcher("deliverBody", deliverBody) + + def _deliverBody_INITIAL(self, protocol): + """ + Deliver any buffered data to C{protocol} and prepare to deliver any + future data to it. Move to the C{'CONNECTED'} state. + """ + protocol.makeConnection(self._transport) + self._bodyProtocol = protocol + for data in self._bodyBuffer: + self._bodyProtocol.dataReceived(data) + self._bodyBuffer = None + + self._state = "CONNECTED" + + # Now that there's a protocol to consume the body, resume the + # transport. It was previously paused by HTTPClientParser to avoid + # reading too much data before it could be handled. We need to do this + # after we transition our state as it may recursively lead to more data + # being delivered, or even the body completing. + self._transport.resumeProducing() + + def _deliverBody_CONNECTED(self, protocol): + """ + It is invalid to attempt to deliver data to a protocol when it is + already being delivered to another protocol. + """ + raise RuntimeError( + "Response already has protocol %r, cannot deliverBody " + "again" % (self._bodyProtocol,) + ) + + def _deliverBody_DEFERRED_CLOSE(self, protocol): + """ + Deliver any buffered data to C{protocol} and then disconnect the + protocol. Move to the C{'FINISHED'} state. + """ + # Unlike _deliverBody_INITIAL, there is no need to resume the + # transport here because all of the response data has been received + # already. Some higher level code may want to resume the transport if + # that code expects further data to be received over it. + + protocol.makeConnection(self._transport) + + for data in self._bodyBuffer: + protocol.dataReceived(data) + self._bodyBuffer = None + protocol.connectionLost(self._reason) + self._state = "FINISHED" + + def _deliverBody_FINISHED(self, protocol): + """ + It is invalid to attempt to deliver data to a protocol after the + response body has been delivered to another protocol. + """ + raise RuntimeError("Response already finished, cannot deliverBody now.") + + def _bodyDataReceived(self, data): + """ + Called by HTTPClientParser with chunks of data from the response body. + They will be buffered or delivered to the protocol passed to + deliverBody. + """ + + _bodyDataReceived = makeStatefulDispatcher("bodyDataReceived", _bodyDataReceived) + + def _bodyDataReceived_INITIAL(self, data): + """ + Buffer any data received for later delivery to a protocol passed to + C{deliverBody}. 
+ + Little or no data should be buffered by this method, since the + transport has been paused and will not be resumed until a protocol + is supplied. + """ + self._bodyBuffer.append(data) + + def _bodyDataReceived_CONNECTED(self, data): + """ + Deliver any data received to the protocol to which this L{Response} + is connected. + """ + self._bodyProtocol.dataReceived(data) + + def _bodyDataReceived_DEFERRED_CLOSE(self, data): + """ + It is invalid for data to be delivered after it has been indicated + that the response body has been completely delivered. + """ + raise RuntimeError("Cannot receive body data after _bodyDataFinished") + + def _bodyDataReceived_FINISHED(self, data): + """ + It is invalid for data to be delivered after the response body has + been delivered to a protocol. + """ + raise RuntimeError("Cannot receive body data after " "protocol disconnected") + + def _bodyDataFinished(self, reason=None): + """ + Called by HTTPClientParser when no more body data is available. If the + optional reason is supplied, this indicates a problem or potential + problem receiving all of the response body. + """ + + _bodyDataFinished = makeStatefulDispatcher("bodyDataFinished", _bodyDataFinished) + + def _bodyDataFinished_INITIAL(self, reason=None): + """ + Move to the C{'DEFERRED_CLOSE'} state to wait for a protocol to + which to deliver the response body. + """ + self._state = "DEFERRED_CLOSE" + if reason is None: + reason = Failure(ResponseDone("Response body fully received")) + self._reason = reason + + def _bodyDataFinished_CONNECTED(self, reason=None): + """ + Disconnect the protocol and move to the C{'FINISHED'} state. + """ + if reason is None: + reason = Failure(ResponseDone("Response body fully received")) + self._bodyProtocol.connectionLost(reason) + self._bodyProtocol = None + self._state = "FINISHED" + + def _bodyDataFinished_DEFERRED_CLOSE(self): + """ + It is invalid to attempt to notify the L{Response} of the end of the + response body data more than once. + """ + raise RuntimeError("Cannot finish body data more than once") + + def _bodyDataFinished_FINISHED(self): + """ + It is invalid to attempt to notify the L{Response} of the end of the + response body data more than once. + """ + raise RuntimeError("Cannot finish body data after " "protocol disconnected") + + +@implementer(IConsumer) +class ChunkedEncoder: + """ + Helper object which exposes L{IConsumer} on top of L{HTTP11ClientProtocol} + for streaming request bodies to the server. + """ + + def __init__(self, transport): + self.transport = transport + + def _allowNoMoreWrites(self): + """ + Indicate that no additional writes are allowed. Attempts to write + after calling this method will be met with an exception. + """ + self.transport = None + + def registerProducer(self, producer, streaming): + """ + Register the given producer with C{self.transport}. + """ + self.transport.registerProducer(producer, streaming) + + def write(self, data): + """ + Write the given request body bytes to the transport using chunked + encoding. + + @type data: C{bytes} + """ + if self.transport is None: + raise ExcessWrite() + self.transport.writeSequence( + (networkString("%x\r\n" % len(data)), data, b"\r\n") + ) + + def unregisterProducer(self): + """ + Indicate that the request body is complete and finish the request. 
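+
+        In terms of the chunked transfer coding, the C{self.write(b"")} call
+        below emits the terminating zero-length chunk (a C{0} size line
+        followed by a bare CRLF) before further writes are disallowed.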
+ """ + self.write(b"") + self.transport.unregisterProducer() + self._allowNoMoreWrites() + + +@implementer(IPushProducer) +class TransportProxyProducer: + """ + An L{twisted.internet.interfaces.IPushProducer} implementation which + wraps another such thing and proxies calls to it until it is told to stop. + + @ivar _producer: The wrapped L{twisted.internet.interfaces.IPushProducer} + provider or L{None} after this proxy has been stopped. + """ + + # LineReceiver uses this undocumented attribute of transports to decide + # when to stop calling lineReceived or rawDataReceived (if it finds it to + # be true, it doesn't bother to deliver any more data). Set disconnecting + # to False here and never change it to true so that all data is always + # delivered to us and so that LineReceiver doesn't fail with an + # AttributeError. + disconnecting = False + + def __init__(self, producer): + self._producer = producer + + def stopProxying(self): + """ + Stop forwarding calls of L{twisted.internet.interfaces.IPushProducer} + methods to the underlying L{twisted.internet.interfaces.IPushProducer} + provider. + """ + self._producer = None + + def stopProducing(self): + """ + Proxy the stoppage to the underlying producer, unless this proxy has + been stopped. + """ + if self._producer is not None: + self._producer.stopProducing() + + def resumeProducing(self): + """ + Proxy the resumption to the underlying producer, unless this proxy has + been stopped. + """ + if self._producer is not None: + self._producer.resumeProducing() + + def pauseProducing(self): + """ + Proxy the pause to the underlying producer, unless this proxy has been + stopped. + """ + if self._producer is not None: + self._producer.pauseProducing() + + def loseConnection(self): + """ + Proxy the request to lose the connection to the underlying producer, + unless this proxy has been stopped. + """ + if self._producer is not None: + self._producer.loseConnection() + + +class HTTP11ClientProtocol(Protocol): + """ + L{HTTP11ClientProtocol} is an implementation of the HTTP 1.1 client + protocol. It supports as few features as possible. + + @ivar _parser: After a request is issued, the L{HTTPClientParser} to + which received data making up the response to that request is + delivered. + + @ivar _finishedRequest: After a request is issued, the L{Deferred} which + will fire when a L{Response} object corresponding to that request is + available. This allows L{HTTP11ClientProtocol} to fail the request + if there is a connection or parsing problem. + + @ivar _currentRequest: After a request is issued, the L{Request} + instance used to make that request. This allows + L{HTTP11ClientProtocol} to stop request generation if necessary (for + example, if the connection is lost). + + @ivar _transportProxy: After a request is issued, the + L{TransportProxyProducer} to which C{_parser} is connected. This + allows C{_parser} to pause and resume the transport in a way which + L{HTTP11ClientProtocol} can exert some control over. + + @ivar _responseDeferred: After a request is issued, the L{Deferred} from + C{_parser} which will fire with a L{Response} when one has been + received. This is eventually chained with C{_finishedRequest}, but + only in certain cases to avoid double firing that Deferred. + + @ivar _state: Indicates what state this L{HTTP11ClientProtocol} instance + is in with respect to transmission of a request and reception of a + response. 
This may be one of the following strings:
+
+        - QUIESCENT: This is the state L{HTTP11ClientProtocol} instances
+          start in. Nothing is happening: no request is being sent and no
+          response is being received or expected.
+
+        - TRANSMITTING: When a request is made (via L{request}), the
+          instance moves to this state. L{Request.writeTo} has been used
+          to start to send a request but it has not yet finished.
+
+        - TRANSMITTING_AFTER_RECEIVING_RESPONSE: The server has returned a
+          complete response but the request has not yet been fully sent.
+          The instance will remain in this state until the request
+          is fully sent.
+
+        - GENERATION_FAILED: There was an error while generating the
+          request. The request was not fully sent to the network.
+
+        - WAITING: The request was fully sent to the network. The
+          instance is now waiting for the response to be fully received.
+
+        - ABORTING: Application code has requested that the HTTP connection
+          be aborted.
+
+        - CONNECTION_LOST: The connection has been lost.
+    @type _state: C{str}
+
+    @ivar _abortDeferreds: A list of C{Deferred} instances that will fire when
+        the connection is lost.
+    """
+
+    _state = "QUIESCENT"
+    _parser = None
+    _finishedRequest = None
+    _currentRequest = None
+    _transportProxy = None
+    _responseDeferred = None
+    _log = Logger()
+
+    def __init__(self, quiescentCallback=lambda c: None):
+        self._quiescentCallback = quiescentCallback
+        self._abortDeferreds = []
+
+    @property
+    def state(self):
+        return self._state
+
+    def request(self, request):
+        """
+        Issue C{request} over C{self.transport} and return a L{Deferred} which
+        will fire with a L{Response} instance or an error.
+
+        @param request: The object defining the parameters of the request to
+            issue.
+        @type request: L{Request}
+
+        @rtype: L{Deferred}
+        @return: The deferred may errback with L{RequestGenerationFailed} if
+            the request was not fully written to the transport due to a local
+            error. It may errback with L{RequestTransmissionFailed} if it was
+            not fully written to the transport due to a network error. It may
+            errback with L{ResponseFailed} if the request was sent (not
+            necessarily received) but some or all of the response was lost. It
+            may errback with L{RequestNotSent} if it is not possible to send
+            any more requests using this L{HTTP11ClientProtocol}.
+        """
+        if self._state != "QUIESCENT":
+            return fail(RequestNotSent())
+
+        self._state = "TRANSMITTING"
+        _requestDeferred = maybeDeferred(request.writeTo, self.transport)
+
+        def cancelRequest(ign):
+            # Explicitly cancel the request's deferred if it's still trying to
+            # write when this request is cancelled.
+            if self._state in ("TRANSMITTING", "TRANSMITTING_AFTER_RECEIVING_RESPONSE"):
+                _requestDeferred.cancel()
+            else:
+                self.transport.abortConnection()
+                self._disconnectParser(Failure(CancelledError()))
+
+        self._finishedRequest = Deferred(cancelRequest)
+
+        # Keep track of the Request object in case we need to call stopWriting
+        # on it.
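+        # (stopWriting is invoked from _connectionLost_TRANSMITTING below, so
+        # a request that is still producing body data can be told to stop if
+        # the connection goes away mid-transmission.)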
+ self._currentRequest = request + + self._transportProxy = TransportProxyProducer(self.transport) + self._parser = HTTPClientParser(request, self._finishResponse) + self._parser.makeConnection(self._transportProxy) + self._responseDeferred = self._parser._responseDeferred + + def cbRequestWritten(ignored): + if self._state == "TRANSMITTING": + self._state = "WAITING" + self._responseDeferred.chainDeferred(self._finishedRequest) + + def ebRequestWriting(err): + if self._state == "TRANSMITTING": + self._state = "GENERATION_FAILED" + self.transport.abortConnection() + self._finishedRequest.errback(Failure(RequestGenerationFailed([err]))) + else: + self._log.failure( + "Error writing request, but not in valid state " + "to finalize request: {state}", + failure=err, + state=self._state, + ) + + _requestDeferred.addCallbacks(cbRequestWritten, ebRequestWriting) + + return self._finishedRequest + + def _finishResponse(self, rest): + """ + Called by an L{HTTPClientParser} to indicate that it has parsed a + complete response. + + @param rest: A C{bytes} giving any trailing bytes which were given to + the L{HTTPClientParser} which were not part of the response it + was parsing. + """ + + _finishResponse = makeStatefulDispatcher("finishResponse", _finishResponse) + + def _finishResponse_WAITING(self, rest): + # Currently the rest parameter is ignored. Don't forget to use it if + # we ever add support for pipelining. And maybe check what trailers + # mean. + if self._state == "WAITING": + self._state = "QUIESCENT" + else: + # The server sent the entire response before we could send the + # whole request. That sucks. Oh well. Fire the request() + # Deferred with the response. But first, make sure that if the + # request does ever finish being written that it won't try to fire + # that Deferred. + self._state = "TRANSMITTING_AFTER_RECEIVING_RESPONSE" + self._responseDeferred.chainDeferred(self._finishedRequest) + + # This will happen if we're being called due to connection being lost; + # if so, no need to disconnect parser again, or to call + # _quiescentCallback. + if self._parser is None: + return + + reason = ConnectionDone("synthetic!") + connHeaders = self._parser.connHeaders.getRawHeaders(b"connection", ()) + if ( + (b"close" in connHeaders) + or self._state != "QUIESCENT" + or not self._currentRequest.persistent + ): + self._giveUp(Failure(reason)) + else: + # Just in case we had paused the transport, resume it before + # considering it quiescent again. + self.transport.resumeProducing() + + # We call the quiescent callback first, to ensure connection gets + # added back to connection pool before we finish the request. + try: + self._quiescentCallback(self) + except BaseException: + # If callback throws exception, just log it and disconnect; + # keeping persistent connections around is an optimisation: + self._log.failure("") + self.transport.loseConnection() + self._disconnectParser(reason) + + _finishResponse_TRANSMITTING = _finishResponse_WAITING + + def _disconnectParser(self, reason): + """ + If there is still a parser, call its C{connectionLost} method with the + given reason. If there is not, do nothing. + + @type reason: L{Failure} + """ + if self._parser is not None: + parser = self._parser + self._parser = None + self._currentRequest = None + self._finishedRequest = None + self._responseDeferred = None + + # The parser is no longer allowed to do anything to the real + # transport. 
Stop proxying from the parser's transport to the real + # transport before telling the parser it's done so that it can't do + # anything. + self._transportProxy.stopProxying() + self._transportProxy = None + parser.connectionLost(reason) + + def _giveUp(self, reason): + """ + Lose the underlying connection and disconnect the parser with the given + L{Failure}. + + Use this method instead of calling the transport's loseConnection + method directly otherwise random things will break. + """ + self.transport.loseConnection() + self._disconnectParser(reason) + + def dataReceived(self, bytes): + """ + Handle some stuff from some place. + """ + try: + self._parser.dataReceived(bytes) + except BaseException: + self._giveUp(Failure()) + + def connectionLost(self, reason): + """ + The underlying transport went away. If appropriate, notify the parser + object. + """ + + connectionLost = makeStatefulDispatcher("connectionLost", connectionLost) + + def _connectionLost_QUIESCENT(self, reason): + """ + Nothing is currently happening. Move to the C{'CONNECTION_LOST'} + state but otherwise do nothing. + """ + self._state = "CONNECTION_LOST" + + def _connectionLost_GENERATION_FAILED(self, reason): + """ + The connection was in an inconsistent state. Move to the + C{'CONNECTION_LOST'} state but otherwise do nothing. + """ + self._state = "CONNECTION_LOST" + + def _connectionLost_TRANSMITTING(self, reason): + """ + Fail the L{Deferred} for the current request, notify the request + object that it does not need to continue transmitting itself, and + move to the C{'CONNECTION_LOST'} state. + """ + self._state = "CONNECTION_LOST" + self._finishedRequest.errback(Failure(RequestTransmissionFailed([reason]))) + del self._finishedRequest + + # Tell the request that it should stop bothering now. + self._currentRequest.stopWriting() + + def _connectionLost_TRANSMITTING_AFTER_RECEIVING_RESPONSE(self, reason): + """ + Move to the C{'CONNECTION_LOST'} state. + """ + self._state = "CONNECTION_LOST" + + def _connectionLost_WAITING(self, reason): + """ + Disconnect the response parser so that it can propagate the event as + necessary (for example, to call an application protocol's + C{connectionLost} method, or to fail a request L{Deferred}) and move + to the C{'CONNECTION_LOST'} state. + """ + self._disconnectParser(reason) + self._state = "CONNECTION_LOST" + + def _connectionLost_ABORTING(self, reason): + """ + Disconnect the response parser with a L{ConnectionAborted} failure, and + move to the C{'CONNECTION_LOST'} state. + """ + self._disconnectParser(Failure(ConnectionAborted())) + self._state = "CONNECTION_LOST" + for d in self._abortDeferreds: + d.callback(None) + self._abortDeferreds = [] + + def abort(self): + """ + Close the connection and cause all outstanding L{request} L{Deferred}s + to fire with an error. + """ + if self._state == "CONNECTION_LOST": + return succeed(None) + self.transport.loseConnection() + self._state = "ABORTING" + d = Deferred() + self._abortDeferreds.append(d) + return d diff --git a/contrib/python/Twisted/py3/twisted/web/_responses.py b/contrib/python/Twisted/py3/twisted/web/_responses.py new file mode 100644 index 0000000000..2b93229350 --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/_responses.py @@ -0,0 +1,110 @@ +# -*- test-case-name: twisted.web.test.test_http -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +HTTP response code definitions. 
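+
+For each status code there is a named constant (for example C{NOT_FOUND} is
+C{404}), and the C{RESPONSES} dictionary maps each numeric code to its reason
+phrase (for example C{RESPONSES[NOT_FOUND]} is C{b"Not Found"}).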
+""" + + +_CONTINUE = 100 +SWITCHING = 101 + +OK = 200 +CREATED = 201 +ACCEPTED = 202 +NON_AUTHORITATIVE_INFORMATION = 203 +NO_CONTENT = 204 +RESET_CONTENT = 205 +PARTIAL_CONTENT = 206 +MULTI_STATUS = 207 + +MULTIPLE_CHOICE = 300 +MOVED_PERMANENTLY = 301 +FOUND = 302 +SEE_OTHER = 303 +NOT_MODIFIED = 304 +USE_PROXY = 305 +TEMPORARY_REDIRECT = 307 +PERMANENT_REDIRECT = 308 + +BAD_REQUEST = 400 +UNAUTHORIZED = 401 +PAYMENT_REQUIRED = 402 +FORBIDDEN = 403 +NOT_FOUND = 404 +NOT_ALLOWED = 405 +NOT_ACCEPTABLE = 406 +PROXY_AUTH_REQUIRED = 407 +REQUEST_TIMEOUT = 408 +CONFLICT = 409 +GONE = 410 +LENGTH_REQUIRED = 411 +PRECONDITION_FAILED = 412 +REQUEST_ENTITY_TOO_LARGE = 413 +REQUEST_URI_TOO_LONG = 414 +UNSUPPORTED_MEDIA_TYPE = 415 +REQUESTED_RANGE_NOT_SATISFIABLE = 416 +EXPECTATION_FAILED = 417 + +INTERNAL_SERVER_ERROR = 500 +NOT_IMPLEMENTED = 501 +BAD_GATEWAY = 502 +SERVICE_UNAVAILABLE = 503 +GATEWAY_TIMEOUT = 504 +HTTP_VERSION_NOT_SUPPORTED = 505 +INSUFFICIENT_STORAGE_SPACE = 507 +NOT_EXTENDED = 510 + +RESPONSES = { + # 100 + _CONTINUE: b"Continue", + SWITCHING: b"Switching Protocols", + # 200 + OK: b"OK", + CREATED: b"Created", + ACCEPTED: b"Accepted", + NON_AUTHORITATIVE_INFORMATION: b"Non-Authoritative Information", + NO_CONTENT: b"No Content", + RESET_CONTENT: b"Reset Content.", + PARTIAL_CONTENT: b"Partial Content", + MULTI_STATUS: b"Multi-Status", + # 300 + MULTIPLE_CHOICE: b"Multiple Choices", + MOVED_PERMANENTLY: b"Moved Permanently", + FOUND: b"Found", + SEE_OTHER: b"See Other", + NOT_MODIFIED: b"Not Modified", + USE_PROXY: b"Use Proxy", + # 306 not defined?? + TEMPORARY_REDIRECT: b"Temporary Redirect", + PERMANENT_REDIRECT: b"Permanent Redirect", + # 400 + BAD_REQUEST: b"Bad Request", + UNAUTHORIZED: b"Unauthorized", + PAYMENT_REQUIRED: b"Payment Required", + FORBIDDEN: b"Forbidden", + NOT_FOUND: b"Not Found", + NOT_ALLOWED: b"Method Not Allowed", + NOT_ACCEPTABLE: b"Not Acceptable", + PROXY_AUTH_REQUIRED: b"Proxy Authentication Required", + REQUEST_TIMEOUT: b"Request Time-out", + CONFLICT: b"Conflict", + GONE: b"Gone", + LENGTH_REQUIRED: b"Length Required", + PRECONDITION_FAILED: b"Precondition Failed", + REQUEST_ENTITY_TOO_LARGE: b"Request Entity Too Large", + REQUEST_URI_TOO_LONG: b"Request-URI Too Long", + UNSUPPORTED_MEDIA_TYPE: b"Unsupported Media Type", + REQUESTED_RANGE_NOT_SATISFIABLE: b"Requested Range not satisfiable", + EXPECTATION_FAILED: b"Expectation Failed", + # 500 + INTERNAL_SERVER_ERROR: b"Internal Server Error", + NOT_IMPLEMENTED: b"Not Implemented", + BAD_GATEWAY: b"Bad Gateway", + SERVICE_UNAVAILABLE: b"Service Unavailable", + GATEWAY_TIMEOUT: b"Gateway Time-out", + HTTP_VERSION_NOT_SUPPORTED: b"HTTP Version not supported", + INSUFFICIENT_STORAGE_SPACE: b"Insufficient Storage Space", + NOT_EXTENDED: b"Not Extended", +} diff --git a/contrib/python/Twisted/py3/twisted/web/_stan.py b/contrib/python/Twisted/py3/twisted/web/_stan.py new file mode 100644 index 0000000000..88e82d2dfe --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/_stan.py @@ -0,0 +1,360 @@ +# -*- test-case-name: twisted.web.test.test_stan -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +An s-expression-like syntax for expressing xml in pure python. + +Stan tags allow you to build XML documents using Python. + +Stan is a DOM, or Document Object Model, implemented using basic Python types +and functions called "flatteners". A flattener is a function that knows how to +turn an object of a specific type into something that is closer to an HTML +string. 
Stan differs from the W3C DOM by not being as cumbersome and heavy +weight. Since the object model is built using simple python types such as lists, +strings, and dictionaries, the API is simpler and constructing a DOM less +cumbersome. + +@var voidElements: the names of HTML 'U{void + elements<http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#void-elements>}'; + those which can't have contents and can therefore be self-closing in the + output. +""" + + +from inspect import iscoroutine, isgenerator +from typing import TYPE_CHECKING, Dict, List, Optional, Union +from warnings import warn + +import attr + +if TYPE_CHECKING: + from twisted.web.template import Flattenable + + +@attr.s(hash=False, eq=False, auto_attribs=True) +class slot: + """ + Marker for markup insertion in a template. + """ + + name: str + """ + The name of this slot. + + The key which must be used in L{Tag.fillSlots} to fill it. + """ + + children: List["Tag"] = attr.ib(init=False, factory=list) + """ + The L{Tag} objects included in this L{slot}'s template. + """ + + default: Optional["Flattenable"] = None + """ + The default contents of this slot, if it is left unfilled. + + If this is L{None}, an L{UnfilledSlot} will be raised, rather than + L{None} actually being used. + """ + + filename: Optional[str] = None + """ + The name of the XML file from which this tag was parsed. + + If it was not parsed from an XML file, L{None}. + """ + + lineNumber: Optional[int] = None + """ + The line number on which this tag was encountered in the XML file + from which it was parsed. + + If it was not parsed from an XML file, L{None}. + """ + + columnNumber: Optional[int] = None + """ + The column number at which this tag was encountered in the XML file + from which it was parsed. + + If it was not parsed from an XML file, L{None}. + """ + + +@attr.s(hash=False, eq=False, repr=False, auto_attribs=True) +class Tag: + """ + A L{Tag} represents an XML tags with a tag name, attributes, and children. + A L{Tag} can be constructed using the special L{twisted.web.template.tags} + object, or it may be constructed directly with a tag name. L{Tag}s have a + special method, C{__call__}, which makes representing trees of XML natural + using pure python syntax. + """ + + tagName: Union[bytes, str] + """ + The name of the represented element. + + For a tag like C{<div></div>}, this would be C{"div"}. + """ + + attributes: Dict[Union[bytes, str], "Flattenable"] = attr.ib(factory=dict) + """The attributes of the element.""" + + children: List["Flattenable"] = attr.ib(factory=list) + """The contents of this C{Tag}.""" + + render: Optional[str] = None + """ + The name of the render method to use for this L{Tag}. + + This name will be looked up at render time by the + L{twisted.web.template.Element} doing the rendering, + via L{twisted.web.template.Element.lookupRenderMethod}, + to determine which method to call. + """ + + filename: Optional[str] = None + """ + The name of the XML file from which this tag was parsed. + + If it was not parsed from an XML file, L{None}. + """ + + lineNumber: Optional[int] = None + """ + The line number on which this tag was encountered in the XML file + from which it was parsed. + + If it was not parsed from an XML file, L{None}. + """ + + columnNumber: Optional[int] = None + """ + The column number at which this tag was encountered in the XML file + from which it was parsed. + + If it was not parsed from an XML file, L{None}. 
+ """ + + slotData: Optional[Dict[str, "Flattenable"]] = attr.ib(init=False, default=None) + """ + The data which can fill slots. + + If present, a dictionary mapping slot names to renderable values. + The values in this dict might be anything that can be present as + the child of a L{Tag}: strings, lists, L{Tag}s, generators, etc. + """ + + def fillSlots(self, **slots: "Flattenable") -> "Tag": + """ + Remember the slots provided at this position in the DOM. + + During the rendering of children of this node, slots with names in + C{slots} will be rendered as their corresponding values. + + @return: C{self}. This enables the idiom C{return tag.fillSlots(...)} in + renderers. + """ + if self.slotData is None: + self.slotData = {} + self.slotData.update(slots) + return self + + def __call__(self, *children: "Flattenable", **kw: "Flattenable") -> "Tag": + """ + Add children and change attributes on this tag. + + This is implemented using __call__ because it then allows the natural + syntax:: + + table(tr1, tr2, width="100%", height="50%", border="1") + + Children may be other tag instances, strings, functions, or any other + object which has a registered flatten. + + Attributes may be 'transparent' tag instances (so that + C{a(href=transparent(data="foo", render=myhrefrenderer))} works), + strings, functions, or any other object which has a registered + flattener. + + If the attribute is a python keyword, such as 'class', you can add an + underscore to the name, like 'class_'. + + There is one special keyword argument, 'render', which will be used as + the name of the renderer and saved as the 'render' attribute of this + instance, rather than the DOM 'render' attribute in the attributes + dictionary. + """ + self.children.extend(children) + + for k, v in kw.items(): + if k[-1] == "_": + k = k[:-1] + + if k == "render": + if not isinstance(v, str): + raise TypeError( + f'Value for "render" attribute must be str, got {v!r}' + ) + self.render = v + else: + self.attributes[k] = v + return self + + def _clone(self, obj: "Flattenable", deep: bool) -> "Flattenable": + """ + Clone a C{Flattenable} object; used by L{Tag.clone}. + + Note that both lists and tuples are cloned into lists. + + @param obj: an object with a clone method, a list or tuple, or something + which should be immutable. + + @param deep: whether to continue cloning child objects; i.e. the + contents of lists, the sub-tags within a tag. + + @return: a clone of C{obj}. + """ + if hasattr(obj, "clone"): + return obj.clone(deep) + elif isinstance(obj, (list, tuple)): + return [self._clone(x, deep) for x in obj] + elif isgenerator(obj): + warn( + "Cloning a Tag which contains a generator is unsafe, " + "since the generator can be consumed only once; " + "this is deprecated since Twisted 21.7.0 and will raise " + "an exception in the future", + DeprecationWarning, + ) + return obj + elif iscoroutine(obj): + warn( + "Cloning a Tag which contains a coroutine is unsafe, " + "since the coroutine can run only once; " + "this is deprecated since Twisted 21.7.0 and will raise " + "an exception in the future", + DeprecationWarning, + ) + return obj + else: + return obj + + def clone(self, deep: bool = True) -> "Tag": + """ + Return a clone of this tag. If deep is True, clone all of this tag's + children. Otherwise, just shallow copy the children list without copying + the children themselves. 
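+
+        A small, purely illustrative sketch (the tag name and attribute are
+        arbitrary, and C{tags} is L{twisted.web.template.tags})::
+
+            original = tags.div(tags.p("some text"), id="box")
+            deepCopy = original.clone()
+            shallowCopy = original.clone(deep=False)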
+        """
+        if deep:
+            newchildren = [self._clone(x, True) for x in self.children]
+        else:
+            newchildren = self.children[:]
+        newattrs = self.attributes.copy()
+        for key in newattrs.keys():
+            newattrs[key] = self._clone(newattrs[key], True)
+
+        newslotdata = None
+        if self.slotData:
+            newslotdata = self.slotData.copy()
+            for key in newslotdata:
+                newslotdata[key] = self._clone(newslotdata[key], True)
+
+        newtag = Tag(
+            self.tagName,
+            attributes=newattrs,
+            children=newchildren,
+            render=self.render,
+            filename=self.filename,
+            lineNumber=self.lineNumber,
+            columnNumber=self.columnNumber,
+        )
+        newtag.slotData = newslotdata
+
+        return newtag
+
+    def clear(self) -> "Tag":
+        """
+        Clear any existing children from this tag.
+        """
+        self.children = []
+        return self
+
+    def __repr__(self) -> str:
+        rstr = ""
+        if self.attributes:
+            rstr += ", attributes=%r" % self.attributes
+        if self.children:
+            rstr += ", children=%r" % self.children
+        return f"Tag({self.tagName!r}{rstr})"
+
+
+voidElements = (
+    "img",
+    "br",
+    "hr",
+    "base",
+    "meta",
+    "link",
+    "param",
+    "area",
+    "input",
+    "col",
+    "basefont",
+    "isindex",
+    "frame",
+    "command",
+    "embed",
+    "keygen",
+    "source",
+    "track",
+    "wbr",
+)
+
+
+@attr.s(hash=False, eq=False, repr=False, auto_attribs=True)
+class CDATA:
+    """
+    A C{<![CDATA[]]>} block from a template. Given a separate representation in
+    the DOM so that they may be round-tripped through rendering without losing
+    information.
+    """
+
+    data: str
+    """The data between "C{<![CDATA[}" and "C{]]>}"."""
+
+    def __repr__(self) -> str:
+        return f"CDATA({self.data!r})"
+
+
+@attr.s(hash=False, eq=False, repr=False, auto_attribs=True)
+class Comment:
+    """
+    A C{<!-- -->} comment from a template. Given a separate representation in
+    the DOM so that they may be round-tripped through rendering without losing
+    information.
+    """
+
+    data: str
+    """The data between "C{<!--}" and "C{-->}"."""
+
+    def __repr__(self) -> str:
+        return f"Comment({self.data!r})"
+
+
+@attr.s(hash=False, eq=False, repr=False, auto_attribs=True)
+class CharRef:
+    """
+    A numeric character reference. Given a separate representation in the DOM
+    so that non-ASCII characters may be output as pure ASCII.
+
+    @since: 12.0
+    """
+
+    ordinal: int
+    """The ordinal value of the unicode character to which this object refers."""
+
+    def __repr__(self) -> str:
+        return "CharRef(%d)" % (self.ordinal,)
diff --git a/contrib/python/Twisted/py3/twisted/web/_template_util.py b/contrib/python/Twisted/py3/twisted/web/_template_util.py
new file mode 100644
index 0000000000..4a9f7f2100
--- /dev/null
+++ b/contrib/python/Twisted/py3/twisted/web/_template_util.py
@@ -0,0 +1,1112 @@
+# Copyright (c) Twisted Matrix Laboratories.
+# See LICENSE for details.
+
+"""
+twisted.web.util and twisted.web.template merged to avoid cyclic deps
+"""
+
+import io
+import linecache
+import warnings
+from collections import OrderedDict
+from html import escape
+from typing import (
+    IO,
+    Any,
+    AnyStr,
+    Callable,
+    Dict,
+    List,
+    Mapping,
+    Optional,
+    Tuple,
+    Union,
+    cast,
+)
+from xml.sax import handler, make_parser
+from xml.sax.xmlreader import Locator
+
+from zope.interface import implementer
+
+from twisted.internet.defer import Deferred
+from twisted.logger import Logger
+from twisted.python import urlpath
+from twisted.python.failure import Failure
+from twisted.python.filepath import FilePath
+from twisted.python.reflect import fullyQualifiedName
+from twisted.web import resource
+from twisted.web._element import Element, renderer
+from twisted.web._flatten import Flattenable, flatten, flattenString
+from twisted.web._stan import CDATA, Comment, Tag, slot
+from twisted.web.iweb import IRenderable, IRequest, ITemplateLoader
+
+
+def _PRE(text):
+    """
+    Wraps <pre> tags around some text and HTML-escapes it.
+
+    This is here since, once twisted.web.html was deprecated, it was hard to
+    migrate the html.PRE from current code to twisted.web.template.
+
+    For new code consider using twisted.web.template.
+
+    @return: Escaped text wrapped in <pre> tags.
+    @rtype: C{str}
+    """
+    return f"<pre>{escape(text)}</pre>"
+
+
+def redirectTo(URL: bytes, request: IRequest) -> bytes:
+    """
+    Generate a redirect to the given location.
+
+    @param URL: A L{bytes} giving the location to which to redirect.
+
+    @param request: The request object to use to generate the redirect.
+    @type request: L{IRequest<twisted.web.iweb.IRequest>} provider
+
+    @raise TypeError: If the type of C{URL} is L{str} instead of L{bytes}.
+
+    @return: A L{bytes} containing HTML which tries to convince the client
+        agent to visit the new location even if it doesn't respect the
+        I{FOUND} response code. This is intended to be returned from a render
+        method, eg::
+
+            def render_GET(self, request):
+                return redirectTo(b"http://example.com/", request)
+    """
+    if not isinstance(URL, bytes):
+        raise TypeError("URL must be bytes")
+    request.setHeader(b"Content-Type", b"text/html; charset=utf-8")
+    request.redirect(URL)
+    # FIXME: The URL should be HTML-escaped.
+    # https://twistedmatrix.com/trac/ticket/9839
+    content = b"""
+<html>
+    <head>
+        <meta http-equiv=\"refresh\" content=\"0;URL=%(url)s\">
+    </head>
+    <body bgcolor=\"#FFFFFF\" text=\"#000000\">
+        <a href=\"%(url)s\">click here</a>
+    </body>
+</html>
+""" % {
+        b"url": URL
+    }
+    return content
+
+
+class Redirect(resource.Resource):
+    """
+    Resource that redirects to a specific URL.
+
+    @ivar url: Redirect target URL to put in the I{Location} response header.
+    @type url: L{bytes}
+    """
+
+    isLeaf = True
+
+    def __init__(self, url: bytes):
+        super().__init__()
+        self.url = url
+
+    def render(self, request):
+        return redirectTo(self.url, request)
+
+    def getChild(self, name, request):
+        return self
+
+
+# FIXME: This is totally broken, see https://twistedmatrix.com/trac/ticket/9838
+class ChildRedirector(Redirect):
+    isLeaf = False
+
+    def __init__(self, url):
+        # XXX is this enough?
+        if (
+            (url.find("://") == -1)
+            and (not url.startswith(".."))
+            and (not url.startswith("/"))
+        ):
+            raise ValueError(
+                (
+                    "It seems you've given me a redirect (%s) that is a child of"
+                    " myself! That's not good, it'll cause an infinite redirect."
+ ) + % url + ) + Redirect.__init__(self, url) + + def getChild(self, name, request): + newUrl = self.url + if not newUrl.endswith("/"): + newUrl += "/" + newUrl += name + return ChildRedirector(newUrl) + + +class ParentRedirect(resource.Resource): + """ + Redirect to the nearest directory and strip any query string. + + This generates redirects like:: + + / \u2192 / + /foo \u2192 / + /foo?bar \u2192 / + /foo/ \u2192 /foo/ + /foo/bar \u2192 /foo/ + /foo/bar?baz \u2192 /foo/ + + However, the generated I{Location} header contains an absolute URL rather + than a path. + + The response is the same regardless of HTTP method. + """ + + isLeaf = 1 + + def render(self, request: IRequest) -> bytes: + """ + Respond to all requests by redirecting to nearest directory. + """ + here = str(urlpath.URLPath.fromRequest(request).here()).encode("ascii") + return redirectTo(here, request) + + +class DeferredResource(resource.Resource): + """ + I wrap up a Deferred that will eventually result in a Resource + object. + """ + + isLeaf = 1 + + def __init__(self, d): + resource.Resource.__init__(self) + self.d = d + + def getChild(self, name, request): + return self + + def render(self, request): + self.d.addCallback(self._cbChild, request).addErrback(self._ebChild, request) + from twisted.web.server import NOT_DONE_YET + + return NOT_DONE_YET + + def _cbChild(self, child, request): + request.render(resource.getChildForRequest(child, request)) + + def _ebChild(self, reason, request): + request.processingFailed(reason) + + +class _SourceLineElement(Element): + """ + L{_SourceLineElement} is an L{IRenderable} which can render a single line of + source code. + + @ivar number: A C{int} giving the line number of the source code to be + rendered. + @ivar source: A C{str} giving the source code to be rendered. + """ + + def __init__(self, loader, number, source): + Element.__init__(self, loader) + self.number = number + self.source = source + + @renderer + def sourceLine(self, request, tag): + """ + Render the line of source as a child of C{tag}. + """ + return tag(self.source.replace(" ", " \N{NO-BREAK SPACE}")) + + @renderer + def lineNumber(self, request, tag): + """ + Render the line number as a child of C{tag}. + """ + return tag(str(self.number)) + + +class _SourceFragmentElement(Element): + """ + L{_SourceFragmentElement} is an L{IRenderable} which can render several lines + of source code near the line number of a particular frame object. + + @ivar frame: A L{Failure<twisted.python.failure.Failure>}-style frame object + for which to load a source line to render. This is really a tuple + holding some information from a frame object. See + L{Failure.frames<twisted.python.failure.Failure>} for specifics. + """ + + def __init__(self, loader, frame): + Element.__init__(self, loader) + self.frame = frame + + def _getSourceLines(self): + """ + Find the source line references by C{self.frame} and yield, in source + line order, it and the previous and following lines. + + @return: A generator which yields two-tuples. Each tuple gives a source + line number and the contents of that source line. + """ + filename = self.frame[1] + lineNumber = self.frame[2] + for snipLineNumber in range(lineNumber - 1, lineNumber + 2): + yield (snipLineNumber, linecache.getline(filename, snipLineNumber).rstrip()) + + @renderer + def sourceLines(self, request, tag): + """ + Render the source line indicated by C{self.frame} and several + surrounding lines. The active line will be given a I{class} of + C{"snippetHighlightLine"}. 
Other lines will be given a I{class} of + C{"snippetLine"}. + """ + for lineNumber, sourceLine in self._getSourceLines(): + newTag = tag.clone() + if lineNumber == self.frame[2]: + cssClass = "snippetHighlightLine" + else: + cssClass = "snippetLine" + loader = TagLoader(newTag(**{"class": cssClass})) + yield _SourceLineElement(loader, lineNumber, sourceLine) + + +class _FrameElement(Element): + """ + L{_FrameElement} is an L{IRenderable} which can render details about one + frame from a L{Failure<twisted.python.failure.Failure>}. + + @ivar frame: A L{Failure<twisted.python.failure.Failure>}-style frame object + for which to load a source line to render. This is really a tuple + holding some information from a frame object. See + L{Failure.frames<twisted.python.failure.Failure>} for specifics. + """ + + def __init__(self, loader, frame): + Element.__init__(self, loader) + self.frame = frame + + @renderer + def filename(self, request, tag): + """ + Render the name of the file this frame references as a child of C{tag}. + """ + return tag(self.frame[1]) + + @renderer + def lineNumber(self, request, tag): + """ + Render the source line number this frame references as a child of + C{tag}. + """ + return tag(str(self.frame[2])) + + @renderer + def function(self, request, tag): + """ + Render the function name this frame references as a child of C{tag}. + """ + return tag(self.frame[0]) + + @renderer + def source(self, request, tag): + """ + Render the source code surrounding the line this frame references, + replacing C{tag}. + """ + return _SourceFragmentElement(TagLoader(tag), self.frame) + + +class _StackElement(Element): + """ + L{_StackElement} renders an L{IRenderable} which can render a list of frames. + """ + + def __init__(self, loader, stackFrames): + Element.__init__(self, loader) + self.stackFrames = stackFrames + + @renderer + def frames(self, request, tag): + """ + Render the list of frames in this L{_StackElement}, replacing C{tag}. + """ + return [ + _FrameElement(TagLoader(tag.clone()), frame) for frame in self.stackFrames + ] + + +class _NSContext: + """ + A mapping from XML namespaces onto their prefixes in the document. + """ + + def __init__(self, parent: Optional["_NSContext"] = None): + """ + Pull out the parent's namespaces, if there's no parent then default to + XML. + """ + self.parent = parent + if parent is not None: + self.nss: Dict[Optional[str], Optional[str]] = OrderedDict(parent.nss) + else: + self.nss = {"http://www.w3.org/XML/1998/namespace": "xml"} + + def get(self, k: Optional[str], d: Optional[str] = None) -> Optional[str]: + """ + Get a prefix for a namespace. + + @param d: The default prefix value. + """ + return self.nss.get(k, d) + + def __setitem__(self, k: Optional[str], v: Optional[str]) -> None: + """ + Proxy through to setting the prefix for the namespace. + """ + self.nss.__setitem__(k, v) + + def __getitem__(self, k: Optional[str]) -> Optional[str]: + """ + Proxy through to getting the prefix for the namespace. + """ + return self.nss.__getitem__(k) + + +TEMPLATE_NAMESPACE = "http://twistedmatrix.com/ns/twisted.web.template/0.1" + + +class _ToStan(handler.ContentHandler, handler.EntityResolver): + """ + A SAX parser which converts an XML document to the Twisted STAN + Document Object Model. + """ + + def __init__(self, sourceFilename: Optional[str]): + """ + @param sourceFilename: the filename the XML was loaded out of. 
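+            This is L{None} when the document does not come from a file, for
+            example when L{XMLString} parses an in-memory string.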
+ """ + self.sourceFilename = sourceFilename + self.prefixMap = _NSContext() + self.inCDATA = False + + def setDocumentLocator(self, locator: Locator) -> None: + """ + Set the document locator, which knows about line and character numbers. + """ + self.locator = locator + + def startDocument(self) -> None: + """ + Initialise the document. + """ + # Depending on our active context, the element type can be Tag, slot + # or str. Since mypy doesn't understand that context, it would be + # a pain to not use Any here. + self.document: List[Any] = [] + self.current = self.document + self.stack: List[Any] = [] + self.xmlnsAttrs: List[Tuple[str, str]] = [] + + def endDocument(self) -> None: + """ + Document ended. + """ + + def processingInstruction(self, target: str, data: str) -> None: + """ + Processing instructions are ignored. + """ + + def startPrefixMapping(self, prefix: Optional[str], uri: str) -> None: + """ + Set up the prefix mapping, which maps fully qualified namespace URIs + onto namespace prefixes. + + This gets called before startElementNS whenever an C{xmlns} attribute + is seen. + """ + + self.prefixMap = _NSContext(self.prefixMap) + self.prefixMap[uri] = prefix + + # Ignore the template namespace; we'll replace those during parsing. + if uri == TEMPLATE_NAMESPACE: + return + + # Add to a list that will be applied once we have the element. + if prefix is None: + self.xmlnsAttrs.append(("xmlns", uri)) + else: + self.xmlnsAttrs.append(("xmlns:%s" % prefix, uri)) + + def endPrefixMapping(self, prefix: Optional[str]) -> None: + """ + "Pops the stack" on the prefix mapping. + + Gets called after endElementNS. + """ + parent = self.prefixMap.parent + assert parent is not None, "More prefix mapping ends than starts" + self.prefixMap = parent + + def startElementNS( + self, + namespaceAndName: Tuple[str, str], + qname: Optional[str], + attrs: Mapping[Tuple[Optional[str], str], str], + ) -> None: + """ + Gets called when we encounter a new xmlns attribute. + + @param namespaceAndName: a (namespace, name) tuple, where name + determines which type of action to take, if the namespace matches + L{TEMPLATE_NAMESPACE}. + @param qname: ignored. + @param attrs: attributes on the element being started. + """ + + filename = self.sourceFilename + lineNumber = self.locator.getLineNumber() + columnNumber = self.locator.getColumnNumber() + + ns, name = namespaceAndName + if ns == TEMPLATE_NAMESPACE: + if name == "transparent": + name = "" + elif name == "slot": + default: Optional[str] + try: + # Try to get the default value for the slot + default = attrs[(None, "default")] + except KeyError: + # If there wasn't one, then use None to indicate no + # default. + default = None + sl = slot( + attrs[(None, "name")], + default=default, + filename=filename, + lineNumber=lineNumber, + columnNumber=columnNumber, + ) + self.stack.append(sl) + self.current.append(sl) + self.current = sl.children + return + + render = None + + attrs = OrderedDict(attrs) + for k, v in list(attrs.items()): + attrNS, justTheName = k + if attrNS != TEMPLATE_NAMESPACE: + continue + if justTheName == "render": + render = v + del attrs[k] + + # nonTemplateAttrs is a dictionary mapping attributes that are *not* in + # TEMPLATE_NAMESPACE to their values. Those in TEMPLATE_NAMESPACE were + # just removed from 'attrs' in the loop immediately above. 
The key in + # nonTemplateAttrs is either simply the attribute name (if it was not + # specified as having a namespace in the template) or prefix:name, + # preserving the xml namespace prefix given in the document. + + nonTemplateAttrs = OrderedDict() + for (attrNs, attrName), v in attrs.items(): + nsPrefix = self.prefixMap.get(attrNs) + if nsPrefix is None: + attrKey = attrName + else: + attrKey = f"{nsPrefix}:{attrName}" + nonTemplateAttrs[attrKey] = v + + if ns == TEMPLATE_NAMESPACE and name == "attr": + if not self.stack: + # TODO: define a better exception for this? + raise AssertionError( + f"<{{{TEMPLATE_NAMESPACE}}}attr> as top-level element" + ) + if "name" not in nonTemplateAttrs: + # TODO: same here + raise AssertionError( + f"<{{{TEMPLATE_NAMESPACE}}}attr> requires a name attribute" + ) + el = Tag( + "", + render=render, + filename=filename, + lineNumber=lineNumber, + columnNumber=columnNumber, + ) + self.stack[-1].attributes[nonTemplateAttrs["name"]] = el + self.stack.append(el) + self.current = el.children + return + + # Apply any xmlns attributes + if self.xmlnsAttrs: + nonTemplateAttrs.update(OrderedDict(self.xmlnsAttrs)) + self.xmlnsAttrs = [] + + # Add the prefix that was used in the parsed template for non-template + # namespaces (which will not be consumed anyway). + if ns != TEMPLATE_NAMESPACE and ns is not None: + prefix = self.prefixMap[ns] + if prefix is not None: + name = f"{self.prefixMap[ns]}:{name}" + el = Tag( + name, + attributes=OrderedDict( + cast(Mapping[Union[bytes, str], str], nonTemplateAttrs) + ), + render=render, + filename=filename, + lineNumber=lineNumber, + columnNumber=columnNumber, + ) + self.stack.append(el) + self.current.append(el) + self.current = el.children + + def characters(self, ch: str) -> None: + """ + Called when we receive some characters. CDATA characters get passed + through as is. + """ + if self.inCDATA: + self.stack[-1].append(ch) + return + self.current.append(ch) + + def endElementNS(self, name: Tuple[str, str], qname: Optional[str]) -> None: + """ + A namespace tag is closed. Pop the stack, if there's anything left in + it, otherwise return to the document's namespace. + """ + self.stack.pop() + if self.stack: + self.current = self.stack[-1].children + else: + self.current = self.document + + def startDTD(self, name: str, publicId: str, systemId: str) -> None: + """ + DTDs are ignored. + """ + + def endDTD(self, *args: object) -> None: + """ + DTDs are ignored. + """ + + def startCDATA(self) -> None: + """ + We're starting to be in a CDATA element, make a note of this. + """ + self.inCDATA = True + self.stack.append([]) + + def endCDATA(self) -> None: + """ + We're no longer in a CDATA element. Collect up the characters we've + parsed and put them in a new CDATA object. + """ + self.inCDATA = False + comment = "".join(self.stack.pop()) + self.current.append(CDATA(comment)) + + def comment(self, content: str) -> None: + """ + Add an XML comment which we've encountered. + """ + self.current.append(Comment(content)) + + +def _flatsaxParse(fl: Union[IO[AnyStr], str]) -> List["Flattenable"]: + """ + Perform a SAX parse of an XML document with the _ToStan class. + + @param fl: The XML document to be parsed. + + @return: a C{list} of Stan objects. 
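+
+    A minimal, purely illustrative use with an in-memory document (this is
+    essentially what L{XMLString} does with the string it is given)::
+
+        stan = _flatsaxParse(io.StringIO("<p>Hello, world.</p>"))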
+ """ + parser = make_parser() + parser.setFeature(handler.feature_validation, 0) + parser.setFeature(handler.feature_namespaces, 1) + parser.setFeature(handler.feature_external_ges, 0) + parser.setFeature(handler.feature_external_pes, 0) + + s = _ToStan(getattr(fl, "name", None)) + parser.setContentHandler(s) + parser.setEntityResolver(s) + parser.setProperty(handler.property_lexical_handler, s) + + parser.parse(fl) + + return s.document + + +@implementer(ITemplateLoader) +class XMLString: + """ + An L{ITemplateLoader} that loads and parses XML from a string. + """ + + def __init__(self, s: Union[str, bytes]): + """ + Run the parser on a L{io.StringIO} copy of the string. + + @param s: The string from which to load the XML. + @type s: L{str}, or a UTF-8 encoded L{bytes}. + """ + if not isinstance(s, str): + s = s.decode("utf8") + + self._loadedTemplate: List["Flattenable"] = _flatsaxParse(io.StringIO(s)) + """The loaded document.""" + + def load(self) -> List["Flattenable"]: + """ + Return the document. + + @return: the loaded document. + """ + return self._loadedTemplate + + +class FailureElement(Element): + """ + L{FailureElement} is an L{IRenderable} which can render detailed information + about a L{Failure<twisted.python.failure.Failure>}. + + @ivar failure: The L{Failure<twisted.python.failure.Failure>} instance which + will be rendered. + + @since: 12.1 + """ + + loader = XMLString( + """ +<div xmlns:t="http://twistedmatrix.com/ns/twisted.web.template/0.1"> + <style type="text/css"> + div.error { + color: red; + font-family: Verdana, Arial, helvetica, sans-serif; + font-weight: bold; + } + + div { + font-family: Verdana, Arial, helvetica, sans-serif; + } + + div.stackTrace { + } + + div.frame { + padding: 1em; + background: white; + border-bottom: thin black dashed; + } + + div.frame:first-child { + padding: 1em; + background: white; + border-top: thin black dashed; + border-bottom: thin black dashed; + } + + div.location { + } + + span.function { + font-weight: bold; + font-family: "Courier New", courier, monospace; + } + + div.snippet { + margin-bottom: 0.5em; + margin-left: 1em; + background: #FFFFDD; + } + + div.snippetHighlightLine { + color: red; + } + + span.code { + font-family: "Courier New", courier, monospace; + } + </style> + + <div class="error"> + <span t:render="type" />: <span t:render="value" /> + </div> + <div class="stackTrace" t:render="traceback"> + <div class="frame" t:render="frames"> + <div class="location"> + <span t:render="filename" />:<span t:render="lineNumber" /> in + <span class="function" t:render="function" /> + </div> + <div class="snippet" t:render="source"> + <div t:render="sourceLines"> + <span class="lineno" t:render="lineNumber" /> + <code class="code" t:render="sourceLine" /> + </div> + </div> + </div> + </div> + <div class="error"> + <span t:render="type" />: <span t:render="value" /> + </div> +</div> +""" + ) + + def __init__(self, failure, loader=None): + Element.__init__(self, loader) + self.failure = failure + + @renderer + def type(self, request, tag): + """ + Render the exception type as a child of C{tag}. + """ + return tag(fullyQualifiedName(self.failure.type)) + + @renderer + def value(self, request, tag): + """ + Render the exception value as a child of C{tag}. + """ + return tag(str(self.failure.value).encode("utf8")) + + @renderer + def traceback(self, request, tag): + """ + Render all the frames in the wrapped + L{Failure<twisted.python.failure.Failure>}'s traceback stack, replacing + C{tag}. 
+ """ + return _StackElement(TagLoader(tag), self.failure.frames) + + +def formatFailure(myFailure): + """ + Construct an HTML representation of the given failure. + + Consider using L{FailureElement} instead. + + @type myFailure: L{Failure<twisted.python.failure.Failure>} + + @rtype: L{bytes} + @return: A string containing the HTML representation of the given failure. + """ + result = [] + flattenString(None, FailureElement(myFailure)).addBoth(result.append) + if isinstance(result[0], bytes): + # Ensure the result string is all ASCII, for compatibility with the + # default encoding expected by browsers. + return result[0].decode("utf-8").encode("ascii", "xmlcharrefreplace") + result[0].raiseException() + + +# Go read the definition of NOT_DONE_YET. For lulz. This is totally +# equivalent. And this turns out to be necessary, because trying to import +# NOT_DONE_YET in this module causes a circular import which we cannot escape +# from. From which we cannot escape. Etc. glyph is okay with this solution for +# now, and so am I, as long as this comment stays to explain to future +# maintainers what it means. ~ C. +# +# See http://twistedmatrix.com/trac/ticket/5557 for progress on fixing this. +NOT_DONE_YET = 1 +_moduleLog = Logger() + + +@implementer(ITemplateLoader) +class TagLoader: + """ + An L{ITemplateLoader} that loads an existing flattenable object. + """ + + def __init__(self, tag: "Flattenable"): + """ + @param tag: The object which will be loaded. + """ + + self.tag: "Flattenable" = tag + """The object which will be loaded.""" + + def load(self) -> List["Flattenable"]: + return [self.tag] + + +@implementer(ITemplateLoader) +class XMLFile: + """ + An L{ITemplateLoader} that loads and parses XML from a file. + """ + + def __init__(self, path: FilePath[Any]): + """ + Run the parser on a file. + + @param path: The file from which to load the XML. + """ + if not isinstance(path, FilePath): + warnings.warn( # type: ignore[unreachable] + "Passing filenames or file objects to XMLFile is deprecated " + "since Twisted 12.1. Pass a FilePath instead.", + category=DeprecationWarning, + stacklevel=2, + ) + + self._loadedTemplate: Optional[List["Flattenable"]] = None + """The loaded document, or L{None}, if not loaded.""" + + self._path: FilePath[Any] = path + """The file that is being loaded from.""" + + def _loadDoc(self) -> List["Flattenable"]: + """ + Read and parse the XML. + + @return: the loaded document. + """ + if not isinstance(self._path, FilePath): + return _flatsaxParse(self._path) # type: ignore[unreachable] + else: + with self._path.open("r") as f: + return _flatsaxParse(f) + + def __repr__(self) -> str: + return f"<XMLFile of {self._path!r}>" + + def load(self) -> List["Flattenable"]: + """ + Return the document, first loading it if necessary. + + @return: the loaded document. + """ + if self._loadedTemplate is None: + self._loadedTemplate = self._loadDoc() + return self._loadedTemplate + + +# Last updated October 2011, using W3Schools as a reference. Link: +# http://www.w3schools.com/html5/html5_reference.asp +# Note that <xmp> is explicitly omitted; its semantics do not work with +# t.w.template and it is officially deprecated. 
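+# The _TagFactory defined below consults this set: accessing an attribute of
+# the "tags" object whose name is not listed here raises AttributeError.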
+VALID_HTML_TAG_NAMES = { + "a", + "abbr", + "acronym", + "address", + "applet", + "area", + "article", + "aside", + "audio", + "b", + "base", + "basefont", + "bdi", + "bdo", + "big", + "blockquote", + "body", + "br", + "button", + "canvas", + "caption", + "center", + "cite", + "code", + "col", + "colgroup", + "command", + "datalist", + "dd", + "del", + "details", + "dfn", + "dir", + "div", + "dl", + "dt", + "em", + "embed", + "fieldset", + "figcaption", + "figure", + "font", + "footer", + "form", + "frame", + "frameset", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "head", + "header", + "hgroup", + "hr", + "html", + "i", + "iframe", + "img", + "input", + "ins", + "isindex", + "keygen", + "kbd", + "label", + "legend", + "li", + "link", + "map", + "mark", + "menu", + "meta", + "meter", + "nav", + "noframes", + "noscript", + "object", + "ol", + "optgroup", + "option", + "output", + "p", + "param", + "pre", + "progress", + "q", + "rp", + "rt", + "ruby", + "s", + "samp", + "script", + "section", + "select", + "small", + "source", + "span", + "strike", + "strong", + "style", + "sub", + "summary", + "sup", + "table", + "tbody", + "td", + "textarea", + "tfoot", + "th", + "thead", + "time", + "title", + "tr", + "tt", + "u", + "ul", + "var", + "video", + "wbr", +} + + +class _TagFactory: + """ + A factory for L{Tag} objects; the implementation of the L{tags} object. + + This allows for the syntactic convenience of C{from twisted.web.template + import tags; tags.a(href="linked-page.html")}, where 'a' can be basically + any HTML tag. + + The class is not exposed publicly because you only ever need one of these, + and we already made it for you. + + @see: L{tags} + """ + + def __getattr__(self, tagName: str) -> Tag: + if tagName == "transparent": + return Tag("") + # allow for E.del as E.del_ + tagName = tagName.rstrip("_") + if tagName not in VALID_HTML_TAG_NAMES: + raise AttributeError(f"unknown tag {tagName!r}") + return Tag(tagName) + + +tags = _TagFactory() + + +def renderElement( + request: IRequest, + element: IRenderable, + doctype: Optional[bytes] = b"<!DOCTYPE html>", + _failElement: Optional[Callable[[Failure], "Element"]] = None, +) -> object: + """ + Render an element or other L{IRenderable}. + + @param request: The L{IRequest} being rendered to. + @param element: An L{IRenderable} which will be rendered. + @param doctype: A L{bytes} which will be written as the first line of + the request, or L{None} to disable writing of a doctype. The argument + should not include a trailing newline and will default to the HTML5 + doctype C{'<!DOCTYPE html>'}. 
+ + @returns: NOT_DONE_YET + + @since: 12.1 + """ + if doctype is not None: + request.write(doctype) + request.write(b"\n") + + if _failElement is None: + _failElement = FailureElement + + d = flatten(request, element, request.write) + + def eb(failure: Failure) -> Optional[Deferred[None]]: + _moduleLog.failure( + "An error occurred while rendering the response.", failure=failure + ) + site = getattr(request, "site", None) + if site is not None and site.displayTracebacks: + assert _failElement is not None + return flatten(request, _failElement(failure), request.write) + else: + request.write( + b'<div style="font-size:800%;' + b"background-color:#FFF;" + b"color:#F00" + b'">An error occurred while rendering the response.</div>' + ) + return None + + def finish(result: object, *, request: IRequest = request) -> object: + request.finish() + return result + + d.addErrback(eb) + d.addBoth(finish) + return NOT_DONE_YET diff --git a/contrib/python/Twisted/py3/twisted/web/client.py b/contrib/python/Twisted/py3/twisted/web/client.py new file mode 100644 index 0000000000..9a0d5e9e10 --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/client.py @@ -0,0 +1,1789 @@ +# -*- test-case-name: twisted.web.test.test_webclient,twisted.web.test.test_agent -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +HTTP client. +""" + + +import collections +import os +import warnings +import zlib +from functools import wraps +from typing import Iterable +from urllib.parse import urldefrag, urljoin, urlunparse as _urlunparse + +from zope.interface import implementer + +from incremental import Version + +from twisted.internet import defer, protocol, task +from twisted.internet.abstract import isIPv6Address +from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS +from twisted.internet.interfaces import IOpenSSLContextFactory, IProtocol +from twisted.logger import Logger +from twisted.python.compat import nativeString, networkString +from twisted.python.components import proxyForInterface +from twisted.python.deprecate import ( + deprecatedModuleAttribute, + getDeprecationWarningString, +) +from twisted.python.failure import Failure +from twisted.web import error, http +from twisted.web._newclient import _ensureValidMethod, _ensureValidURI +from twisted.web.http_headers import Headers +from twisted.web.iweb import ( + UNKNOWN_LENGTH, + IAgent, + IAgentEndpointFactory, + IBodyProducer, + IPolicyForHTTPS, + IResponse, +) + + +def urlunparse(parts): + result = _urlunparse(tuple(p.decode("charmap") for p in parts)) + return result.encode("charmap") + + +class PartialDownloadError(error.Error): + """ + Page was only partially downloaded, we got disconnected in middle. + + @ivar response: All of the response body which was downloaded. + """ + + +class URI: + """ + A URI object. + + @see: U{https://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-21} + """ + + def __init__(self, scheme, netloc, host, port, path, params, query, fragment): + """ + @type scheme: L{bytes} + @param scheme: URI scheme specifier. + + @type netloc: L{bytes} + @param netloc: Network location component. + + @type host: L{bytes} + @param host: Host name. For IPv6 address literals the brackets are + stripped. + + @type port: L{int} + @param port: Port number. + + @type path: L{bytes} + @param path: Hierarchical path. + + @type params: L{bytes} + @param params: Parameters for last path segment. + + @type query: L{bytes} + @param query: Query string. 
+ + @type fragment: L{bytes} + @param fragment: Fragment identifier. + """ + self.scheme = scheme + self.netloc = netloc + self.host = host.strip(b"[]") + self.port = port + self.path = path + self.params = params + self.query = query + self.fragment = fragment + + @classmethod + def fromBytes(cls, uri, defaultPort=None): + """ + Parse the given URI into a L{URI}. + + @type uri: C{bytes} + @param uri: URI to parse. + + @type defaultPort: C{int} or L{None} + @param defaultPort: An alternate value to use as the port if the URI + does not include one. + + @rtype: L{URI} + @return: Parsed URI instance. + """ + uri = uri.strip() + scheme, netloc, path, params, query, fragment = http.urlparse(uri) + + if defaultPort is None: + if scheme == b"https": + defaultPort = 443 + else: + defaultPort = 80 + + if b":" in netloc: + host, port = netloc.rsplit(b":", 1) + try: + port = int(port) + except ValueError: + host, port = netloc, defaultPort + else: + host, port = netloc, defaultPort + return cls(scheme, netloc, host, port, path, params, query, fragment) + + def toBytes(self): + """ + Assemble the individual parts of the I{URI} into a fully formed I{URI}. + + @rtype: C{bytes} + @return: A fully formed I{URI}. + """ + return urlunparse( + ( + self.scheme, + self.netloc, + self.path, + self.params, + self.query, + self.fragment, + ) + ) + + @property + def originForm(self): + """ + The absolute I{URI} path including I{URI} parameters, query string and + fragment identifier. + + @see: U{https://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-21#section-5.3} + + @return: The absolute path in original form. + @rtype: L{bytes} + """ + # The HTTP bis draft says the origin form should not include the + # fragment. + path = urlunparse((b"", b"", self.path, self.params, self.query, b"")) + if path == b"": + path = b"/" + return path + + +def _urljoin(base, url): + """ + Construct a full ("absolute") URL by combining a "base URL" with another + URL. Informally, this uses components of the base URL, in particular the + addressing scheme, the network location and (part of) the path, to provide + missing components in the relative URL. + + Additionally, the fragment identifier is preserved according to the HTTP + 1.1 bis draft. + + @type base: C{bytes} + @param base: Base URL. + + @type url: C{bytes} + @param url: URL to combine with C{base}. + + @return: An absolute URL resulting from the combination of C{base} and + C{url}. + + @see: L{urllib.parse.urljoin()} + + @see: U{https://tools.ietf.org/html/draft-ietf-httpbis-p2-semantics-22#section-7.1.2} + """ + base, baseFrag = urldefrag(base) + url, urlFrag = urldefrag(urljoin(base, url)) + return urljoin(url, b"#" + (urlFrag or baseFrag)) + + +def _makeGetterFactory(url, factoryFactory, contextFactory=None, *args, **kwargs): + """ + Create and connect an HTTP page getting factory. + + Any additional positional or keyword arguments are used when calling + C{factoryFactory}. 
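As a rough illustration (not part of the upstream file) of the C{URI} helper defined above, the sketch below parses a hypothetical URL and inspects the pieces the class exposes:

# Minimal sketch of URI.fromBytes / toBytes / originForm (hypothetical URL).
from twisted.web.client import URI

uri = URI.fromBytes(b"https://example.com:8443/search?q=twisted#results")
assert uri.scheme == b"https"
assert uri.host == b"example.com" and uri.port == 8443
# originForm keeps path and query but drops the fragment.
assert uri.originForm == b"/search?q=twisted"
# toBytes() reassembles all the parts, including the fragment.
assert uri.toBytes() == b"https://example.com:8443/search?q=twisted#results"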
+ + @param factoryFactory: Factory factory that is called with C{url}, C{args} + and C{kwargs} to produce the getter + + @param contextFactory: Context factory to use when creating a secure + connection, defaulting to L{None} + + @return: The factory created by C{factoryFactory} + """ + uri = URI.fromBytes(_ensureValidURI(url.strip())) + factory = factoryFactory(url, *args, **kwargs) + from twisted.internet import reactor + + if uri.scheme == b"https": + from twisted.internet import ssl + + if contextFactory is None: + contextFactory = ssl.ClientContextFactory() + reactor.connectSSL(nativeString(uri.host), uri.port, factory, contextFactory) + else: + reactor.connectTCP(nativeString(uri.host), uri.port, factory) + return factory + + +# The code which follows is based on the new HTTP client implementation. It +# should be significantly better than anything above, though it is not yet +# feature equivalent. + +from twisted.web._newclient import ( + HTTP11ClientProtocol, + PotentialDataLoss, + Request, + RequestGenerationFailed, + RequestNotSent, + RequestTransmissionFailed, + Response, + ResponseDone, + ResponseFailed, + ResponseNeverReceived, + _WrapperException, +) +from twisted.web.error import SchemeNotSupported + +try: + from OpenSSL import SSL +except ImportError: + SSL = None # type: ignore[assignment] +else: + from twisted.internet.ssl import ( + CertificateOptions, + optionsForClientTLS, + platformTrust, + ) + + +def _requireSSL(decoratee): + """ + The decorated method requires pyOpenSSL to be present, or it raises + L{NotImplementedError}. + + @param decoratee: A function which requires pyOpenSSL. + @type decoratee: L{callable} + + @return: A function which raises L{NotImplementedError} if pyOpenSSL is not + installed; otherwise, if it is installed, simply return C{decoratee}. + @rtype: L{callable} + """ + if SSL is None: + + @wraps(decoratee) + def raiseNotImplemented(*a, **kw): + """ + pyOpenSSL is not available. + + @param a: The positional arguments for C{decoratee}. + + @param kw: The keyword arguments for C{decoratee}. + + @raise NotImplementedError: Always. + """ + raise NotImplementedError("SSL support unavailable") + + return raiseNotImplemented + return decoratee + + +class WebClientContextFactory: + """ + This class is deprecated. Please simply use L{Agent} as-is, or if you want + to customize something, use L{BrowserLikePolicyForHTTPS}. + + A L{WebClientContextFactory} is an HTTPS policy which totally ignores the + hostname and port. It performs basic certificate verification, however the + lack of validation of service identity (e.g. hostname validation) means it + is still vulnerable to man-in-the-middle attacks. Don't use it any more. + """ + + def _getCertificateOptions(self, hostname, port): + """ + Return a L{CertificateOptions}. + + @param hostname: ignored + + @param port: ignored + + @return: A new CertificateOptions instance. + @rtype: L{CertificateOptions} + """ + return CertificateOptions(method=SSL.SSLv23_METHOD, trustRoot=platformTrust()) + + @_requireSSL + def getContext(self, hostname, port): + """ + Return an L{OpenSSL.SSL.Context}. + + @param hostname: ignored + @param port: ignored + + @return: A new SSL context. + @rtype: L{OpenSSL.SSL.Context} + """ + return self._getCertificateOptions(hostname, port).getContext() + + +@implementer(IPolicyForHTTPS) +class BrowserLikePolicyForHTTPS: + """ + SSL connection creator for web clients. 
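The deprecation notice above points at L{BrowserLikePolicyForHTTPS} as the replacement. A minimal sketch of supplying it with a custom trust root follows; the CA path and URL are hypothetical, and L{Agent} is defined further down in this module:

# Sketch: pin trust to a private CA instead of the platform trust store.
from twisted.internet import reactor
from twisted.internet.ssl import Certificate
from twisted.web.client import Agent, BrowserLikePolicyForHTTPS

# Hypothetical PEM file containing the CA certificate to trust.
with open("/path/to/private-ca.pem", "rb") as f:
    trustRoot = Certificate.loadPEM(f.read())

policy = BrowserLikePolicyForHTTPS(trustRoot=trustRoot)
agent = Agent(reactor, contextFactory=policy)
d = agent.request(b"GET", b"https://internal.example/")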
+ """ + + def __init__(self, trustRoot=None): + self._trustRoot = trustRoot + + @_requireSSL + def creatorForNetloc(self, hostname, port): + """ + Create a L{client connection creator + <twisted.internet.interfaces.IOpenSSLClientConnectionCreator>} for a + given network location. + + @param hostname: The hostname part of the URI. + @type hostname: L{bytes} + + @param port: The port part of the URI. + @type port: L{int} + + @return: a connection creator with appropriate verification + restrictions set + @rtype: L{client connection creator + <twisted.internet.interfaces.IOpenSSLClientConnectionCreator>} + """ + return optionsForClientTLS(hostname.decode("ascii"), trustRoot=self._trustRoot) + + +deprecatedModuleAttribute( + Version("Twisted", 14, 0, 0), + getDeprecationWarningString( + WebClientContextFactory, + Version("Twisted", 14, 0, 0), + replacement=BrowserLikePolicyForHTTPS, + ).split("; ")[1], + WebClientContextFactory.__module__, + WebClientContextFactory.__name__, +) + + +@implementer(IPolicyForHTTPS) +class HostnameCachingHTTPSPolicy: + """ + IPolicyForHTTPS that wraps a L{IPolicyForHTTPS} and caches the created + L{IOpenSSLClientConnectionCreator}. + + This policy will cache up to C{cacheSize} + L{client connection creators <twisted.internet.interfaces. + IOpenSSLClientConnectionCreator>} for reuse in subsequent requests to + the same hostname. + + @ivar _policyForHTTPS: See C{policyforHTTPS} parameter of L{__init__}. + + @ivar _cache: A cache associating hostnames to their + L{client connection creators <twisted.internet.interfaces. + IOpenSSLClientConnectionCreator>}. + @type _cache: L{collections.OrderedDict} + + @ivar _cacheSize: See C{cacheSize} parameter of L{__init__}. + + @since: Twisted 19.2.0 + """ + + def __init__(self, policyforHTTPS, cacheSize=20): + """ + @param policyforHTTPS: The IPolicyForHTTPS to wrap. + @type policyforHTTPS: L{IPolicyForHTTPS} + + @param cacheSize: The maximum size of the hostname cache. + @type cacheSize: L{int} + """ + self._policyForHTTPS = policyforHTTPS + self._cache = collections.OrderedDict() + self._cacheSize = cacheSize + + def creatorForNetloc(self, hostname, port): + """ + Create a L{client connection creator + <twisted.internet.interfaces.IOpenSSLClientConnectionCreator>} for a + given network location and cache it for future use. + + @param hostname: The hostname part of the URI. + @type hostname: L{bytes} + + @param port: The port part of the URI. + @type port: L{int} + + @return: a connection creator with appropriate verification + restrictions set + @rtype: L{client connection creator + <twisted.internet.interfaces.IOpenSSLClientConnectionCreator>} + """ + host = hostname.decode("ascii") + try: + creator = self._cache.pop(host) + except KeyError: + creator = self._policyForHTTPS.creatorForNetloc(hostname, port) + + self._cache[host] = creator + if len(self._cache) > self._cacheSize: + self._cache.popitem(last=False) + + return creator + + +@implementer(IOpenSSLContextFactory) +class _ContextFactoryWithContext: + """ + A L{_ContextFactoryWithContext} is like a + L{twisted.internet.ssl.ContextFactory} with a pre-created context. + + @ivar _context: A Context. + @type _context: L{OpenSSL.SSL.Context} + """ + + def __init__(self, context): + """ + Initialize a L{_ContextFactoryWithContext} with a context. + + @param context: An SSL context. + @type context: L{OpenSSL.SSL.Context} + """ + self._context = context + + def getContext(self): + """ + Return the context created by + L{_DeprecatedToCurrentPolicyForHTTPS._webContextFactory}. 
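A minimal sketch of the wrapping described in the L{HostnameCachingHTTPSPolicy} docstring above; the URL is hypothetical:

# Sketch: cache per-host TLS connection creators across requests.
from twisted.internet import reactor
from twisted.web.client import (
    Agent,
    BrowserLikePolicyForHTTPS,
    HostnameCachingHTTPSPolicy,
)

policy = HostnameCachingHTTPSPolicy(BrowserLikePolicyForHTTPS(), cacheSize=50)
agent = Agent(reactor, contextFactory=policy)
# Repeated requests to the same host reuse the cached connection creator.
d = agent.request(b"GET", b"https://example.com/")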
+ + @return: A context. + @rtype: L{OpenSSL.SSL.Context} + """ + return self._context + + +@implementer(IPolicyForHTTPS) +class _DeprecatedToCurrentPolicyForHTTPS: + """ + Adapt a web context factory to a normal context factory. + + @ivar _webContextFactory: An object providing a getContext method with + C{hostname} and C{port} arguments. + @type _webContextFactory: L{WebClientContextFactory} (or object with a + similar C{getContext} method). + """ + + def __init__(self, webContextFactory): + """ + Wrap a web context factory in an L{IPolicyForHTTPS}. + + @param webContextFactory: An object providing a getContext method with + C{hostname} and C{port} arguments. + @type webContextFactory: L{WebClientContextFactory} (or object with a + similar C{getContext} method). + """ + self._webContextFactory = webContextFactory + + def creatorForNetloc(self, hostname, port): + """ + Called the wrapped web context factory's C{getContext} method with a + hostname and port number and return the resulting context object. + + @param hostname: The hostname part of the URI. + @type hostname: L{bytes} + + @param port: The port part of the URI. + @type port: L{int} + + @return: A context factory. + @rtype: L{IOpenSSLContextFactory} + """ + context = self._webContextFactory.getContext(hostname, port) + return _ContextFactoryWithContext(context) + + +@implementer(IBodyProducer) +class FileBodyProducer: + """ + L{FileBodyProducer} produces bytes from an input file object incrementally + and writes them to a consumer. + + Since file-like objects cannot be read from in an event-driven manner, + L{FileBodyProducer} uses a L{Cooperator} instance to schedule reads from + the file. This process is also paused and resumed based on notifications + from the L{IConsumer} provider being written to. + + The file is closed after it has been read, or if the producer is stopped + early. + + @ivar _inputFile: Any file-like object, bytes read from which will be + written to a consumer. + + @ivar _cooperate: A method like L{Cooperator.cooperate} which is used to + schedule all reads. + + @ivar _readSize: The number of bytes to read from C{_inputFile} at a time. + """ + + def __init__(self, inputFile, cooperator=task, readSize=2**16): + self._inputFile = inputFile + self._cooperate = cooperator.cooperate + self._readSize = readSize + self.length = self._determineLength(inputFile) + + def _determineLength(self, fObj): + """ + Determine how many bytes can be read out of C{fObj} (assuming it is not + modified from this point on). If the determination cannot be made, + return C{UNKNOWN_LENGTH}. + """ + try: + seek = fObj.seek + tell = fObj.tell + except AttributeError: + return UNKNOWN_LENGTH + originalPosition = tell() + seek(0, os.SEEK_END) + end = tell() + seek(originalPosition, os.SEEK_SET) + return end - originalPosition + + def stopProducing(self): + """ + Permanently stop writing bytes from the file to the consumer by + stopping the underlying L{CooperativeTask}. + """ + self._inputFile.close() + try: + self._task.stop() + except task.TaskFinished: + pass + + def startProducing(self, consumer): + """ + Start a cooperative task which will read bytes from the input file and + write them to C{consumer}. Return a L{Deferred} which fires after all + bytes have been written. If this L{Deferred} is cancelled before it is + fired, stop reading and writing bytes. 
+ + @param consumer: Any L{IConsumer} provider + """ + self._task = self._cooperate(self._writeloop(consumer)) + d = self._task.whenDone() + + def maybeStopped(reason): + if reason.check(defer.CancelledError): + self.stopProducing() + elif reason.check(task.TaskStopped): + pass + else: + return reason + # IBodyProducer.startProducing's Deferred isn't supposed to fire if + # stopProducing is called. + return defer.Deferred() + + d.addCallbacks(lambda ignored: None, maybeStopped) + return d + + def _writeloop(self, consumer): + """ + Return an iterator which reads one chunk of bytes from the input file + and writes them to the consumer for each time it is iterated. + """ + while True: + bytes = self._inputFile.read(self._readSize) + if not bytes: + self._inputFile.close() + break + consumer.write(bytes) + yield None + + def pauseProducing(self): + """ + Temporarily suspend copying bytes from the input file to the consumer + by pausing the L{CooperativeTask} which drives that activity. + """ + self._task.pause() + + def resumeProducing(self): + """ + Undo the effects of a previous C{pauseProducing} and resume copying + bytes to the consumer by resuming the L{CooperativeTask} which drives + the write activity. + """ + self._task.resume() + + +class _HTTP11ClientFactory(protocol.Factory): + """ + A factory for L{HTTP11ClientProtocol}, used by L{HTTPConnectionPool}. + + @ivar _quiescentCallback: The quiescent callback to be passed to protocol + instances, used to return them to the connection pool. + + @ivar _metadata: Metadata about the low-level connection details, + used to make the repr more useful. + + @since: 11.1 + """ + + def __init__(self, quiescentCallback, metadata): + self._quiescentCallback = quiescentCallback + self._metadata = metadata + + def __repr__(self) -> str: + return "_HTTP11ClientFactory({}, {})".format( + self._quiescentCallback, self._metadata + ) + + def buildProtocol(self, addr): + return HTTP11ClientProtocol(self._quiescentCallback) + + +class _RetryingHTTP11ClientProtocol: + """ + A wrapper for L{HTTP11ClientProtocol} that automatically retries requests. + + @ivar _clientProtocol: The underlying L{HTTP11ClientProtocol}. + + @ivar _newConnection: A callable that creates a new connection for a + retry. + """ + + def __init__(self, clientProtocol, newConnection): + self._clientProtocol = clientProtocol + self._newConnection = newConnection + + def _shouldRetry(self, method, exception, bodyProducer): + """ + Indicate whether request should be retried. + + Only returns C{True} if method is idempotent, no response was + received, the reason for the failed request was not due to + user-requested cancellation, and no body was sent. The latter + requirement may be relaxed in the future, and PUT added to approved + method list. + + @param method: The method of the request. + @type method: L{bytes} + """ + if method not in (b"GET", b"HEAD", b"OPTIONS", b"DELETE", b"TRACE"): + return False + if not isinstance( + exception, + (RequestNotSent, RequestTransmissionFailed, ResponseNeverReceived), + ): + return False + if isinstance(exception, _WrapperException): + for aFailure in exception.reasons: + if aFailure.check(defer.CancelledError): + return False + if bodyProducer is not None: + return False + return True + + def request(self, request): + """ + Do a request, and retry once (with a new connection) if it fails in + a retryable manner. + + @param request: A L{Request} instance that will be requested using the + wrapped protocol. 
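A minimal sketch of feeding a L{FileBodyProducer} to an agent request as an upload body; the file name and URL are hypothetical, and L{Agent} is defined further down in this module:

# Sketch: upload a file as an HTTP request body.
from twisted.internet import reactor
from twisted.web.client import Agent, FileBodyProducer
from twisted.web.http_headers import Headers

# The producer determines the length up front and closes the file when done.
body = FileBodyProducer(open("upload.bin", "rb"))
agent = Agent(reactor)
d = agent.request(
    b"PUT",
    b"http://example.com/upload",
    Headers({b"Content-Type": [b"application/octet-stream"]}),
    body,
)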
+ """ + d = self._clientProtocol.request(request) + + def failed(reason): + if self._shouldRetry(request.method, reason.value, request.bodyProducer): + return self._newConnection().addCallback( + lambda connection: connection.request(request) + ) + else: + return reason + + d.addErrback(failed) + return d + + +class HTTPConnectionPool: + """ + A pool of persistent HTTP connections. + + Features: + - Cached connections will eventually time out. + - Limits on maximum number of persistent connections. + + Connections are stored using keys, which should be chosen such that any + connections stored under a given key can be used interchangeably. + + Failed requests done using previously cached connections will be retried + once if they use an idempotent method (e.g. GET), in case the HTTP server + timed them out. + + @ivar persistent: Boolean indicating whether connections should be + persistent. Connections are persistent by default. + + @ivar maxPersistentPerHost: The maximum number of cached persistent + connections for a C{host:port} destination. + @type maxPersistentPerHost: C{int} + + @ivar cachedConnectionTimeout: Number of seconds a cached persistent + connection will stay open before disconnecting. + + @ivar retryAutomatically: C{boolean} indicating whether idempotent + requests should be retried once if no response was received. + + @ivar _factory: The factory used to connect to the proxy. + + @ivar _connections: Map (scheme, host, port) to lists of + L{HTTP11ClientProtocol} instances. + + @ivar _timeouts: Map L{HTTP11ClientProtocol} instances to a + C{IDelayedCall} instance of their timeout. + + @since: 12.1 + """ + + _factory = _HTTP11ClientFactory + maxPersistentPerHost = 2 + cachedConnectionTimeout = 240 + retryAutomatically = True + _log = Logger() + + def __init__(self, reactor, persistent=True): + self._reactor = reactor + self.persistent = persistent + self._connections = {} + self._timeouts = {} + + def getConnection(self, key, endpoint): + """ + Supply a connection, newly created or retrieved from the pool, to be + used for one HTTP request. + + The connection will remain out of the pool (not available to be + returned from future calls to this method) until one HTTP request has + been completed over it. + + Afterwards, if the connection is still open, it will automatically be + added to the pool. + + @param key: A unique key identifying connections that can be used + interchangeably. + + @param endpoint: An endpoint that can be used to open a new connection + if no cached connection is available. + + @return: A C{Deferred} that will fire with a L{HTTP11ClientProtocol} + (or a wrapper) that can be used to send a single HTTP request. + """ + # Try to get cached version: + connections = self._connections.get(key) + while connections: + connection = connections.pop(0) + # Cancel timeout: + self._timeouts[connection].cancel() + del self._timeouts[connection] + if connection.state == "QUIESCENT": + if self.retryAutomatically: + newConnection = lambda: self._newConnection(key, endpoint) + connection = _RetryingHTTP11ClientProtocol( + connection, newConnection + ) + return defer.succeed(connection) + + return self._newConnection(key, endpoint) + + def _newConnection(self, key, endpoint): + """ + Create a new connection. + + This implements the new connection code path for L{getConnection}. 
+ """ + + def quiescentCallback(protocol): + self._putConnection(key, protocol) + + factory = self._factory(quiescentCallback, repr(endpoint)) + return endpoint.connect(factory) + + def _removeConnection(self, key, connection): + """ + Remove a connection from the cache and disconnect it. + """ + connection.transport.loseConnection() + self._connections[key].remove(connection) + del self._timeouts[connection] + + def _putConnection(self, key, connection): + """ + Return a persistent connection to the pool. This will be called by + L{HTTP11ClientProtocol} when the connection becomes quiescent. + """ + if connection.state != "QUIESCENT": + # Log with traceback for debugging purposes: + try: + raise RuntimeError( + "BUG: Non-quiescent protocol added to connection pool." + ) + except BaseException: + self._log.failure( + "BUG: Non-quiescent protocol added to connection pool." + ) + return + connections = self._connections.setdefault(key, []) + if len(connections) == self.maxPersistentPerHost: + dropped = connections.pop(0) + dropped.transport.loseConnection() + self._timeouts[dropped].cancel() + del self._timeouts[dropped] + connections.append(connection) + cid = self._reactor.callLater( + self.cachedConnectionTimeout, self._removeConnection, key, connection + ) + self._timeouts[connection] = cid + + def closeCachedConnections(self): + """ + Close all persistent connections and remove them from the pool. + + @return: L{defer.Deferred} that fires when all connections have been + closed. + """ + results = [] + for protocols in self._connections.values(): + for p in protocols: + results.append(p.abort()) + self._connections = {} + for dc in self._timeouts.values(): + dc.cancel() + self._timeouts = {} + return defer.gatherResults(results).addCallback(lambda ign: None) + + +class _AgentBase: + """ + Base class offering common facilities for L{Agent}-type classes. + + @ivar _reactor: The C{IReactorTime} implementation which will be used by + the pool, and perhaps by subclasses as well. + + @ivar _pool: The L{HTTPConnectionPool} used to manage HTTP connections. + """ + + def __init__(self, reactor, pool): + if pool is None: + pool = HTTPConnectionPool(reactor, False) + self._reactor = reactor + self._pool = pool + + def _computeHostValue(self, scheme, host, port): + """ + Compute the string to use for the value of the I{Host} header, based on + the given scheme, host name, and port number. + """ + if isIPv6Address(nativeString(host)): + host = b"[" + host + b"]" + if (scheme, port) in ((b"http", 80), (b"https", 443)): + return host + return b"%b:%d" % (host, port) + + def _requestWithEndpoint( + self, key, endpoint, method, parsedURI, headers, bodyProducer, requestPath + ): + """ + Issue a new request, given the endpoint and the path sent as part of + the request. 
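A minimal sketch of sharing one persistent L{HTTPConnectionPool} across requests, using the knobs documented above; the values are illustrative only:

# Sketch: share one persistent connection pool across many requests.
from twisted.internet import reactor
from twisted.web.client import Agent, HTTPConnectionPool

pool = HTTPConnectionPool(reactor, persistent=True)
pool.maxPersistentPerHost = 4          # allow a few cached connections per host
pool.cachedConnectionTimeout = 120     # drop idle connections after two minutes

agent = Agent(reactor, pool=pool)
d = agent.request(b"GET", b"http://example.com/")

# On shutdown, close any connections still sitting in the pool.
reactor.addSystemEventTrigger("before", "shutdown", pool.closeCachedConnections)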
+ """ + if not isinstance(method, bytes): + raise TypeError(f"method={method!r} is {type(method)}, but must be bytes") + + method = _ensureValidMethod(method) + + # Create minimal headers, if necessary: + if headers is None: + headers = Headers() + if not headers.hasHeader(b"host"): + headers = headers.copy() + headers.addRawHeader( + b"host", + self._computeHostValue( + parsedURI.scheme, parsedURI.host, parsedURI.port + ), + ) + + d = self._pool.getConnection(key, endpoint) + + def cbConnected(proto): + return proto.request( + Request._construct( + method, + requestPath, + headers, + bodyProducer, + persistent=self._pool.persistent, + parsedURI=parsedURI, + ) + ) + + d.addCallback(cbConnected) + return d + + +@implementer(IAgentEndpointFactory) +class _StandardEndpointFactory: + """ + Standard HTTP endpoint destinations - TCP for HTTP, TCP+TLS for HTTPS. + + @ivar _policyForHTTPS: A web context factory which will be used to create + SSL context objects for any SSL connections the agent needs to make. + + @ivar _connectTimeout: If not L{None}, the timeout passed to + L{HostnameEndpoint} for specifying the connection timeout. + + @ivar _bindAddress: If not L{None}, the address passed to + L{HostnameEndpoint} for specifying the local address to bind to. + """ + + def __init__(self, reactor, contextFactory, connectTimeout, bindAddress): + """ + @param reactor: A provider to use to create endpoints. + @type reactor: see L{HostnameEndpoint.__init__} for acceptable reactor + types. + + @param contextFactory: A factory for TLS contexts, to control the + verification parameters of OpenSSL. + @type contextFactory: L{IPolicyForHTTPS}. + + @param connectTimeout: The amount of time that this L{Agent} will wait + for the peer to accept a connection. + @type connectTimeout: L{float} or L{None} + + @param bindAddress: The local address for client sockets to bind to. + @type bindAddress: L{bytes} or L{None} + """ + self._reactor = reactor + self._policyForHTTPS = contextFactory + self._connectTimeout = connectTimeout + self._bindAddress = bindAddress + + def endpointForURI(self, uri): + """ + Connect directly over TCP for C{b'http'} scheme, and TLS for + C{b'https'}. + + @param uri: L{URI} to connect to. + + @return: Endpoint to connect to. + @rtype: L{IStreamClientEndpoint} + """ + kwargs = {} + if self._connectTimeout is not None: + kwargs["timeout"] = self._connectTimeout + kwargs["bindAddress"] = self._bindAddress + + try: + host = nativeString(uri.host) + except UnicodeDecodeError: + raise ValueError( + ( + "The host of the provided URI ({uri.host!r}) " + "contains non-ASCII octets, it should be ASCII " + "decodable." + ).format(uri=uri) + ) + + endpoint = HostnameEndpoint(self._reactor, host, uri.port, **kwargs) + if uri.scheme == b"http": + return endpoint + elif uri.scheme == b"https": + connectionCreator = self._policyForHTTPS.creatorForNetloc( + uri.host, uri.port + ) + return wrapClientTLS(connectionCreator, endpoint) + else: + raise SchemeNotSupported(f"Unsupported scheme: {uri.scheme!r}") + + +@implementer(IAgent) +class Agent(_AgentBase): + """ + L{Agent} is a very basic HTTP client. It supports I{HTTP} and I{HTTPS} + scheme URIs. + + @ivar _pool: An L{HTTPConnectionPool} instance. + + @ivar _endpointFactory: The L{IAgentEndpointFactory} which will + be used to create endpoints for outgoing connections. 
+ + @since: 9.0 + """ + + def __init__( + self, + reactor, + contextFactory=BrowserLikePolicyForHTTPS(), + connectTimeout=None, + bindAddress=None, + pool=None, + ): + """ + Create an L{Agent}. + + @param reactor: A reactor for this L{Agent} to place outgoing + connections. + @type reactor: see L{HostnameEndpoint.__init__} for acceptable reactor + types. + + @param contextFactory: A factory for TLS contexts, to control the + verification parameters of OpenSSL. The default is to use a + L{BrowserLikePolicyForHTTPS}, so unless you have special + requirements you can leave this as-is. + @type contextFactory: L{IPolicyForHTTPS}. + + @param connectTimeout: The amount of time that this L{Agent} will wait + for the peer to accept a connection. + @type connectTimeout: L{float} + + @param bindAddress: The local address for client sockets to bind to. + @type bindAddress: L{bytes} + + @param pool: An L{HTTPConnectionPool} instance, or L{None}, in which + case a non-persistent L{HTTPConnectionPool} instance will be + created. + @type pool: L{HTTPConnectionPool} + """ + if not IPolicyForHTTPS.providedBy(contextFactory): + warnings.warn( + repr(contextFactory) + + " was passed as the HTTPS policy for an Agent, but it does " + "not provide IPolicyForHTTPS. Since Twisted 14.0, you must " + "pass a provider of IPolicyForHTTPS.", + stacklevel=2, + category=DeprecationWarning, + ) + contextFactory = _DeprecatedToCurrentPolicyForHTTPS(contextFactory) + endpointFactory = _StandardEndpointFactory( + reactor, contextFactory, connectTimeout, bindAddress + ) + self._init(reactor, endpointFactory, pool) + + @classmethod + def usingEndpointFactory(cls, reactor, endpointFactory, pool=None): + """ + Create a new L{Agent} that will use the endpoint factory to figure + out how to connect to the server. + + @param reactor: A reactor for this L{Agent} to place outgoing + connections. + @type reactor: see L{HostnameEndpoint.__init__} for acceptable reactor + types. + + @param endpointFactory: Used to construct endpoints which the + HTTP client will connect with. + @type endpointFactory: an L{IAgentEndpointFactory} provider. + + @param pool: An L{HTTPConnectionPool} instance, or L{None}, in which + case a non-persistent L{HTTPConnectionPool} instance will be + created. + @type pool: L{HTTPConnectionPool} + + @return: A new L{Agent}. + """ + agent = cls.__new__(cls) + agent._init(reactor, endpointFactory, pool) + return agent + + def _init(self, reactor, endpointFactory, pool): + """ + Initialize a new L{Agent}. + + @param reactor: A reactor for this L{Agent} to place outgoing + connections. + @type reactor: see L{HostnameEndpoint.__init__} for acceptable reactor + types. + + @param endpointFactory: Used to construct endpoints which the + HTTP client will connect with. + @type endpointFactory: an L{IAgentEndpointFactory} provider. + + @param pool: An L{HTTPConnectionPool} instance, or L{None}, in which + case a non-persistent L{HTTPConnectionPool} instance will be + created. + @type pool: L{HTTPConnectionPool} + + @return: A new L{Agent}. + """ + _AgentBase.__init__(self, reactor, pool) + self._endpointFactory = endpointFactory + + def _getEndpoint(self, uri): + """ + Get an endpoint for the given URI, using C{self._endpointFactory}. + + @param uri: The URI of the request. + @type uri: L{URI} + + @return: An endpoint which can be used to connect to given address. 
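A minimal sketch of the request flow with the L{Agent} described above, assuming a hypothetical URL:

# Sketch: issue a GET and print the response status.
from twisted.internet import reactor
from twisted.web.client import Agent
from twisted.web.http_headers import Headers

agent = Agent(reactor, connectTimeout=10)
d = agent.request(
    b"GET",
    b"http://example.com/",
    Headers({b"User-Agent": [b"Twisted Web Client Example"]}),
    None,
)

def cbResponse(response):
    # response provides IResponse: code, phrase, headers, deliverBody(), ...
    print(response.code, response.phrase)

d.addCallback(cbResponse)
d.addBoth(lambda ignored: reactor.stop())
reactor.run()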
+ """ + return self._endpointFactory.endpointForURI(uri) + + def request(self, method, uri, headers=None, bodyProducer=None): + """ + Issue a request to the server indicated by the given C{uri}. + + An existing connection from the connection pool may be used or a new + one may be created. + + I{HTTP} and I{HTTPS} schemes are supported in C{uri}. + + @see: L{twisted.web.iweb.IAgent.request} + """ + uri = _ensureValidURI(uri.strip()) + parsedURI = URI.fromBytes(uri) + try: + endpoint = self._getEndpoint(parsedURI) + except SchemeNotSupported: + return defer.fail(Failure()) + key = (parsedURI.scheme, parsedURI.host, parsedURI.port) + return self._requestWithEndpoint( + key, + endpoint, + method, + parsedURI, + headers, + bodyProducer, + parsedURI.originForm, + ) + + +@implementer(IAgent) +class ProxyAgent(_AgentBase): + """ + An HTTP agent able to cross HTTP proxies. + + @ivar _proxyEndpoint: The endpoint used to connect to the proxy. + + @since: 11.1 + """ + + def __init__(self, endpoint, reactor=None, pool=None): + if reactor is None: + from twisted.internet import reactor + _AgentBase.__init__(self, reactor, pool) + self._proxyEndpoint = endpoint + + def request(self, method, uri, headers=None, bodyProducer=None): + """ + Issue a new request via the configured proxy. + """ + uri = _ensureValidURI(uri.strip()) + + # Cache *all* connections under the same key, since we are only + # connecting to a single destination, the proxy: + key = ("http-proxy", self._proxyEndpoint) + + # To support proxying HTTPS via CONNECT, we will use key + # ("http-proxy-CONNECT", scheme, host, port), and an endpoint that + # wraps _proxyEndpoint with an additional callback to do the CONNECT. + return self._requestWithEndpoint( + key, + self._proxyEndpoint, + method, + URI.fromBytes(uri), + headers, + bodyProducer, + uri, + ) + + +class _FakeUrllib2Request: + """ + A fake C{urllib2.Request} object for C{cookielib} to work with. + + @see: U{http://docs.python.org/library/urllib2.html#request-objects} + + @type uri: native L{str} + @ivar uri: Request URI. + + @type headers: L{twisted.web.http_headers.Headers} + @ivar headers: Request headers. + + @type type: native L{str} + @ivar type: The scheme of the URI. + + @type host: native L{str} + @ivar host: The host[:port] of the URI. + + @since: 11.1 + """ + + def __init__(self, uri): + """ + Create a fake Urllib2 request. + + @param uri: Request URI. + @type uri: L{bytes} + """ + self.uri = nativeString(uri) + self.headers = Headers() + + _uri = URI.fromBytes(uri) + self.type = nativeString(_uri.scheme) + self.host = nativeString(_uri.host) + + if (_uri.scheme, _uri.port) not in ((b"http", 80), (b"https", 443)): + # If it's not a schema on the regular port, add the port. + self.host += ":" + str(_uri.port) + + self.origin_req_host = nativeString(_uri.host) + self.unverifiable = lambda _: False + + def has_header(self, header): + return self.headers.hasHeader(networkString(header)) + + def add_unredirected_header(self, name, value): + self.headers.addRawHeader(networkString(name), networkString(value)) + + def get_full_url(self): + return self.uri + + def get_header(self, name, default=None): + headers = self.headers.getRawHeaders(networkString(name), default) + if headers is not None: + headers = [nativeString(x) for x in headers] + return headers[0] + return None + + def get_host(self): + return self.host + + def get_type(self): + return self.type + + def is_unverifiable(self): + # In theory this shouldn't be hardcoded. 
+ return False + + +class _FakeUrllib2Response: + """ + A fake C{urllib2.Response} object for C{cookielib} to work with. + + @type response: C{twisted.web.iweb.IResponse} + @ivar response: Underlying Twisted Web response. + + @since: 11.1 + """ + + def __init__(self, response): + self.response = response + + def info(self): + class _Meta: + def getheaders(zelf, name): + # PY2 + headers = self.response.headers.getRawHeaders(name, []) + return headers + + def get_all(zelf, name, default): + # PY3 + headers = self.response.headers.getRawHeaders( + networkString(name), default + ) + h = [nativeString(x) for x in headers] + return h + + return _Meta() + + +@implementer(IAgent) +class CookieAgent: + """ + L{CookieAgent} extends the basic L{Agent} to add RFC-compliant + handling of HTTP cookies. Cookies are written to and extracted + from a C{cookielib.CookieJar} instance. + + The same cookie jar instance will be used for any requests through this + agent, mutating it whenever a I{Set-Cookie} header appears in a response. + + @type _agent: L{twisted.web.client.Agent} + @ivar _agent: Underlying Twisted Web agent to issue requests through. + + @type cookieJar: C{cookielib.CookieJar} + @ivar cookieJar: Initialized cookie jar to read cookies from and store + cookies to. + + @since: 11.1 + """ + + def __init__(self, agent, cookieJar): + self._agent = agent + self.cookieJar = cookieJar + + def request(self, method, uri, headers=None, bodyProducer=None): + """ + Issue a new request to the wrapped L{Agent}. + + Send a I{Cookie} header if a cookie for C{uri} is stored in + L{CookieAgent.cookieJar}. Cookies are automatically extracted and + stored from requests. + + If a C{'cookie'} header appears in C{headers} it will override the + automatic cookie header obtained from the cookie jar. + + @see: L{Agent.request} + """ + if headers is None: + headers = Headers() + lastRequest = _FakeUrllib2Request(uri) + # Setting a cookie header explicitly will disable automatic request + # cookies. + if not headers.hasHeader(b"cookie"): + self.cookieJar.add_cookie_header(lastRequest) + cookieHeader = lastRequest.get_header("Cookie", None) + if cookieHeader is not None: + headers = headers.copy() + headers.addRawHeader(b"cookie", networkString(cookieHeader)) + + d = self._agent.request(method, uri, headers, bodyProducer) + d.addCallback(self._extractCookies, lastRequest) + return d + + def _extractCookies(self, response, request): + """ + Extract response cookies and store them in the cookie jar. + + @type response: L{twisted.web.iweb.IResponse} + @param response: Twisted Web response. + + @param request: A urllib2 compatible request object. + """ + resp = _FakeUrllib2Response(response) + self.cookieJar.extract_cookies(resp, request) + return response + + +class GzipDecoder(proxyForInterface(IResponse)): # type: ignore[misc] + """ + A wrapper for a L{Response} instance which handles gzip'ed body. + + @ivar original: The original L{Response} object. + + @since: 11.1 + """ + + def __init__(self, response): + self.original = response + self.length = UNKNOWN_LENGTH + + def deliverBody(self, protocol): + """ + Override C{deliverBody} to wrap the given C{protocol} with + L{_GzipProtocol}. + """ + self.original.deliverBody(_GzipProtocol(protocol, self.original)) + + +class _GzipProtocol(proxyForInterface(IProtocol)): # type: ignore[misc] + """ + A L{Protocol} implementation which wraps another one, transparently + decompressing received data. 
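A minimal sketch of the L{CookieAgent} wrapper defined above, using the standard library cookie jar; the URL is hypothetical:

# Sketch: persist cookies across requests with the stdlib cookie jar.
from http.cookiejar import CookieJar

from twisted.internet import reactor
from twisted.web.client import Agent, CookieAgent

jar = CookieJar()
agent = CookieAgent(Agent(reactor), jar)

# Any Set-Cookie headers in the response are stored in `jar` and sent back
# automatically on later requests to the same origin.
d = agent.request(b"GET", b"http://example.com/login")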
+
+    @ivar _zlibDecompress: A zlib decompress object used to decompress the data
+        stream.
+
+    @ivar _response: A reference to the original response, in case of errors.
+
+    @since: 11.1
+    """
+
+    def __init__(self, protocol, response):
+        self.original = protocol
+        self._response = response
+        self._zlibDecompress = zlib.decompressobj(16 + zlib.MAX_WBITS)
+
+    def dataReceived(self, data):
+        """
+        Decompress C{data} with the zlib decompressor, forwarding the raw data
+        to the original protocol.
+        """
+        try:
+            rawData = self._zlibDecompress.decompress(data)
+        except zlib.error:
+            raise ResponseFailed([Failure()], self._response)
+        if rawData:
+            self.original.dataReceived(rawData)
+
+    def connectionLost(self, reason):
+        """
+        Forward the connection lost event, flushing remaining data from the
+        decompressor if any.
+        """
+        try:
+            rawData = self._zlibDecompress.flush()
+        except zlib.error:
+            raise ResponseFailed([reason, Failure()], self._response)
+        if rawData:
+            self.original.dataReceived(rawData)
+        self.original.connectionLost(reason)
+
+
+@implementer(IAgent)
+class ContentDecoderAgent:
+    """
+    An L{Agent} wrapper to handle encoded content.
+
+    It takes care of declaring the support for content in the
+    I{Accept-Encoding} header and automatically decompresses the received data
+    if the I{Content-Encoding} header indicates a supported encoding.
+
+    For example::
+
+        agent = ContentDecoderAgent(Agent(reactor),
+                                    [(b'gzip', GzipDecoder)])
+
+    @param agent: The agent to wrap
+    @type agent: L{IAgent}
+
+    @param decoders: A sequence of (name, decoder) objects. The name
+        declares which encoding the decoder supports. The decoder must accept
+        an L{IResponse} and return an L{IResponse} when called. The order
+        determines how the decoders are advertised to the server. Names must
+        not be duplicated.
+    @type decoders: sequence of (L{bytes}, L{callable}) tuples
+
+    @since: 11.1
+
+    @see: L{GzipDecoder}
+    """
+
+    def __init__(self, agent, decoders):
+        self._agent = agent
+        self._decoders = dict(decoders)
+        self._supported = b",".join([decoder[0] for decoder in decoders])
+
+    def request(self, method, uri, headers=None, bodyProducer=None):
+        """
+        Send a client request which declares supporting compressed content.
+
+        @see: L{Agent.request}.
+        """
+        if headers is None:
+            headers = Headers()
+        else:
+            headers = headers.copy()
+        headers.addRawHeader(b"accept-encoding", self._supported)
+        deferred = self._agent.request(method, uri, headers, bodyProducer)
+        return deferred.addCallback(self._handleResponse)
+
+    def _handleResponse(self, response):
+        """
+        Check if the response is encoded, and wrap it to handle decompression.
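Building on the docstring's own example, a minimal sketch of a decompressing agent in use; the URL is hypothetical and C{readBody} is defined later in this module:

# Sketch: transparently decompress gzip-encoded responses.
from twisted.internet import reactor
from twisted.web.client import Agent, ContentDecoderAgent, GzipDecoder, readBody

agent = ContentDecoderAgent(Agent(reactor), [(b"gzip", GzipDecoder)])
d = agent.request(b"GET", b"http://example.com/compressed")
# The body handed to readBody is already decompressed; the decoded response
# reports UNKNOWN_LENGTH since the original Content-Length no longer applies.
d.addCallback(readBody)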
+ """ + contentEncodingHeaders = response.headers.getRawHeaders(b"content-encoding", []) + contentEncodingHeaders = b",".join(contentEncodingHeaders).split(b",") + while contentEncodingHeaders: + name = contentEncodingHeaders.pop().strip() + decoder = self._decoders.get(name) + if decoder is not None: + response = decoder(response) + else: + # Add it back + contentEncodingHeaders.append(name) + break + if contentEncodingHeaders: + response.headers.setRawHeaders( + b"content-encoding", [b",".join(contentEncodingHeaders)] + ) + else: + response.headers.removeHeader(b"content-encoding") + return response + + +_canonicalHeaderName = Headers()._canonicalNameCaps +_defaultSensitiveHeaders = frozenset( + [ + b"Authorization", + b"Cookie", + b"Cookie2", + b"Proxy-Authorization", + b"WWW-Authenticate", + ] +) + + +@implementer(IAgent) +class RedirectAgent: + """ + An L{Agent} wrapper which handles HTTP redirects. + + The implementation is rather strict: 301 and 302 behaves like 307, not + redirecting automatically on methods different from I{GET} and I{HEAD}. + + See L{BrowserLikeRedirectAgent} for a redirecting Agent that behaves more + like a web browser. + + @param redirectLimit: The maximum number of times the agent is allowed to + follow redirects before failing with a L{error.InfiniteRedirection}. + + @param sensitiveHeaderNames: An iterable of C{bytes} enumerating the names + of headers that must not be transmitted when redirecting to a different + origins. These will be consulted in addition to the protocol-specified + set of headers that contain sensitive information. + + @cvar _redirectResponses: A L{list} of HTTP status codes to be redirected + for I{GET} and I{HEAD} methods. + + @cvar _seeOtherResponses: A L{list} of HTTP status codes to be redirected + for any method and the method altered to I{GET}. + + @since: 11.1 + """ + + _redirectResponses = [ + http.MOVED_PERMANENTLY, + http.FOUND, + http.TEMPORARY_REDIRECT, + http.PERMANENT_REDIRECT, + ] + _seeOtherResponses = [http.SEE_OTHER] + + def __init__( + self, + agent: IAgent, + redirectLimit: int = 20, + sensitiveHeaderNames: Iterable[bytes] = (), + ): + self._agent = agent + self._redirectLimit = redirectLimit + sensitive = {_canonicalHeaderName(each) for each in sensitiveHeaderNames} + sensitive.update(_defaultSensitiveHeaders) + self._sensitiveHeaderNames = sensitive + + def request(self, method, uri, headers=None, bodyProducer=None): + """ + Send a client request following HTTP redirects. + + @see: L{Agent.request}. + """ + deferred = self._agent.request(method, uri, headers, bodyProducer) + return deferred.addCallback(self._handleResponse, method, uri, headers, 0) + + def _resolveLocation(self, requestURI, location): + """ + Resolve the redirect location against the request I{URI}. + + @type requestURI: C{bytes} + @param requestURI: The request I{URI}. + + @type location: C{bytes} + @param location: The redirect location. + + @rtype: C{bytes} + @return: Final resolved I{URI}. + """ + return _urljoin(requestURI, location) + + def _handleRedirect(self, response, method, uri, headers, redirectCount): + """ + Handle a redirect response, checking the number of redirects already + followed, and extracting the location header fields. 
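A minimal sketch of the L{RedirectAgent} wrapper described above; the extra sensitive header name and the URL are hypothetical:

# Sketch: follow redirects, stripping sensitive headers on cross-origin hops.
from twisted.internet import reactor
from twisted.web.client import Agent, RedirectAgent

agent = RedirectAgent(
    Agent(reactor),
    redirectLimit=5,
    # In addition to the default set (Authorization, Cookie, ...), never
    # forward this hypothetical token header to a different origin.
    sensitiveHeaderNames=[b"X-Auth-Token"],
)
d = agent.request(b"GET", b"http://example.com/old-location")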
+ """ + if redirectCount >= self._redirectLimit: + err = error.InfiniteRedirection( + response.code, b"Infinite redirection detected", location=uri + ) + raise ResponseFailed([Failure(err)], response) + locationHeaders = response.headers.getRawHeaders(b"location", []) + if not locationHeaders: + err = error.RedirectWithNoLocation( + response.code, b"No location header field", uri + ) + raise ResponseFailed([Failure(err)], response) + location = self._resolveLocation(uri, locationHeaders[0]) + if headers: + parsedURI = URI.fromBytes(uri) + parsedLocation = URI.fromBytes(location) + sameOrigin = ( + (parsedURI.scheme == parsedLocation.scheme) + and (parsedURI.host == parsedLocation.host) + and (parsedURI.port == parsedLocation.port) + ) + if not sameOrigin: + headers = Headers( + { + rawName: rawValue + for rawName, rawValue in headers.getAllRawHeaders() + if rawName not in self._sensitiveHeaderNames + } + ) + deferred = self._agent.request(method, location, headers) + + def _chainResponse(newResponse): + newResponse.setPreviousResponse(response) + return newResponse + + deferred.addCallback(_chainResponse) + return deferred.addCallback( + self._handleResponse, method, uri, headers, redirectCount + 1 + ) + + def _handleResponse(self, response, method, uri, headers, redirectCount): + """ + Handle the response, making another request if it indicates a redirect. + """ + if response.code in self._redirectResponses: + if method not in (b"GET", b"HEAD"): + err = error.PageRedirect(response.code, location=uri) + raise ResponseFailed([Failure(err)], response) + return self._handleRedirect(response, method, uri, headers, redirectCount) + elif response.code in self._seeOtherResponses: + return self._handleRedirect(response, b"GET", uri, headers, redirectCount) + return response + + +class BrowserLikeRedirectAgent(RedirectAgent): + """ + An L{Agent} wrapper which handles HTTP redirects in the same fashion as web + browsers. + + Unlike L{RedirectAgent}, the implementation is more relaxed: 301 and 302 + behave like 303, redirecting automatically on any method and altering the + redirect request to a I{GET}. + + @see: L{RedirectAgent} + + @since: 13.1 + """ + + _redirectResponses = [http.TEMPORARY_REDIRECT] + _seeOtherResponses = [ + http.MOVED_PERMANENTLY, + http.FOUND, + http.SEE_OTHER, + http.PERMANENT_REDIRECT, + ] + + +class _ReadBodyProtocol(protocol.Protocol): + """ + Protocol that collects data sent to it. + + This is a helper for L{IResponse.deliverBody}, which collects the body and + fires a deferred with it. + + @ivar deferred: See L{__init__}. + @ivar status: See L{__init__}. + @ivar message: See L{__init__}. + + @ivar dataBuffer: list of byte-strings received + @type dataBuffer: L{list} of L{bytes} + """ + + def __init__(self, status, message, deferred): + """ + @param status: Status of L{IResponse} + @ivar status: L{int} + + @param message: Message of L{IResponse} + @type message: L{bytes} + + @param deferred: deferred to fire when response is complete + @type deferred: L{Deferred} firing with L{bytes} + """ + self.deferred = deferred + self.status = status + self.message = message + self.dataBuffer = [] + + def dataReceived(self, data): + """ + Accumulate some more bytes from the response. + """ + self.dataBuffer.append(data) + + def connectionLost(self, reason): + """ + Deliver the accumulated response bytes to the waiting L{Deferred}, if + the response body has been completely received without error. 
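A minimal sketch of how a body-collecting protocol like the one above is usually driven, via the module-level C{readBody} helper defined just below; the URL is hypothetical:

# Sketch: fetch a page, following redirects, and collect the body as bytes.
from twisted.internet import reactor
from twisted.web.client import Agent, BrowserLikeRedirectAgent, readBody

agent = BrowserLikeRedirectAgent(Agent(reactor))
d = agent.request(b"GET", b"http://example.com/")
d.addCallback(readBody)
d.addCallback(lambda body: print(body[:80]))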
+ """ + if reason.check(ResponseDone): + self.deferred.callback(b"".join(self.dataBuffer)) + elif reason.check(PotentialDataLoss): + self.deferred.errback( + PartialDownloadError( + self.status, self.message, b"".join(self.dataBuffer) + ) + ) + else: + self.deferred.errback(reason) + + +def readBody(response: IResponse) -> defer.Deferred[bytes]: + """ + Get the body of an L{IResponse} and return it as a byte string. + + This is a helper function for clients that don't want to incrementally + receive the body of an HTTP response. + + @param response: The HTTP response for which the body will be read. + @type response: L{IResponse} provider + + @return: A L{Deferred} which will fire with the body of the response. + Cancelling it will close the connection to the server immediately. + """ + + def cancel(deferred: defer.Deferred[bytes]) -> None: + """ + Cancel a L{readBody} call, close the connection to the HTTP server + immediately, if it is still open. + + @param deferred: The cancelled L{defer.Deferred}. + """ + abort = getAbort() + if abort is not None: + abort() + + d: defer.Deferred[bytes] = defer.Deferred(cancel) + protocol = _ReadBodyProtocol(response.code, response.phrase, d) + + def getAbort(): + return getattr(protocol.transport, "abortConnection", None) + + response.deliverBody(protocol) + + if protocol.transport is not None and getAbort() is None: + warnings.warn( + "Using readBody with a transport that does not have an " + "abortConnection method", + category=DeprecationWarning, + stacklevel=2, + ) + + return d + + +__all__ = [ + "Agent", + "BrowserLikePolicyForHTTPS", + "BrowserLikeRedirectAgent", + "ContentDecoderAgent", + "CookieAgent", + "GzipDecoder", + "HTTPConnectionPool", + "PartialDownloadError", + "ProxyAgent", + "readBody", + "RedirectAgent", + "RequestGenerationFailed", + "RequestTransmissionFailed", + "Response", + "ResponseDone", + "ResponseFailed", + "ResponseNeverReceived", + "URI", +] diff --git a/contrib/python/Twisted/py3/twisted/web/demo.py b/contrib/python/Twisted/py3/twisted/web/demo.py new file mode 100644 index 0000000000..2c8a3b69be --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/demo.py @@ -0,0 +1,27 @@ +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +I am a simple test resource. +""" + + +from twisted.web import static + + +class Test(static.Data): + isLeaf = True + + def __init__(self): + static.Data.__init__( + self, + b""" + <html> + <head><title>Twisted Web Demo</title><head> + <body> + Hello! This is a Twisted Web test page. + </body> + </html> + """, + "text/html", + ) diff --git a/contrib/python/Twisted/py3/twisted/web/distrib.py b/contrib/python/Twisted/py3/twisted/web/distrib.py new file mode 100644 index 0000000000..4f25c03ee8 --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/distrib.py @@ -0,0 +1,390 @@ +# -*- test-case-name: twisted.web.test.test_distrib -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +Distributed web servers. + +This is going to have to be refactored so that argument parsing is done +by each subprocess and not by the main web server (i.e. GET, POST etc.). 
+""" + +import copy +import os +import sys + +try: + import pwd +except ImportError: + pwd = None # type: ignore[assignment] +from io import BytesIO +from xml.dom.minidom import getDOMImplementation + +from twisted.internet import address, reactor +from twisted.logger import Logger +from twisted.persisted import styles +from twisted.spread import pb +from twisted.spread.banana import SIZE_LIMIT +from twisted.web import http, resource, server, static, util +from twisted.web.http_headers import Headers + + +class _ReferenceableProducerWrapper(pb.Referenceable): + def __init__(self, producer): + self.producer = producer + + def remote_resumeProducing(self): + self.producer.resumeProducing() + + def remote_pauseProducing(self): + self.producer.pauseProducing() + + def remote_stopProducing(self): + self.producer.stopProducing() + + +class Request(pb.RemoteCopy, server.Request): + """ + A request which was received by a L{ResourceSubscription} and sent via + PB to a distributed node. + """ + + def setCopyableState(self, state): + """ + Initialize this L{twisted.web.distrib.Request} based on the copied + state so that it closely resembles a L{twisted.web.server.Request}. + """ + for k in "host", "client": + tup = state[k] + addrdesc = {"INET": "TCP", "UNIX": "UNIX"}[tup[0]] + addr = { + "TCP": lambda: address.IPv4Address(addrdesc, tup[1], tup[2]), + "UNIX": lambda: address.UNIXAddress(tup[1]), + }[addrdesc]() + state[k] = addr + state["requestHeaders"] = Headers(dict(state["requestHeaders"])) + pb.RemoteCopy.setCopyableState(self, state) + # Emulate the local request interface -- + self.content = BytesIO(self.content_data) + self.finish = self.remote.remoteMethod("finish") + self.setHeader = self.remote.remoteMethod("setHeader") + self.addCookie = self.remote.remoteMethod("addCookie") + self.setETag = self.remote.remoteMethod("setETag") + self.setResponseCode = self.remote.remoteMethod("setResponseCode") + self.setLastModified = self.remote.remoteMethod("setLastModified") + + # To avoid failing if a resource tries to write a very long string + # all at once, this one will be handled slightly differently. + self._write = self.remote.remoteMethod("write") + + def write(self, bytes): + """ + Write the given bytes to the response body. + + @param bytes: The bytes to write. If this is longer than 640k, it + will be split up into smaller pieces. + """ + start = 0 + end = SIZE_LIMIT + while True: + self._write(bytes[start:end]) + start += SIZE_LIMIT + end += SIZE_LIMIT + if start >= len(bytes): + break + + def registerProducer(self, producer, streaming): + self.remote.callRemote( + "registerProducer", _ReferenceableProducerWrapper(producer), streaming + ).addErrback(self.fail) + + def unregisterProducer(self): + self.remote.callRemote("unregisterProducer").addErrback(self.fail) + + def fail(self, failure): + self._log.failure("", failure=failure) + + +pb.setUnjellyableForClass(server.Request, Request) + + +class Issue: + _log = Logger() + + def __init__(self, request): + self.request = request + + def finished(self, result): + if result is not server.NOT_DONE_YET: + assert isinstance(result, str), "return value not a string" + self.request.write(result) + self.request.finish() + + def failed(self, failure): + # XXX: Argh. FIXME. + failure = str(failure) + self.request.write( + resource._UnsafeErrorPage( + http.INTERNAL_SERVER_ERROR, + "Server Connection Lost", + # GHSA-vg46-2rrj-3647 note: _PRE does HTML-escape the input. 
+ "Connection to distributed server lost:" + util._PRE(failure), + ).render(self.request) + ) + self.request.finish() + self._log.info(failure) + + +class ResourceSubscription(resource.Resource): + isLeaf = 1 + waiting = 0 + _log = Logger() + + def __init__(self, host, port): + resource.Resource.__init__(self) + self.host = host + self.port = port + self.pending = [] + self.publisher = None + + def __getstate__(self): + """Get persistent state for this ResourceSubscription.""" + # When I unserialize, + state = copy.copy(self.__dict__) + # Publisher won't be connected... + state["publisher"] = None + # I won't be making a connection + state["waiting"] = 0 + # There will be no pending requests. + state["pending"] = [] + return state + + def connected(self, publisher): + """I've connected to a publisher; I'll now send all my requests.""" + self._log.info("connected to publisher") + publisher.broker.notifyOnDisconnect(self.booted) + self.publisher = publisher + self.waiting = 0 + for request in self.pending: + self.render(request) + self.pending = [] + + def notConnected(self, msg): + """I can't connect to a publisher; I'll now reply to all pending + requests. + """ + self._log.info("could not connect to distributed web service: {msg}", msg=msg) + self.waiting = 0 + self.publisher = None + for request in self.pending: + request.write("Unable to connect to distributed server.") + request.finish() + self.pending = [] + + def booted(self): + self.notConnected("connection dropped") + + def render(self, request): + """Render this request, from my server. + + This will always be asynchronous, and therefore return NOT_DONE_YET. + It spins off a request to the pb client, and either adds it to the list + of pending issues or requests it immediately, depending on if the + client is already connected. + """ + if not self.publisher: + self.pending.append(request) + if not self.waiting: + self.waiting = 1 + bf = pb.PBClientFactory() + timeout = 10 + if self.host == "unix": + reactor.connectUNIX(self.port, bf, timeout) + else: + reactor.connectTCP(self.host, self.port, bf, timeout) + d = bf.getRootObject() + d.addCallbacks(self.connected, self.notConnected) + + else: + i = Issue(request) + self.publisher.callRemote("request", request).addCallbacks( + i.finished, i.failed + ) + return server.NOT_DONE_YET + + +class ResourcePublisher(pb.Root, styles.Versioned): + """ + L{ResourcePublisher} exposes a remote API which can be used to respond + to request. + + @ivar site: The site which will be used for resource lookup. + @type site: L{twisted.web.server.Site} + """ + + _log = Logger() + + def __init__(self, site): + self.site = site + + persistenceVersion = 2 + + def upgradeToVersion2(self): + self.application.authorizer.removeIdentity("web") + del self.application.services[self.serviceName] + del self.serviceName + del self.application + del self.perspectiveName + + def getPerspectiveNamed(self, name): + return self + + def remote_request(self, request): + """ + Look up the resource for the given request and render it. + """ + res = self.site.getResourceFor(request) + self._log.info(request) + result = res.render(request) + if result is not server.NOT_DONE_YET: + request.write(result) + request.finish() + return server.NOT_DONE_YET + + +class UserDirectory(resource.Resource): + """ + A resource which lists available user resources and serves them as + children. + + @ivar _pwd: An object like L{pwd} which is used to enumerate users and + their home directories. 
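A minimal sketch of wiring together the two halves described above: one process publishes its site over PB, another subscribes to it. Socket and directory paths are hypothetical, and the two listeners would normally live in separate processes:

# Sketch: publish a Site over a UNIX socket and mount it in a front-end server.
from twisted.internet import reactor
from twisted.spread import pb
from twisted.web import distrib, server, static

# Publishing side: expose an existing Site through ResourcePublisher.
published_site = server.Site(static.File("/srv/www/user"))
reactor.listenUNIX(
    "/home/user/.twistd-web-pb",
    pb.PBServerFactory(distrib.ResourcePublisher(published_site)),
)

# Subscribing side (typically a different process): requests under /user are
# forwarded over PB to the publisher above.
root = static.File("/srv/www/root")
root.putChild(
    b"user", distrib.ResourceSubscription("unix", "/home/user/.twistd-web-pb")
)
reactor.listenTCP(8080, server.Site(root))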
+ """ + + userDirName = "public_html" + userSocketName = ".twistd-web-pb" + + template = """ +<html> + <head> + <title>twisted.web.distrib.UserDirectory</title> + <style> + + a + { + font-family: Lucida, Verdana, Helvetica, Arial, sans-serif; + color: #369; + text-decoration: none; + } + + th + { + font-family: Lucida, Verdana, Helvetica, Arial, sans-serif; + font-weight: bold; + text-decoration: none; + text-align: left; + } + + pre, code + { + font-family: "Courier New", Courier, monospace; + } + + p, body, td, ol, ul, menu, blockquote, div + { + font-family: Lucida, Verdana, Helvetica, Arial, sans-serif; + color: #000; + } + </style> + </head> + + <body> + <h1>twisted.web.distrib.UserDirectory</h1> + + %(users)s +</body> +</html> +""" + + def __init__(self, userDatabase=None): + resource.Resource.__init__(self) + if userDatabase is None: + userDatabase = pwd + self._pwd = userDatabase + + def _users(self): + """ + Return a list of two-tuples giving links to user resources and text to + associate with those links. + """ + users = [] + for user in self._pwd.getpwall(): + name, passwd, uid, gid, gecos, dir, shell = user + realname = gecos.split(",")[0] + if not realname: + realname = name + if os.path.exists(os.path.join(dir, self.userDirName)): + users.append((name, realname + " (file)")) + twistdsock = os.path.join(dir, self.userSocketName) + if os.path.exists(twistdsock): + linkName = name + ".twistd" + users.append((linkName, realname + " (twistd)")) + return users + + def render_GET(self, request): + """ + Render as HTML a listing of all known users with links to their + personal resources. + """ + + domImpl = getDOMImplementation() + newDoc = domImpl.createDocument(None, "ul", None) + listing = newDoc.documentElement + for link, text in self._users(): + linkElement = newDoc.createElement("a") + linkElement.setAttribute("href", link + "/") + textNode = newDoc.createTextNode(text) + linkElement.appendChild(textNode) + item = newDoc.createElement("li") + item.appendChild(linkElement) + listing.appendChild(item) + + htmlDoc = self.template % ({"users": listing.toxml()}) + return htmlDoc.encode("utf-8") + + def getChild(self, name, request): + if name == b"": + return self + + td = b".twistd" + + if name.endswith(td): + username = name[: -len(td)] + sub = 1 + else: + username = name + sub = 0 + try: + # Decode using the filesystem encoding to reverse a transformation + # done in the pwd module. + ( + pw_name, + pw_passwd, + pw_uid, + pw_gid, + pw_gecos, + pw_dir, + pw_shell, + ) = self._pwd.getpwnam(username.decode(sys.getfilesystemencoding())) + except KeyError: + return resource._UnsafeNoResource() + if sub: + twistdsock = os.path.join(pw_dir, self.userSocketName) + rs = ResourceSubscription("unix", twistdsock) + self.putChild(name, rs) + return rs + else: + path = os.path.join(pw_dir, self.userDirName) + if not os.path.exists(path): + return resource._UnsafeNoResource() + return static.File(path) diff --git a/contrib/python/Twisted/py3/twisted/web/domhelpers.py b/contrib/python/Twisted/py3/twisted/web/domhelpers.py new file mode 100644 index 0000000000..326c3f8485 --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/domhelpers.py @@ -0,0 +1,313 @@ +# -*- test-case-name: twisted.web.test.test_domhelpers -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +A library for performing interesting tasks with DOM objects. + +This module is now deprecated. 
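A minimal sketch of serving the L{UserDirectory} resource described above (Unix-only, since it relies on the C{pwd} database); the port is arbitrary:

# Sketch: list per-user resources (public_html / .twistd-web-pb) at /<username>/.
from twisted.internet import reactor
from twisted.web import distrib, server

site = server.Site(distrib.UserDirectory())
reactor.listenTCP(8080, site)
reactor.run()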
+""" +import warnings +from io import StringIO + +from incremental import Version, getVersionString + +from twisted.web import microdom +from twisted.web.microdom import escape, getElementsByTagName, unescape + +warningString = "twisted.web.domhelpers was deprecated at {}".format( + getVersionString(Version("Twisted", 23, 10, 0)) +) +warnings.warn(warningString, DeprecationWarning, stacklevel=3) + + +# These modules are imported here as a shortcut. +escape +getElementsByTagName + + +class NodeLookupError(Exception): + pass + + +def substitute(request, node, subs): + """ + Look through the given node's children for strings, and + attempt to do string substitution with the given parameter. + """ + for child in node.childNodes: + if hasattr(child, "nodeValue") and child.nodeValue: + child.replaceData(0, len(child.nodeValue), child.nodeValue % subs) + substitute(request, child, subs) + + +def _get(node, nodeId, nodeAttrs=("id", "class", "model", "pattern")): + """ + (internal) Get a node with the specified C{nodeId} as any of the C{class}, + C{id} or C{pattern} attributes. + """ + + if hasattr(node, "hasAttributes") and node.hasAttributes(): + for nodeAttr in nodeAttrs: + if str(node.getAttribute(nodeAttr)) == nodeId: + return node + if node.hasChildNodes(): + if hasattr(node.childNodes, "length"): + length = node.childNodes.length + else: + length = len(node.childNodes) + for childNum in range(length): + result = _get(node.childNodes[childNum], nodeId) + if result: + return result + + +def get(node, nodeId): + """ + Get a node with the specified C{nodeId} as any of the C{class}, + C{id} or C{pattern} attributes. If there is no such node, raise + L{NodeLookupError}. + """ + result = _get(node, nodeId) + if result: + return result + raise NodeLookupError(nodeId) + + +def getIfExists(node, nodeId): + """ + Get a node with the specified C{nodeId} as any of the C{class}, + C{id} or C{pattern} attributes. If there is no such node, return + L{None}. + """ + return _get(node, nodeId) + + +def getAndClear(node, nodeId): + """Get a node with the specified C{nodeId} as any of the C{class}, + C{id} or C{pattern} attributes. If there is no such node, raise + L{NodeLookupError}. Remove all child nodes before returning. + """ + result = get(node, nodeId) + if result: + clearNode(result) + return result + + +def clearNode(node): + """ + Remove all children from the given node. + """ + node.childNodes[:] = [] + + +def locateNodes(nodeList, key, value, noNesting=1): + """ + Find subnodes in the given node where the given attribute + has the given value. 
+ """ + returnList = [] + if not isinstance(nodeList, type([])): + return locateNodes(nodeList.childNodes, key, value, noNesting) + for childNode in nodeList: + if not hasattr(childNode, "getAttribute"): + continue + if str(childNode.getAttribute(key)) == value: + returnList.append(childNode) + if noNesting: + continue + returnList.extend(locateNodes(childNode, key, value, noNesting)) + return returnList + + +def superSetAttribute(node, key, value): + if not hasattr(node, "setAttribute"): + return + node.setAttribute(key, value) + if node.hasChildNodes(): + for child in node.childNodes: + superSetAttribute(child, key, value) + + +def superPrependAttribute(node, key, value): + if not hasattr(node, "setAttribute"): + return + old = node.getAttribute(key) + if old: + node.setAttribute(key, value + "/" + old) + else: + node.setAttribute(key, value) + if node.hasChildNodes(): + for child in node.childNodes: + superPrependAttribute(child, key, value) + + +def superAppendAttribute(node, key, value): + if not hasattr(node, "setAttribute"): + return + old = node.getAttribute(key) + if old: + node.setAttribute(key, old + "/" + value) + else: + node.setAttribute(key, value) + if node.hasChildNodes(): + for child in node.childNodes: + superAppendAttribute(child, key, value) + + +def gatherTextNodes(iNode, dounescape=0, joinWith=""): + """Visit each child node and collect its text data, if any, into a string. + For example:: + >>> doc=microdom.parseString('<a>1<b>2<c>3</c>4</b></a>') + >>> gatherTextNodes(doc.documentElement) + '1234' + With dounescape=1, also convert entities back into normal characters. + @return: the gathered nodes as a single string + @rtype: str""" + gathered = [] + gathered_append = gathered.append + slice = [iNode] + while len(slice) > 0: + c = slice.pop(0) + if hasattr(c, "nodeValue") and c.nodeValue is not None: + if dounescape: + val = unescape(c.nodeValue) + else: + val = c.nodeValue + gathered_append(val) + slice[:0] = c.childNodes + return joinWith.join(gathered) + + +class RawText(microdom.Text): + """This is an evil and horrible speed hack. Basically, if you have a big + chunk of XML that you want to insert into the DOM, but you don't want to + incur the cost of parsing it, you can construct one of these and insert it + into the DOM. This will most certainly only work with microdom as the API + for converting nodes to xml is different in every DOM implementation. + + This could be improved by making this class a Lazy parser, so if you + inserted this into the DOM and then later actually tried to mutate this + node, it would be parsed then. 
+ """ + + def writexml( + self, + writer, + indent="", + addindent="", + newl="", + strip=0, + nsprefixes=None, + namespace=None, + ): + writer.write(f"{indent}{self.data}{newl}") + + +def findNodes(parent, matcher, accum=None): + if accum is None: + accum = [] + if not parent.hasChildNodes(): + return accum + for child in parent.childNodes: + # print child, child.nodeType, child.nodeName + if matcher(child): + accum.append(child) + findNodes(child, matcher, accum) + return accum + + +def findNodesShallowOnMatch(parent, matcher, recurseMatcher, accum=None): + if accum is None: + accum = [] + if not parent.hasChildNodes(): + return accum + for child in parent.childNodes: + # print child, child.nodeType, child.nodeName + if matcher(child): + accum.append(child) + if recurseMatcher(child): + findNodesShallowOnMatch(child, matcher, recurseMatcher, accum) + return accum + + +def findNodesShallow(parent, matcher, accum=None): + if accum is None: + accum = [] + if not parent.hasChildNodes(): + return accum + for child in parent.childNodes: + if matcher(child): + accum.append(child) + else: + findNodes(child, matcher, accum) + return accum + + +def findElementsWithAttributeShallow(parent, attribute): + """ + Return an iterable of the elements which are direct children of C{parent} + and which have the C{attribute} attribute. + """ + return findNodesShallow( + parent, + lambda n: getattr(n, "tagName", None) is not None and n.hasAttribute(attribute), + ) + + +def findElements(parent, matcher): + """ + Return an iterable of the elements which are children of C{parent} for + which the predicate C{matcher} returns true. + """ + return findNodes( + parent, + lambda n, matcher=matcher: getattr(n, "tagName", None) is not None + and matcher(n), + ) + + +def findElementsWithAttribute(parent, attribute, value=None): + if value: + return findElements( + parent, + lambda n, attribute=attribute, value=value: n.hasAttribute(attribute) + and n.getAttribute(attribute) == value, + ) + else: + return findElements( + parent, lambda n, attribute=attribute: n.hasAttribute(attribute) + ) + + +def findNodesNamed(parent, name): + return findNodes(parent, lambda n, name=name: n.nodeName == name) + + +def writeNodeData(node, oldio): + for subnode in node.childNodes: + if hasattr(subnode, "data"): + oldio.write("" + subnode.data) + else: + writeNodeData(subnode, oldio) + + +def getNodeText(node): + oldio = StringIO() + writeNodeData(node, oldio) + return oldio.getvalue() + + +def getParents(node): + l = [] + while node: + l.append(node) + node = node.parentNode + return l + + +def namedChildren(parent, nodeName): + """namedChildren(parent, nodeName) -> children (not descendants) of parent + that have tagName == nodeName + """ + return [n for n in parent.childNodes if getattr(n, "tagName", "") == nodeName] diff --git a/contrib/python/Twisted/py3/twisted/web/error.py b/contrib/python/Twisted/py3/twisted/web/error.py new file mode 100644 index 0000000000..cc151d4205 --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/error.py @@ -0,0 +1,442 @@ +# -*- test-case-name: twisted.web.test.test_error -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +Exception definitions for L{twisted.web}. 
+""" + +__all__ = [ + "Error", + "PageRedirect", + "InfiniteRedirection", + "RenderError", + "MissingRenderMethod", + "MissingTemplateLoader", + "UnexposedMethodError", + "UnfilledSlot", + "UnsupportedType", + "FlattenerError", + "RedirectWithNoLocation", +] + + +from collections.abc import Sequence +from typing import Optional, Union, cast + +from twisted.python.compat import nativeString +from twisted.web._responses import RESPONSES + + +def _codeToMessage(code: Union[int, bytes]) -> Optional[bytes]: + """ + Returns the response message corresponding to an HTTP code, or None + if the code is unknown or unrecognized. + + @param code: HTTP status code, for example C{http.NOT_FOUND}. + + @return: A string message or none + """ + try: + return RESPONSES.get(int(code)) + except (ValueError, AttributeError): + return None + + +class Error(Exception): + """ + A basic HTTP error. + + @ivar status: Refers to an HTTP status code, for example C{http.NOT_FOUND}. + + @param message: A short error message, for example "NOT FOUND". + + @ivar response: A complete HTML document for an error page. + """ + + status: bytes + message: Optional[bytes] + response: Optional[bytes] + + def __init__( + self, + code: Union[int, bytes], + message: Optional[bytes] = None, + response: Optional[bytes] = None, + ) -> None: + """ + Initializes a basic exception. + + @type code: L{bytes} or L{int} + @param code: Refers to an HTTP status code (for example, 200) either as + an integer or a bytestring representing such. If no C{message} is + given, C{code} is mapped to a descriptive bytestring that is used + instead. + + @type message: L{bytes} + @param message: A short error message, for example C{b"NOT FOUND"}. + + @type response: L{bytes} + @param response: A complete HTML document for an error page. + """ + + message = message or _codeToMessage(code) + + Exception.__init__(self, code, message, response) + + if isinstance(code, int): + # If we're given an int, convert it to a bytestring + # downloadPage gives a bytes, Agent gives an int, and it worked by + # accident previously, so just make it keep working. + code = b"%d" % (code,) + elif len(code) != 3 or not code.isdigit(): + # Status codes must be 3 digits. See + # https://httpwg.org/specs/rfc9110.html#status.code.extensibility + raise ValueError(f"Not a valid HTTP status code: {code!r}") + + self.status = code + self.message = message + self.response = response + + def __str__(self) -> str: + s = self.status + if self.message: + s += b" " + self.message + return nativeString(s) + + +class PageRedirect(Error): + """ + A request resulted in an HTTP redirect. + + @ivar location: The location of the redirect which was not followed. + """ + + location: Optional[bytes] + + def __init__( + self, + code: Union[int, bytes], + message: Optional[bytes] = None, + response: Optional[bytes] = None, + location: Optional[bytes] = None, + ) -> None: + """ + Initializes a page redirect exception. + + @type code: L{bytes} + @param code: Refers to an HTTP status code, for example + C{http.NOT_FOUND}. If no C{message} is given, C{code} is mapped to a + descriptive string that is used instead. + + @type message: L{bytes} + @param message: A short error message, for example C{b"NOT FOUND"}. + + @type response: L{bytes} + @param response: A complete HTML document for an error page. + + @type location: L{bytes} + @param location: The location response-header field value. 
It is an + absolute URI used to redirect the receiver to a location other than + the Request-URI so the request can be completed. + """ + Error.__init__(self, code, message, response) + if self.message and location: + self.message = self.message + b" to " + location + self.location = location + + +class InfiniteRedirection(Error): + """ + HTTP redirection is occurring endlessly. + + @ivar location: The first URL in the series of redirections which was + not followed. + """ + + location: Optional[bytes] + + def __init__( + self, + code: Union[int, bytes], + message: Optional[bytes] = None, + response: Optional[bytes] = None, + location: Optional[bytes] = None, + ) -> None: + """ + Initializes an infinite redirection exception. + + @param code: Refers to an HTTP status code, for example + C{http.NOT_FOUND}. If no C{message} is given, C{code} is mapped to a + descriptive string that is used instead. + + @param message: A short error message, for example C{b"NOT FOUND"}. + + @param response: A complete HTML document for an error page. + + @param location: The location response-header field value. It is an + absolute URI used to redirect the receiver to a location other than + the Request-URI so the request can be completed. + """ + Error.__init__(self, code, message, response) + if self.message and location: + self.message = self.message + b" to " + location + self.location = location + + +class RedirectWithNoLocation(Error): + """ + Exception passed to L{ResponseFailed} if we got a redirect without a + C{Location} header field. + + @type uri: L{bytes} + @ivar uri: The URI which failed to give a proper location header + field. + + @since: 11.1 + """ + + message: bytes + uri: bytes + + def __init__(self, code: Union[bytes, int], message: bytes, uri: bytes) -> None: + """ + Initializes a page redirect exception when no location is given. + + @type code: L{bytes} + @param code: Refers to an HTTP status code, for example + C{http.NOT_FOUND}. If no C{message} is given, C{code} is mapped to + a descriptive string that is used instead. + + @type message: L{bytes} + @param message: A short error message. + + @type uri: L{bytes} + @param uri: The URI which failed to give a proper location header + field. + """ + Error.__init__(self, code, message) + self.message = self.message + b" to " + uri + self.uri = uri + + +class UnsupportedMethod(Exception): + """ + Raised by a resource when faced with a strange request method. + + RFC 2616 (HTTP 1.1) gives us two choices when faced with this situation: + If the type of request is known to us, but not allowed for the requested + resource, respond with NOT_ALLOWED. Otherwise, if the request is something + we don't know how to deal with in any case, respond with NOT_IMPLEMENTED. + + When this exception is raised by a Resource's render method, the server + will make the appropriate response. + + This exception's first argument MUST be a sequence of the methods the + resource *does* support. + """ + + allowedMethods = () + + def __init__(self, allowedMethods, *args): + Exception.__init__(self, allowedMethods, *args) + self.allowedMethods = allowedMethods + + if not isinstance(allowedMethods, Sequence): + raise TypeError( + "First argument must be a sequence of supported methods, " + "but my first argument is not a sequence." + ) + + def __str__(self) -> str: + return f"Expected one of {self.allowedMethods!r}" + + +class SchemeNotSupported(Exception): + """ + The scheme of a URI was not one of the supported values. 
+ """ + + +class RenderError(Exception): + """ + Base exception class for all errors which can occur during template + rendering. + """ + + +class MissingRenderMethod(RenderError): + """ + Tried to use a render method which does not exist. + + @ivar element: The element which did not have the render method. + @ivar renderName: The name of the renderer which could not be found. + """ + + def __init__(self, element, renderName): + RenderError.__init__(self, element, renderName) + self.element = element + self.renderName = renderName + + def __repr__(self) -> str: + return "{!r}: {!r} had no render method named {!r}".format( + self.__class__.__name__, + self.element, + self.renderName, + ) + + +class MissingTemplateLoader(RenderError): + """ + L{MissingTemplateLoader} is raised when trying to render an Element without + a template loader, i.e. a C{loader} attribute. + + @ivar element: The Element which did not have a document factory. + """ + + def __init__(self, element): + RenderError.__init__(self, element) + self.element = element + + def __repr__(self) -> str: + return f"{self.__class__.__name__!r}: {self.element!r} had no loader" + + +class UnexposedMethodError(Exception): + """ + Raised on any attempt to get a method which has not been exposed. + """ + + +class UnfilledSlot(Exception): + """ + During flattening, a slot with no associated data was encountered. + """ + + +class UnsupportedType(Exception): + """ + During flattening, an object of a type which cannot be flattened was + encountered. + """ + + +class ExcessiveBufferingError(Exception): + """ + The HTTP/2 protocol has been forced to buffer an excessive amount of + outbound data, and has therefore closed the connection and dropped all + outbound data. + """ + + +class FlattenerError(Exception): + """ + An error occurred while flattening an object. + + @ivar _roots: A list of the objects on the flattener's stack at the time + the unflattenable object was encountered. The first element is least + deeply nested object and the last element is the most deeply nested. + """ + + def __init__(self, exception, roots, traceback): + self._exception = exception + self._roots = roots + self._traceback = traceback + Exception.__init__(self, exception, roots, traceback) + + def _formatRoot(self, obj): + """ + Convert an object from C{self._roots} to a string suitable for + inclusion in a render-traceback (like a normal Python traceback, but + can include "frame" source locations which are not in Python source + files). + + @param obj: Any object which can be a render step I{root}. + Typically, L{Tag}s, strings, and other simple Python types. + + @return: A string representation of C{obj}. + @rtype: L{str} + """ + # There's a circular dependency between this class and 'Tag', although + # only for an isinstance() check. + from twisted.web.template import Tag + + if isinstance(obj, (bytes, str)): + # It's somewhat unlikely that there will ever be a str in the roots + # list. However, something like a MemoryError during a str.replace + # call (eg, replacing " with ") could possibly cause this. + # Likewise, UTF-8 encoding a unicode string to a byte string might + # fail like this. 
+ if len(obj) > 40: + if isinstance(obj, str): + ellipsis = "<...>" + else: + ellipsis = b"<...>" + return ascii(obj[:20] + ellipsis + obj[-20:]) + else: + return ascii(obj) + elif isinstance(obj, Tag): + if obj.filename is None: + return "Tag <" + obj.tagName + ">" + else: + return 'File "%s", line %d, column %d, in "%s"' % ( + obj.filename, + obj.lineNumber, + obj.columnNumber, + obj.tagName, + ) + else: + return ascii(obj) + + def __repr__(self) -> str: + """ + Present a string representation which includes a template traceback, so + we can tell where this error occurred in the template, as well as in + Python. + """ + # Avoid importing things unnecessarily until we actually need them; + # since this is an 'error' module we should be extra paranoid about + # that. + from traceback import format_list + + if self._roots: + roots = ( + " " + "\n ".join([self._formatRoot(r) for r in self._roots]) + "\n" + ) + else: + roots = "" + if self._traceback: + traceback = ( + "\n".join( + [ + line + for entry in format_list(self._traceback) + for line in entry.splitlines() + ] + ) + + "\n" + ) + else: + traceback = "" + return cast( + str, + ( + "Exception while flattening:\n" + + roots + + traceback + + self._exception.__class__.__name__ + + ": " + + str(self._exception) + + "\n" + ), + ) + + def __str__(self) -> str: + return repr(self) + + +class UnsupportedSpecialHeader(Exception): + """ + A HTTP/2 request was received that contained a HTTP/2 pseudo-header field + that is not recognised by Twisted. + """ diff --git a/contrib/python/Twisted/py3/twisted/web/guard.py b/contrib/python/Twisted/py3/twisted/web/guard.py new file mode 100644 index 0000000000..894823f814 --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/guard.py @@ -0,0 +1,21 @@ +# -*- test-case-name: twisted.web.test.test_httpauth -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +Resource traversal integration with L{twisted.cred} to allow for +authentication and authorization of HTTP requests. +""" + + +from twisted.web._auth.basic import BasicCredentialFactory +from twisted.web._auth.digest import DigestCredentialFactory + +# Expose HTTP authentication classes here. +from twisted.web._auth.wrapper import HTTPAuthSessionWrapper + +__all__ = [ + "HTTPAuthSessionWrapper", + "BasicCredentialFactory", + "DigestCredentialFactory", +] diff --git a/contrib/python/Twisted/py3/twisted/web/html.py b/contrib/python/Twisted/py3/twisted/web/html.py new file mode 100644 index 0000000000..8253b3ef5d --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/html.py @@ -0,0 +1,56 @@ +# -*- test-case-name: twisted.web.test.test_html -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + + +"""I hold HTML generation helpers. +""" + +from html import escape +from io import StringIO + +from incremental import Version + +from twisted.python import log +from twisted.python.deprecate import deprecated + + +@deprecated(Version("Twisted", 15, 3, 0), replacement="twisted.web.template") +def PRE(text): + "Wrap <pre> tags around some text and HTML-escape it." 
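+    # A quick sketch of the behaviour, relying only on the html.escape import
+    # above (the argument value is illustrative):
+    #   PRE("<b>x & y</b>")
+    #   -> '<pre>&lt;b&gt;x &amp; y&lt;/b&gt;</pre>'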
+ return "<pre>" + escape(text) + "</pre>" + + +@deprecated(Version("Twisted", 15, 3, 0), replacement="twisted.web.template") +def UL(lst): + io = StringIO() + io.write("<ul>\n") + for el in lst: + io.write("<li> %s</li>\n" % el) + io.write("</ul>") + return io.getvalue() + + +@deprecated(Version("Twisted", 15, 3, 0), replacement="twisted.web.template") +def linkList(lst): + io = StringIO() + io.write("<ul>\n") + for hr, el in lst: + io.write(f'<li> <a href="{hr}">{el}</a></li>\n') + io.write("</ul>") + return io.getvalue() + + +@deprecated(Version("Twisted", 15, 3, 0), replacement="twisted.web.template") +def output(func, *args, **kw): + """output(func, *args, **kw) -> html string + Either return the result of a function (which presumably returns an + HTML-legal string) or a sparse HTMLized error message and a message + in the server log. + """ + try: + return func(*args, **kw) + except BaseException: + log.msg(f"Error calling {func!r}:") + log.err() + return PRE("An error occurred.") diff --git a/contrib/python/Twisted/py3/twisted/web/http.py b/contrib/python/Twisted/py3/twisted/web/http.py new file mode 100644 index 0000000000..2bad1471dc --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/http.py @@ -0,0 +1,3305 @@ +# -*- test-case-name: twisted.web.test.test_http -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +HyperText Transfer Protocol implementation. + +This is the basic server-side protocol implementation used by the Twisted +Web server. It can parse HTTP 1.0 requests and supports many HTTP 1.1 +features as well. Additionally, some functionality implemented here is +also useful for HTTP clients (such as the chunked encoding parser). + +@var CACHED: A marker value to be returned from cache-related request methods + to indicate to the caller that a cached response will be usable and no + response body should be generated. + +@var FOUND: An HTTP response code indicating a temporary redirect. + +@var NOT_MODIFIED: An HTTP response code indicating that a requested + pre-condition (for example, the condition represented by an + I{If-Modified-Since} header is present in the request) has succeeded. This + indicates a response body cached by the client can be used. + +@var PRECONDITION_FAILED: An HTTP response code indicating that a requested + pre-condition (for example, the condition represented by an I{If-None-Match} + header is present in the request) has failed. This should typically + indicate that the server has not taken the requested action. + +@var maxChunkSizeLineLength: Maximum allowable length of the CRLF-terminated + line that indicates the size of a chunk and the extensions associated with + it, as in the HTTP 1.1 chunked I{Transfer-Encoding} (RFC 7230 section 4.1). + This limits how much data may be buffered when decoding the line. 
+""" + +__all__ = [ + "SWITCHING", + "OK", + "CREATED", + "ACCEPTED", + "NON_AUTHORITATIVE_INFORMATION", + "NO_CONTENT", + "RESET_CONTENT", + "PARTIAL_CONTENT", + "MULTI_STATUS", + "MULTIPLE_CHOICE", + "MOVED_PERMANENTLY", + "FOUND", + "SEE_OTHER", + "NOT_MODIFIED", + "USE_PROXY", + "TEMPORARY_REDIRECT", + "PERMANENT_REDIRECT", + "BAD_REQUEST", + "UNAUTHORIZED", + "PAYMENT_REQUIRED", + "FORBIDDEN", + "NOT_FOUND", + "NOT_ALLOWED", + "NOT_ACCEPTABLE", + "PROXY_AUTH_REQUIRED", + "REQUEST_TIMEOUT", + "CONFLICT", + "GONE", + "LENGTH_REQUIRED", + "PRECONDITION_FAILED", + "REQUEST_ENTITY_TOO_LARGE", + "REQUEST_URI_TOO_LONG", + "UNSUPPORTED_MEDIA_TYPE", + "REQUESTED_RANGE_NOT_SATISFIABLE", + "EXPECTATION_FAILED", + "INTERNAL_SERVER_ERROR", + "NOT_IMPLEMENTED", + "BAD_GATEWAY", + "SERVICE_UNAVAILABLE", + "GATEWAY_TIMEOUT", + "HTTP_VERSION_NOT_SUPPORTED", + "INSUFFICIENT_STORAGE_SPACE", + "NOT_EXTENDED", + "RESPONSES", + "CACHED", + "urlparse", + "parse_qs", + "datetimeToString", + "datetimeToLogString", + "timegm", + "stringToDatetime", + "toChunk", + "fromChunk", + "parseContentRange", + "StringTransport", + "HTTPClient", + "NO_BODY_CODES", + "Request", + "PotentialDataLoss", + "HTTPChannel", + "HTTPFactory", +] + + +import base64 +import binascii +import calendar +import cgi +import math +import os +import re +import tempfile +import time +import warnings +from io import BytesIO +from typing import AnyStr, Callable, List, Optional, Tuple +from urllib.parse import ( + ParseResultBytes, + unquote_to_bytes as unquote, + urlparse as _urlparse, +) + +from zope.interface import Attribute, Interface, implementer, provider + +from incremental import Version + +from twisted.internet import address, interfaces, protocol +from twisted.internet._producer_helpers import _PullToPush +from twisted.internet.defer import Deferred +from twisted.internet.interfaces import IProtocol +from twisted.logger import Logger +from twisted.protocols import basic, policies +from twisted.python import log +from twisted.python.compat import nativeString, networkString +from twisted.python.components import proxyForInterface +from twisted.python.deprecate import deprecated +from twisted.python.failure import Failure + +# twisted imports +from twisted.web._responses import ( + ACCEPTED, + BAD_GATEWAY, + BAD_REQUEST, + CONFLICT, + CREATED, + EXPECTATION_FAILED, + FORBIDDEN, + FOUND, + GATEWAY_TIMEOUT, + GONE, + HTTP_VERSION_NOT_SUPPORTED, + INSUFFICIENT_STORAGE_SPACE, + INTERNAL_SERVER_ERROR, + LENGTH_REQUIRED, + MOVED_PERMANENTLY, + MULTI_STATUS, + MULTIPLE_CHOICE, + NO_CONTENT, + NON_AUTHORITATIVE_INFORMATION, + NOT_ACCEPTABLE, + NOT_ALLOWED, + NOT_EXTENDED, + NOT_FOUND, + NOT_IMPLEMENTED, + NOT_MODIFIED, + OK, + PARTIAL_CONTENT, + PAYMENT_REQUIRED, + PERMANENT_REDIRECT, + PRECONDITION_FAILED, + PROXY_AUTH_REQUIRED, + REQUEST_ENTITY_TOO_LARGE, + REQUEST_TIMEOUT, + REQUEST_URI_TOO_LONG, + REQUESTED_RANGE_NOT_SATISFIABLE, + RESET_CONTENT, + RESPONSES, + SEE_OTHER, + SERVICE_UNAVAILABLE, + SWITCHING, + TEMPORARY_REDIRECT, + UNAUTHORIZED, + UNSUPPORTED_MEDIA_TYPE, + USE_PROXY, +) +from twisted.web.http_headers import Headers, _sanitizeLinearWhitespace +from twisted.web.iweb import IAccessLogFormatter, INonQueuedRequestFactory, IRequest + +try: + from twisted.web._http2 import H2Connection + + H2_ENABLED = True +except ImportError: + H2_ENABLED = False + + +# A common request timeout -- 1 minute. This is roughly what nginx uses, and +# so it seems to be a good choice for us too. 
+_REQUEST_TIMEOUT = 1 * 60 + +protocol_version = "HTTP/1.1" + +CACHED = """Magic constant returned by http.Request methods to set cache +validation headers when the request is conditional and the value fails +the condition.""" + +# backwards compatibility +responses = RESPONSES + + +# datetime parsing and formatting +weekdayname = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] +monthname = [ + None, + "Jan", + "Feb", + "Mar", + "Apr", + "May", + "Jun", + "Jul", + "Aug", + "Sep", + "Oct", + "Nov", + "Dec", +] +weekdayname_lower = [name.lower() for name in weekdayname] +monthname_lower = [name and name.lower() for name in monthname] + + +def _parseHeader(line): + # cgi.parse_header requires a str + key, pdict = cgi.parse_header(line.decode("charmap")) + + # We want the key as bytes, and cgi.parse_multipart (which consumes + # pdict) expects a dict of str keys but bytes values + key = key.encode("charmap") + pdict = {x: y.encode("charmap") for x, y in pdict.items()} + return (key, pdict) + + +def urlparse(url): + """ + Parse an URL into six components. + + This is similar to C{urlparse.urlparse}, but rejects C{str} input + and always produces C{bytes} output. + + @type url: C{bytes} + + @raise TypeError: The given url was a C{str} string instead of a + C{bytes}. + + @return: The scheme, net location, path, params, query string, and fragment + of the URL - all as C{bytes}. + @rtype: C{ParseResultBytes} + """ + if isinstance(url, str): + raise TypeError("url must be bytes, not unicode") + scheme, netloc, path, params, query, fragment = _urlparse(url) + if isinstance(scheme, str): + scheme = scheme.encode("ascii") + netloc = netloc.encode("ascii") + path = path.encode("ascii") + query = query.encode("ascii") + fragment = fragment.encode("ascii") + return ParseResultBytes(scheme, netloc, path, params, query, fragment) + + +def parse_qs(qs, keep_blank_values=0, strict_parsing=0): + """ + Like C{cgi.parse_qs}, but with support for parsing byte strings on Python 3. + + @type qs: C{bytes} + """ + d = {} + items = [s2 for s1 in qs.split(b"&") for s2 in s1.split(b";")] + for item in items: + try: + k, v = item.split(b"=", 1) + except ValueError: + if strict_parsing: + raise + continue + if v or keep_blank_values: + k = unquote(k.replace(b"+", b" ")) + v = unquote(v.replace(b"+", b" ")) + if k in d: + d[k].append(v) + else: + d[k] = [v] + return d + + +def datetimeToString(msSinceEpoch=None): + """ + Convert seconds since epoch to HTTP datetime string. + + @rtype: C{bytes} + """ + if msSinceEpoch == None: + msSinceEpoch = time.time() + year, month, day, hh, mm, ss, wd, y, z = time.gmtime(msSinceEpoch) + s = networkString( + "%s, %02d %3s %4d %02d:%02d:%02d GMT" + % (weekdayname[wd], day, monthname[month], year, hh, mm, ss) + ) + return s + + +def datetimeToLogString(msSinceEpoch=None): + """ + Convert seconds since epoch to log datetime string. 
+ + @rtype: C{str} + """ + if msSinceEpoch == None: + msSinceEpoch = time.time() + year, month, day, hh, mm, ss, wd, y, z = time.gmtime(msSinceEpoch) + s = "[%02d/%3s/%4d:%02d:%02d:%02d +0000]" % ( + day, + monthname[month], + year, + hh, + mm, + ss, + ) + return s + + +def timegm(year, month, day, hour, minute, second): + """ + Convert time tuple in GMT to seconds since epoch, GMT + """ + EPOCH = 1970 + if year < EPOCH: + raise ValueError("Years prior to %d not supported" % (EPOCH,)) + assert 1 <= month <= 12 + days = 365 * (year - EPOCH) + calendar.leapdays(EPOCH, year) + for i in range(1, month): + days = days + calendar.mdays[i] + if month > 2 and calendar.isleap(year): + days = days + 1 + days = days + day - 1 + hours = days * 24 + hour + minutes = hours * 60 + minute + seconds = minutes * 60 + second + return seconds + + +def stringToDatetime(dateString): + """ + Convert an HTTP date string (one of three formats) to seconds since epoch. + + @type dateString: C{bytes} + """ + parts = nativeString(dateString).split() + + if not parts[0][0:3].lower() in weekdayname_lower: + # Weekday is stupid. Might have been omitted. + try: + return stringToDatetime(b"Sun, " + dateString) + except ValueError: + # Guess not. + pass + + partlen = len(parts) + if (partlen == 5 or partlen == 6) and parts[1].isdigit(): + # 1st date format: Sun, 06 Nov 1994 08:49:37 GMT + # (Note: "GMT" is literal, not a variable timezone) + # (also handles without "GMT") + # This is the normal format + day = parts[1] + month = parts[2] + year = parts[3] + time = parts[4] + elif (partlen == 3 or partlen == 4) and parts[1].find("-") != -1: + # 2nd date format: Sunday, 06-Nov-94 08:49:37 GMT + # (Note: "GMT" is literal, not a variable timezone) + # (also handles without without "GMT") + # Two digit year, yucko. + day, month, year = parts[1].split("-") + time = parts[2] + year = int(year) + if year < 69: + year = year + 2000 + elif year < 100: + year = year + 1900 + elif len(parts) == 5: + # 3rd date format: Sun Nov 6 08:49:37 1994 + # ANSI C asctime() format. + day = parts[2] + month = parts[1] + year = parts[4] + time = parts[3] + else: + raise ValueError("Unknown datetime format %r" % dateString) + + day = int(day) + month = int(monthname_lower.index(month.lower())) + year = int(year) + hour, min, sec = map(int, time.split(":")) + return int(timegm(year, month, day, hour, min, sec)) + + +def toChunk(data): + """ + Convert string to a chunk. + + @type data: C{bytes} + + @returns: a tuple of C{bytes} representing the chunked encoding of data + """ + return (networkString(f"{len(data):x}"), b"\r\n", data, b"\r\n") + + +def _ishexdigits(b: bytes) -> bool: + """ + Is the string case-insensitively hexidecimal? + + It must be composed of one or more characters in the ranges a-f, A-F + and 0-9. + """ + for c in b: + if c not in b"0123456789abcdefABCDEF": + return False + return b != b"" + + +def _hexint(b: bytes) -> int: + """ + Decode a hexadecimal integer. + + Unlike L{int(b, 16)}, this raises L{ValueError} when the integer has + a prefix like C{b'0x'}, C{b'+'}, or C{b'-'}, which is desirable when + parsing network protocols. + """ + if not _ishexdigits(b): + raise ValueError(b) + return int(b, 16) + + +def fromChunk(data: bytes) -> Tuple[bytes, bytes]: + """ + Convert chunk to string. + + Note that this function is not specification compliant: it doesn't handle + chunk extensions. + + @type data: C{bytes} + + @return: tuple of (result, remaining) - both C{bytes}. 
+ + @raise ValueError: If the given data is not a correctly formatted chunked + byte string. + """ + prefix, rest = data.split(b"\r\n", 1) + length = _hexint(prefix) + if length < 0: + raise ValueError("Chunk length must be >= 0, not %d" % (length,)) + if rest[length : length + 2] != b"\r\n": + raise ValueError("chunk must end with CRLF") + return rest[:length], rest[length + 2 :] + + +def parseContentRange(header): + """ + Parse a content-range header into (start, end, realLength). + + realLength might be None if real length is not known ('*'). + """ + kind, other = header.strip().split() + if kind.lower() != "bytes": + raise ValueError("a range of type %r is not supported") + startend, realLength = other.split("/") + start, end = map(int, startend.split("-")) + if realLength == "*": + realLength = None + else: + realLength = int(realLength) + return (start, end, realLength) + + +class _IDeprecatedHTTPChannelToRequestInterface(Interface): + """ + The interface L{HTTPChannel} expects of L{Request}. + """ + + requestHeaders = Attribute( + "A L{http_headers.Headers} instance giving all received HTTP request " + "headers." + ) + + responseHeaders = Attribute( + "A L{http_headers.Headers} instance holding all HTTP response " + "headers to be sent." + ) + + def connectionLost(reason): + """ + The underlying connection has been lost. + + @param reason: A failure instance indicating the reason why + the connection was lost. + @type reason: L{twisted.python.failure.Failure} + """ + + def gotLength(length): + """ + Called when L{HTTPChannel} has determined the length, if any, + of the incoming request's body. + + @param length: The length of the request's body. + @type length: L{int} if the request declares its body's length + and L{None} if it does not. + """ + + def handleContentChunk(data): + """ + Deliver a received chunk of body data to the request. Note + this does not imply chunked transfer encoding. + + @param data: The received chunk. + @type data: L{bytes} + """ + + def parseCookies(): + """ + Parse the request's cookies out of received headers. + """ + + def requestReceived(command, path, version): + """ + Called when the entire request, including its body, has been + received. + + @param command: The request's HTTP command. + @type command: L{bytes} + + @param path: The request's path. Note: this is actually what + RFC7320 calls the URI. + @type path: L{bytes} + + @param version: The request's HTTP version. + @type version: L{bytes} + """ + + def __eq__(other: object) -> bool: + """ + Determines if two requests are the same object. + + @param other: Another object whose identity will be compared + to this instance's. + + @return: L{True} when the two are the same object and L{False} + when not. + """ + + def __ne__(other: object) -> bool: + """ + Determines if two requests are not the same object. + + @param other: Another object whose identity will be compared + to this instance's. + + @return: L{True} when the two are not the same object and + L{False} when they are. + """ + + def __hash__(): + """ + Generate a hash value for the request. + + @return: The request's hash value. + @rtype: L{int} + """ + + +class StringTransport: + """ + I am a BytesIO wrapper that conforms for the transport API. I support + the `writeSequence' method. 
+ """ + + def __init__(self): + self.s = BytesIO() + + def writeSequence(self, seq): + self.s.write(b"".join(seq)) + + def __getattr__(self, attr): + return getattr(self.__dict__["s"], attr) + + +class HTTPClient(basic.LineReceiver): + """ + A client for HTTP 1.0. + + Notes: + You probably want to send a 'Host' header with the name of the site you're + connecting to, in order to not break name based virtual hosting. + + @ivar length: The length of the request body in bytes. + @type length: C{int} + + @ivar firstLine: Are we waiting for the first header line? + @type firstLine: C{bool} + + @ivar __buffer: The buffer that stores the response to the HTTP request. + @type __buffer: A C{BytesIO} object. + + @ivar _header: Part or all of an HTTP request header. + @type _header: C{bytes} + """ + + length = None + firstLine = True + __buffer = None + _header = b"" + + def sendCommand(self, command, path): + self.transport.writeSequence([command, b" ", path, b" HTTP/1.0\r\n"]) + + def sendHeader(self, name, value): + if not isinstance(value, bytes): + # XXX Deprecate this case + value = networkString(str(value)) + santizedName = _sanitizeLinearWhitespace(name) + santizedValue = _sanitizeLinearWhitespace(value) + self.transport.writeSequence([santizedName, b": ", santizedValue, b"\r\n"]) + + def endHeaders(self): + self.transport.write(b"\r\n") + + def extractHeader(self, header): + """ + Given a complete HTTP header, extract the field name and value and + process the header. + + @param header: a complete HTTP request header of the form + 'field-name: value'. + @type header: C{bytes} + """ + key, val = header.split(b":", 1) + val = val.lstrip() + self.handleHeader(key, val) + if key.lower() == b"content-length": + self.length = int(val) + + def lineReceived(self, line): + """ + Parse the status line and headers for an HTTP request. + + @param line: Part of an HTTP request header. Request bodies are parsed + in L{HTTPClient.rawDataReceived}. + @type line: C{bytes} + """ + if self.firstLine: + self.firstLine = False + l = line.split(None, 2) + version = l[0] + status = l[1] + try: + message = l[2] + except IndexError: + # sometimes there is no message + message = b"" + self.handleStatus(version, status, message) + return + if not line: + if self._header != b"": + # Only extract headers if there are any + self.extractHeader(self._header) + self.__buffer = BytesIO() + self.handleEndHeaders() + self.setRawMode() + return + + if line.startswith(b"\t") or line.startswith(b" "): + # This line is part of a multiline header. According to RFC 822, in + # "unfolding" multiline headers you do not strip the leading + # whitespace on the continuing line. + self._header = self._header + line + elif self._header: + # This line starts a new header, so process the previous one. + self.extractHeader(self._header) + self._header = line + else: # First header + self._header = line + + def connectionLost(self, reason): + self.handleResponseEnd() + + def handleResponseEnd(self): + """ + The response has been completely received. + + This callback may be invoked more than once per request. + """ + if self.__buffer is not None: + b = self.__buffer.getvalue() + self.__buffer = None + self.handleResponse(b) + + def handleResponsePart(self, data): + self.__buffer.write(data) + + def connectionMade(self): + pass + + def handleStatus(self, version, status, message): + """ + Called when the status-line is received. + + @param version: e.g. 'HTTP/1.0' + @param status: e.g. '200' + @type status: C{bytes} + @param message: e.g. 
'OK' + """ + + def handleHeader(self, key, val): + """ + Called every time a header is received. + """ + + def handleEndHeaders(self): + """ + Called when all headers have been received. + """ + + def rawDataReceived(self, data): + if self.length is not None: + data, rest = data[: self.length], data[self.length :] + self.length -= len(data) + else: + rest = b"" + self.handleResponsePart(data) + if self.length == 0: + self.handleResponseEnd() + self.setLineMode(rest) + + +# response codes that must have empty bodies +NO_BODY_CODES = (204, 304) + + +# Sentinel object that detects people explicitly passing `queued` to Request. +_QUEUED_SENTINEL = object() + + +def _getContentFile(length): + """ + Get a writeable file-like object to which request content can be written. + """ + if length is not None and length < 100000: + return BytesIO() + return tempfile.TemporaryFile() + + +_hostHeaderExpression = re.compile(rb"^\[?(?P<host>.*?)\]?(:\d+)?$") + + +@implementer(interfaces.IConsumer, _IDeprecatedHTTPChannelToRequestInterface) +class Request: + """ + A HTTP request. + + Subclasses should override the process() method to determine how + the request will be processed. + + @ivar method: The HTTP method that was used, e.g. C{b'GET'}. + @type method: L{bytes} + + @ivar uri: The full encoded URI which was requested (including query + arguments), e.g. C{b'/a/b%20/c?q=v'}. + @type uri: L{bytes} + + @ivar path: The encoded path of the request URI (not including query + arguments), e.g. C{b'/a/b%20/c'}. + @type path: L{bytes} + + @ivar args: A mapping of decoded query argument names as L{bytes} to + corresponding query argument values as L{list}s of L{bytes}. + For example, for a URI with C{foo=bar&foo=baz&quux=spam} + as its query part C{args} will be C{{b'foo': [b'bar', b'baz'], + b'quux': [b'spam']}}. + @type args: L{dict} of L{bytes} to L{list} of L{bytes} + + @ivar content: A file-like object giving the request body. This may be + a file on disk, an L{io.BytesIO}, or some other type. The + implementation is free to decide on a per-request basis. + @type content: L{typing.BinaryIO} + + @ivar cookies: The cookies that will be sent in the response. + @type cookies: L{list} of L{bytes} + + @type requestHeaders: L{http_headers.Headers} + @ivar requestHeaders: All received HTTP request headers. + + @type responseHeaders: L{http_headers.Headers} + @ivar responseHeaders: All HTTP response headers to be sent. + + @ivar notifications: A L{list} of L{Deferred}s which are waiting for + notification that the response to this request has been finished + (successfully or with an error). Don't use this attribute directly, + instead use the L{Request.notifyFinish} method. + + @ivar _disconnected: A flag which is C{False} until the connection over + which this request was received is closed and which is C{True} after + that. + @type _disconnected: L{bool} + + @ivar _log: A logger instance for request related messages. + @type _log: L{twisted.logger.Logger} + """ + + producer = None + finished = 0 + code = OK + code_message = RESPONSES[OK] + method = b"(no method yet)" + clientproto = b"(no clientproto yet)" + uri = b"(no uri yet)" + startedWriting = 0 + chunked = 0 + sentLength = 0 # content-length of response, or total bytes sent via chunking + etag = None + lastModified = None + args = None + path = None + content = None + _forceSSL = 0 + _disconnected = False + _log = Logger() + + def __init__(self, channel, queued=_QUEUED_SENTINEL): + """ + @param channel: the channel we're connected to. 
+ @param queued: (deprecated) are we in the request queue, or can we + start writing to the transport? + """ + self.notifications: List[Deferred[None]] = [] + self.channel = channel + + # Cache the client and server information, we'll need this + # later to be serialized and sent with the request so CGIs + # will work remotely + self.client = self.channel.getPeer() + self.host = self.channel.getHost() + + self.requestHeaders: Headers = Headers() + self.received_cookies = {} + self.responseHeaders: Headers = Headers() + self.cookies = [] # outgoing cookies + self.transport = self.channel.transport + + if queued is _QUEUED_SENTINEL: + queued = False + + self.queued = queued + + def _cleanup(self): + """ + Called when have finished responding and are no longer queued. + """ + if self.producer: + self._log.failure( + "", + Failure(RuntimeError(f"Producer was not unregistered for {self.uri}")), + ) + self.unregisterProducer() + self.channel.requestDone(self) + del self.channel + if self.content is not None: + try: + self.content.close() + except OSError: + # win32 suckiness, no idea why it does this + pass + del self.content + for d in self.notifications: + d.callback(None) + self.notifications = [] + + # methods for channel - end users should not use these + + @deprecated(Version("Twisted", 16, 3, 0)) + def noLongerQueued(self): + """ + Notify the object that it is no longer queued. + + We start writing whatever data we have to the transport, etc. + + This method is not intended for users. + + In 16.3 this method was changed to become a no-op, as L{Request} + objects are now never queued. + """ + pass + + def gotLength(self, length): + """ + Called when HTTP channel got length of content in this request. + + This method is not intended for users. + + @param length: The length of the request body, as indicated by the + request headers. L{None} if the request headers do not indicate a + length. + """ + self.content = _getContentFile(length) + + def parseCookies(self): + """ + Parse cookie headers. + + This method is not intended for users. + """ + cookieheaders = self.requestHeaders.getRawHeaders(b"cookie") + + if cookieheaders is None: + return + + for cookietxt in cookieheaders: + if cookietxt: + for cook in cookietxt.split(b";"): + cook = cook.lstrip() + try: + k, v = cook.split(b"=", 1) + self.received_cookies[k] = v + except ValueError: + pass + + def handleContentChunk(self, data): + """ + Write a chunk of data. + + This method is not intended for users. + """ + self.content.write(data) + + def requestReceived(self, command, path, version): + """ + Called by channel when all data has been received. + + This method is not intended for users. + + @type command: C{bytes} + @param command: The HTTP verb of this request. This has the case + supplied by the client (eg, it maybe "get" rather than "GET"). + + @type path: C{bytes} + @param path: The URI of this request. + + @type version: C{bytes} + @param version: The HTTP version of this request. 
+ """ + clength = self.content.tell() + self.content.seek(0, 0) + self.args = {} + + self.method, self.uri = command, path + self.clientproto = version + x = self.uri.split(b"?", 1) + + if len(x) == 1: + self.path = self.uri + else: + self.path, argstring = x + self.args = parse_qs(argstring, 1) + + # Argument processing + args = self.args + ctype = self.requestHeaders.getRawHeaders(b"content-type") + if ctype is not None: + ctype = ctype[0] + + if self.method == b"POST" and ctype and clength: + mfd = b"multipart/form-data" + key, pdict = _parseHeader(ctype) + # This weird CONTENT-LENGTH param is required by + # cgi.parse_multipart() in some versions of Python 3.7+, see + # bpo-29979. It looks like this will be relaxed and backported, see + # https://github.com/python/cpython/pull/8530. + pdict["CONTENT-LENGTH"] = clength + if key == b"application/x-www-form-urlencoded": + args.update(parse_qs(self.content.read(), 1)) + elif key == mfd: + try: + cgiArgs = cgi.parse_multipart( + self.content, + pdict, + encoding="utf8", + errors="surrogateescape", + ) + + # The parse_multipart function on Python 3.7+ + # decodes the header bytes as iso-8859-1 and + # decodes the body bytes as utf8 with + # surrogateescape -- we want bytes + self.args.update( + { + x.encode("iso-8859-1"): [ + z.encode("utf8", "surrogateescape") + if isinstance(z, str) + else z + for z in y + ] + for x, y in cgiArgs.items() + if isinstance(x, str) + } + ) + except Exception as e: + # It was a bad request, or we got a signal. + self.channel._respondToBadRequestAndDisconnect() + if isinstance(e, (TypeError, ValueError, KeyError)): + return + else: + # If it's not a userspace error from CGI, reraise + raise + + self.content.seek(0, 0) + + self.process() + + def __repr__(self) -> str: + """ + Return a string description of the request including such information + as the request method and request URI. + + @return: A string loosely describing this L{Request} object. + @rtype: L{str} + """ + return "<{} at 0x{:x} method={} uri={} clientproto={}>".format( + self.__class__.__name__, + id(self), + nativeString(self.method), + nativeString(self.uri), + nativeString(self.clientproto), + ) + + def process(self): + """ + Override in subclasses. + + This method is not intended for users. + """ + pass + + # consumer interface + + def registerProducer(self, producer, streaming): + """ + Register a producer. + """ + if self.producer: + raise ValueError( + "registering producer %s before previous one (%s) was " + "unregistered" % (producer, self.producer) + ) + + self.streamingProducer = streaming + self.producer = producer + self.channel.registerProducer(producer, streaming) + + def unregisterProducer(self): + """ + Unregister the producer. + """ + self.channel.unregisterProducer() + self.producer = None + + # The following is the public interface that people should be + # writing to. + def getHeader(self, key: AnyStr) -> Optional[AnyStr]: + """ + Get an HTTP request header. + + @type key: C{bytes} or C{str} + @param key: The name of the header to get the value of. + + @rtype: C{bytes} or C{str} or L{None} + @return: The value of the specified header, or L{None} if that header + was not present in the request. The string type of the result + matches the type of C{key}. + """ + value = self.requestHeaders.getRawHeaders(key) + if value is not None: + return value[-1] + return None + + def getCookie(self, key): + """ + Get a cookie that was sent from the network. + + @type key: C{bytes} + @param key: The name of the cookie to get. 
+ + @rtype: C{bytes} or C{None} + @returns: The value of the specified cookie, or L{None} if that cookie + was not present in the request. + """ + return self.received_cookies.get(key) + + def notifyFinish(self) -> Deferred[None]: + """ + Notify when the response to this request has finished. + + @note: There are some caveats around the reliability of the delivery of + this notification. + + 1. If this L{Request}'s channel is paused, the notification + will not be delivered. This can happen in one of two ways; + either you can call C{request.transport.pauseProducing} + yourself, or, + + 2. In order to deliver this notification promptly when a client + disconnects, the reactor must continue reading from the + transport, so that it can tell when the underlying network + connection has gone away. Twisted Web will only keep + reading up until a finite (small) maximum buffer size before + it gives up and pauses the transport itself. If this + occurs, you will not discover that the connection has gone + away until a timeout fires or until the application attempts + to send some data via L{Request.write}. + + 3. It is theoretically impossible to distinguish between + successfully I{sending} a response and the peer successfully + I{receiving} it. There are several networking edge cases + where the L{Deferred}s returned by C{notifyFinish} will + indicate success, but the data will never be received. + There are also edge cases where the connection will appear + to fail, but in reality the response was delivered. As a + result, the information provided by the result of the + L{Deferred}s returned by this method should be treated as a + guess; do not make critical decisions in your applications + based upon it. + + @rtype: L{Deferred} + @return: A L{Deferred} which will be triggered when the request is + finished -- with a L{None} value if the request finishes + successfully or with an error if the request is interrupted by an + error (for example, the client closing the connection prematurely). + """ + self.notifications.append(Deferred()) + return self.notifications[-1] + + def finish(self): + """ + Indicate that all response data has been written to this L{Request}. + """ + if self._disconnected: + raise RuntimeError( + "Request.finish called on a request after its connection was lost; " + "use Request.notifyFinish to keep track of this." + ) + if self.finished: + warnings.warn("Warning! request.finish called twice.", stacklevel=2) + return + + if not self.startedWriting: + # write headers + self.write(b"") + + if self.chunked: + # write last chunk and closing CRLF + self.channel.write(b"0\r\n\r\n") + + # log request + if hasattr(self.channel, "factory") and self.channel.factory is not None: + self.channel.factory.log(self) + + self.finished = 1 + if not self.queued: + self._cleanup() + + def write(self, data): + """ + Write some data as a result of an HTTP request. The first + time this is called, it writes out response data. + + @type data: C{bytes} + @param data: Some bytes to be sent as part of the response body. + """ + if self.finished: + raise RuntimeError( + "Request.write called on a request after " "Request.finish was called." + ) + + if self._disconnected: + # Don't attempt to write any data to a disconnected client. 
+ # The RuntimeError exception will be thrown as usual when + # request.finish is called + return + + if not self.startedWriting: + self.startedWriting = 1 + version = self.clientproto + code = b"%d" % (self.code,) + reason = self.code_message + headers = [] + + # if we don't have a content length, we send data in + # chunked mode, so that we can support pipelining in + # persistent connections. + if ( + (version == b"HTTP/1.1") + and (self.responseHeaders.getRawHeaders(b"content-length") is None) + and self.method != b"HEAD" + and self.code not in NO_BODY_CODES + ): + headers.append((b"Transfer-Encoding", b"chunked")) + self.chunked = 1 + + if self.lastModified is not None: + if self.responseHeaders.hasHeader(b"last-modified"): + self._log.info( + "Warning: last-modified specified both in" + " header list and lastModified attribute." + ) + else: + self.responseHeaders.setRawHeaders( + b"last-modified", [datetimeToString(self.lastModified)] + ) + + if self.etag is not None: + self.responseHeaders.setRawHeaders(b"ETag", [self.etag]) + + for name, values in self.responseHeaders.getAllRawHeaders(): + for value in values: + headers.append((name, value)) + + for cookie in self.cookies: + headers.append((b"Set-Cookie", cookie)) + + self.channel.writeHeaders(version, code, reason, headers) + + # if this is a "HEAD" request, we shouldn't return any data + if self.method == b"HEAD": + self.write = lambda data: None + return + + # for certain result codes, we should never return any data + if self.code in NO_BODY_CODES: + self.write = lambda data: None + return + + self.sentLength = self.sentLength + len(data) + if data: + if self.chunked: + self.channel.writeSequence(toChunk(data)) + else: + self.channel.write(data) + + def addCookie( + self, + k, + v, + expires=None, + domain=None, + path=None, + max_age=None, + comment=None, + secure=None, + httpOnly=False, + sameSite=None, + ): + """ + Set an outgoing HTTP cookie. + + In general, you should consider using sessions instead of cookies, see + L{twisted.web.server.Request.getSession} and the + L{twisted.web.server.Session} class for details. + + @param k: cookie name + @type k: L{bytes} or L{str} + + @param v: cookie value + @type v: L{bytes} or L{str} + + @param expires: cookie expire attribute value in + "Wdy, DD Mon YYYY HH:MM:SS GMT" format + @type expires: L{bytes} or L{str} + + @param domain: cookie domain + @type domain: L{bytes} or L{str} + + @param path: cookie path + @type path: L{bytes} or L{str} + + @param max_age: cookie expiration in seconds from reception + @type max_age: L{bytes} or L{str} + + @param comment: cookie comment + @type comment: L{bytes} or L{str} + + @param secure: direct browser to send the cookie on encrypted + connections only + @type secure: L{bool} + + @param httpOnly: direct browser not to expose cookies through channels + other than HTTP (and HTTPS) requests + @type httpOnly: L{bool} + + @param sameSite: One of L{None} (default), C{'lax'} or C{'strict'}. + Direct browsers not to send this cookie on cross-origin requests. + Please see: + U{https://tools.ietf.org/html/draft-west-first-party-cookies-07} + @type sameSite: L{None}, L{bytes} or L{str} + + @raise ValueError: If the value for C{sameSite} is not supported. + """ + + def _ensureBytes(val): + """ + Ensure that C{val} is bytes, encoding using UTF-8 if + needed. 
+
+ @param val: L{bytes} or L{str}
+
+ @return: L{bytes}
+ """
+ if val is None:
+ # It's None, so we don't want to touch it
+ return val
+
+ if isinstance(val, bytes):
+ return val
+ else:
+ return val.encode("utf8")
+
+ def _sanitize(val):
+ r"""
+ Replace linear whitespace (C{\r}, C{\n}, C{\r\n}) and
+ semicolons C{;} in C{val} with a single space.
+
+ @param val: L{bytes}
+ @return: L{bytes}
+ """
+ return _sanitizeLinearWhitespace(val).replace(b";", b" ")
+
+ cookie = _sanitize(_ensureBytes(k)) + b"=" + _sanitize(_ensureBytes(v))
+ if expires is not None:
+ cookie = cookie + b"; Expires=" + _sanitize(_ensureBytes(expires))
+ if domain is not None:
+ cookie = cookie + b"; Domain=" + _sanitize(_ensureBytes(domain))
+ if path is not None:
+ cookie = cookie + b"; Path=" + _sanitize(_ensureBytes(path))
+ if max_age is not None:
+ cookie = cookie + b"; Max-Age=" + _sanitize(_ensureBytes(max_age))
+ if comment is not None:
+ cookie = cookie + b"; Comment=" + _sanitize(_ensureBytes(comment))
+ if secure:
+ cookie = cookie + b"; Secure"
+ if httpOnly:
+ cookie = cookie + b"; HttpOnly"
+ if sameSite:
+ sameSite = _ensureBytes(sameSite).lower()
+ if sameSite not in [b"lax", b"strict"]:
+ raise ValueError("Invalid value for sameSite: " + repr(sameSite))
+ cookie += b"; SameSite=" + sameSite
+ self.cookies.append(cookie)
+
+ def setResponseCode(self, code, message=None):
+ """
+ Set the HTTP response code.
+
+ @type code: L{int}
+ @type message: L{bytes}
+ """
+ if not isinstance(code, int):
+ raise TypeError("HTTP response code must be int or long")
+ self.code = code
+ if message:
+ if not isinstance(message, bytes):
+ raise TypeError("HTTP response status message must be bytes")
+ self.code_message = message
+ else:
+ self.code_message = RESPONSES.get(code, b"Unknown Status")
+
+ def setHeader(self, name, value):
+ """
+ Set an HTTP response header. Overrides any previously set values for
+ this header.
+
+ @type name: L{bytes} or L{str}
+ @param name: The name of the header for which to set the value.
+
+ @type value: L{bytes} or L{str}
+ @param value: The value to set for the named header. A L{str} will be
+ UTF-8 encoded, which may not be interoperable with other
+ implementations. Avoid passing non-ASCII characters if possible.
+ """
+ self.responseHeaders.setRawHeaders(name, [value])
+
+ def redirect(self, url):
+ """
+ Utility function that does a redirect.
+
+ Set the response code to L{FOUND} and the I{Location} header to the
+ given URL.
+
+ The request should have C{finish()} called after this.
+
+ @param url: I{Location} header value.
+ @type url: L{bytes} or L{str}
+ """
+ self.setResponseCode(FOUND)
+ self.setHeader(b"location", url)
+
+ def setLastModified(self, when):
+ """
+ Set the C{Last-Modified} time for the response to this request.
+
+ If I am called more than once, I ignore attempts to set
+ Last-Modified earlier, only replacing the Last-Modified time
+ if it is to a later value.
+
+ If I am a conditional request, I may modify my response code
+ to L{NOT_MODIFIED} if appropriate for the time given.
+
+ @param when: The last time the resource being returned was
+ modified, in seconds since the epoch.
+ @type when: number
+ @return: If I am a I{If-Modified-Since} conditional request and
+ the time given is not newer than the condition, I return
+ L{http.CACHED<CACHED>} to indicate that you should write no
+ body. Otherwise, I return a false value.
+ """
+ # time.time() may be a float, but the HTTP-date strings are
+ # only good for whole seconds.
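+ # Rounding up means the advertised Last-Modified value is never
+ # earlier than the actual modification time.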
+ when = int(math.ceil(when)) + if (not self.lastModified) or (self.lastModified < when): + self.lastModified = when + + modifiedSince = self.getHeader(b"if-modified-since") + if modifiedSince: + firstPart = modifiedSince.split(b";", 1)[0] + try: + modifiedSince = stringToDatetime(firstPart) + except ValueError: + return None + if modifiedSince >= self.lastModified: + self.setResponseCode(NOT_MODIFIED) + return CACHED + return None + + def setETag(self, etag): + """ + Set an C{entity tag} for the outgoing response. + + That's \"entity tag\" as in the HTTP/1.1 C{ETag} header, \"used + for comparing two or more entities from the same requested + resource.\" + + If I am a conditional request, I may modify my response code + to L{NOT_MODIFIED} or L{PRECONDITION_FAILED}, if appropriate + for the tag given. + + @param etag: The entity tag for the resource being returned. + @type etag: string + @return: If I am a C{If-None-Match} conditional request and + the tag matches one in the request, I return + L{http.CACHED<CACHED>} to indicate that you should write + no body. Otherwise, I return a false value. + """ + if etag: + self.etag = etag + + tags = self.getHeader(b"if-none-match") + if tags: + tags = tags.split() + if (etag in tags) or (b"*" in tags): + self.setResponseCode( + ((self.method in (b"HEAD", b"GET")) and NOT_MODIFIED) + or PRECONDITION_FAILED + ) + return CACHED + return None + + def getAllHeaders(self): + """ + Return dictionary mapping the names of all received headers to the last + value received for each. + + Since this method does not return all header information, + C{self.requestHeaders.getAllRawHeaders()} may be preferred. + """ + headers = {} + for k, v in self.requestHeaders.getAllRawHeaders(): + headers[k.lower()] = v[-1] + return headers + + def getRequestHostname(self): + """ + Get the hostname that the HTTP client passed in to the request. + + @see: L{IRequest.getRequestHostname} + + @returns: the requested hostname + + @rtype: C{bytes} + """ + host = self.getHeader(b"host") + if host is not None: + match = _hostHeaderExpression.match(host) + if match is not None: + return match.group("host") + return networkString(self.getHost().host) + + def getHost(self): + """ + Get my originally requesting transport's host. + + Don't rely on the 'transport' attribute, since Request objects may be + copied remotely. For information on this method's return value, see + L{twisted.internet.tcp.Port}. + """ + return self.host + + def setHost(self, host, port, ssl=0): + """ + Change the host and port the request thinks it's using. + + This method is useful for working with reverse HTTP proxies (e.g. + both Squid and Apache's mod_proxy can do this), when the address + the HTTP client is using is different than the one we're listening on. + + For example, Apache may be listening on https://www.example.com/, and + then forwarding requests to http://localhost:8080/, but we don't want + HTML produced by Twisted to say b'http://localhost:8080/', they should + say b'https://www.example.com/', so we do:: + + request.setHost(b'www.example.com', 443, ssl=1) + + @type host: C{bytes} + @param host: The value to which to change the host header. + + @type ssl: C{bool} + @param ssl: A flag which, if C{True}, indicates that the request is + considered secure (if C{True}, L{isSecure} will return C{True}). 
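+
+ @type port: C{int}
+ @param port: The value to which to change the port; used with C{host}
+ to set the I{Host} header and the address returned by L{getHost}.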
+ """ + self._forceSSL = ssl # set first so isSecure will work + if self.isSecure(): + default = 443 + else: + default = 80 + if port == default: + hostHeader = host + else: + hostHeader = b"%b:%d" % (host, port) + self.requestHeaders.setRawHeaders(b"host", [hostHeader]) + self.host = address.IPv4Address("TCP", host, port) + + @deprecated(Version("Twisted", 18, 4, 0), replacement="getClientAddress") + def getClientIP(self): + """ + Return the IP address of the client who submitted this request. + + This method is B{deprecated}. Use L{getClientAddress} instead. + + @returns: the client IP address + @rtype: C{str} + """ + if isinstance(self.client, (address.IPv4Address, address.IPv6Address)): + return self.client.host + else: + return None + + def getClientAddress(self): + """ + Return the address of the client who submitted this request. + + This may not be a network address (e.g., a server listening on + a UNIX domain socket will cause this to return + L{UNIXAddress}). Callers must check the type of the returned + address. + + @since: 18.4 + + @return: the client's address. + @rtype: L{IAddress} + """ + return self.client + + def isSecure(self): + """ + Return L{True} if this request is using a secure transport. + + Normally this method returns L{True} if this request's L{HTTPChannel} + instance is using a transport that implements + L{interfaces.ISSLTransport}. + + This will also return L{True} if L{Request.setHost} has been called + with C{ssl=True}. + + @returns: L{True} if this request is secure + @rtype: C{bool} + """ + if self._forceSSL: + return True + channel = getattr(self, "channel", None) + if channel is None: + return False + return channel.isSecure() + + def _authorize(self): + # Authorization, (mostly) per the RFC + try: + authh = self.getHeader(b"Authorization") + if not authh: + self.user = self.password = b"" + return + bas, upw = authh.split() + if bas.lower() != b"basic": + raise ValueError() + upw = base64.b64decode(upw) + self.user, self.password = upw.split(b":", 1) + except (binascii.Error, ValueError): + self.user = self.password = b"" + except BaseException: + self._log.failure("") + self.user = self.password = b"" + + def getUser(self): + """ + Return the HTTP user sent with this request, if any. + + If no user was supplied, return the empty string. + + @returns: the HTTP user, if any + @rtype: C{bytes} + """ + try: + return self.user + except BaseException: + pass + self._authorize() + return self.user + + def getPassword(self): + """ + Return the HTTP password sent with this request, if any. + + If no password was supplied, return the empty string. + + @returns: the HTTP password, if any + @rtype: C{bytes} + """ + try: + return self.password + except BaseException: + pass + self._authorize() + return self.password + + def connectionLost(self, reason): + """ + There is no longer a connection for this request to respond over. + Clean up anything which can't be useful anymore. + """ + self._disconnected = True + self.channel = None + if self.content is not None: + self.content.close() + for d in self.notifications: + d.errback(reason) + self.notifications = [] + + def loseConnection(self): + """ + Pass the loseConnection through to the underlying channel. + """ + if self.channel is not None: + self.channel.loseConnection() + + def __eq__(self, other: object) -> bool: + """ + Determines if two requests are the same object. + + @param other: Another object whose identity will be compared + to this instance's. 
+
+ @return: L{True} when the two are the same object and L{False}
+ when not.
+ @rtype: L{bool}
+ """
+ # When other is not an instance of request, return
+ # NotImplemented so that Python uses other.__eq__ to perform
+ # the comparison. This ensures that a Request proxy generated
+ # by proxyForInterface compares equal to an actual Request
+ # instance by turning request != proxy into proxy != request.
+ if isinstance(other, Request):
+ return self is other
+ return NotImplemented
+
+ def __hash__(self):
+ """
+ A C{Request} is hashable so that it can be used as a mapping key.
+
+ @return: A C{int} based on the instance's identity.
+ """
+ return id(self)
+
+
+class _DataLoss(Exception):
+ """
+ L{_DataLoss} indicates that not all of a message body was received. This
+ is only one of several possible exceptions which may indicate that data
+ was lost. Because of this, it should not be checked for
+ specifically; any unexpected exception should be treated as having
+ caused data loss.
+ """
+
+
+class PotentialDataLoss(Exception):
+ """
+ L{PotentialDataLoss} may be raised by a transfer encoding decoder's
+ C{noMoreData} method to indicate that it cannot be determined if the
+ entire response body has been delivered. This only occurs when making
+ requests to HTTP servers which do not set I{Content-Length} or a
+ I{Transfer-Encoding} in the response because in this case the end of the
+ response is indicated by the connection being closed, an event which may
+ also be due to a transient network problem or other error.
+ """
+
+
+class _MalformedChunkedDataError(Exception):
+ """
+ C{_ChunkedTransferDecoder} raises L{_MalformedChunkedDataError} from its
+ C{dataReceived} method when it encounters malformed data. This exception
+ indicates a client-side error. If this exception is raised, the connection
+ should be dropped with a 400 error.
+ """
+
+
+class _IdentityTransferDecoder:
+ """
+ Protocol for accumulating bytes up to a specified length. This handles the
+ case where no I{Transfer-Encoding} is specified.
+
+ @ivar contentLength: Counter keeping track of how many more bytes there are
+ to receive.
+
+ @ivar dataCallback: A one-argument callable which will be invoked each
+ time application data is received.
+
+ @ivar finishCallback: A one-argument callable which will be invoked when
+ the terminal chunk is received. It will be invoked with all bytes
+ which were delivered to this protocol which came after the terminal
+ chunk.
+ """
+
+ def __init__(self, contentLength, dataCallback, finishCallback):
+ self.contentLength = contentLength
+ self.dataCallback = dataCallback
+ self.finishCallback = finishCallback
+
+ def dataReceived(self, data):
+ """
+ Interpret the next chunk of bytes received. Either deliver them to the
+ data callback or invoke the finish callback if enough bytes have been
+ received.
+
+ @raise RuntimeError: If the finish callback has already been invoked
+ during a previous call to this method.
+ """
+ if self.dataCallback is None:
+ raise RuntimeError(
+ "_IdentityTransferDecoder cannot decode data after finishing"
+ )
+
+ if self.contentLength is None:
+ self.dataCallback(data)
+ elif len(data) < self.contentLength:
+ self.contentLength -= len(data)
+ self.dataCallback(data)
+ else:
+ # Make the state consistent before invoking any code belonging to
+ # anyone else in case noMoreData ends up being called beneath this
+ # stack frame.
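+ # Bytes up to contentLength complete this message body; whatever
+ # follows arrived after the body and is passed to finishCallback
+ # unmodified.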
+ contentLength = self.contentLength + dataCallback = self.dataCallback + finishCallback = self.finishCallback + self.dataCallback = self.finishCallback = None + self.contentLength = 0 + + dataCallback(data[:contentLength]) + finishCallback(data[contentLength:]) + + def noMoreData(self): + """ + All data which will be delivered to this decoder has been. Check to + make sure as much data as was expected has been received. + + @raise PotentialDataLoss: If the content length is unknown. + @raise _DataLoss: If the content length is known and fewer than that + many bytes have been delivered. + + @return: L{None} + """ + finishCallback = self.finishCallback + self.dataCallback = self.finishCallback = None + if self.contentLength is None: + finishCallback(b"") + raise PotentialDataLoss() + elif self.contentLength != 0: + raise _DataLoss() + + +maxChunkSizeLineLength = 1024 + + +_chunkExtChars = ( + b"\t !\"#$%&'()*+,-./0123456789:;<=>?@" + b"ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`" + b"abcdefghijklmnopqrstuvwxyz{|}~" + b"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" + b"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" + b"\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" + b"\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" + b"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" + b"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" + b"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" + b"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" +) +""" +Characters that are valid in a chunk extension. + +See RFC 7230 section 4.1.1:: + + chunk-ext = *( ";" chunk-ext-name [ "=" chunk-ext-val ] ) + + chunk-ext-name = token + chunk-ext-val = token / quoted-string + +And section 3.2.6:: + + token = 1*tchar + + tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" + / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" + / DIGIT / ALPHA + ; any VCHAR, except delimiters + + quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE + qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text + obs-text = %x80-FF + +We don't check if chunk extensions are well-formed beyond validating that they +don't contain characters outside this range. +""" + + +class _ChunkedTransferDecoder: + """ + Protocol for decoding I{chunked} Transfer-Encoding, as defined by RFC 7230, + section 4.1. This protocol can interpret the contents of a request or + response body which uses the I{chunked} Transfer-Encoding. It cannot + interpret any of the rest of the HTTP protocol. + + It may make sense for _ChunkedTransferDecoder to be an actual IProtocol + implementation. Currently, the only user of this class will only ever + call dataReceived on it. However, it might be an improvement if the + user could connect this to a transport and deliver connection lost + notification. This way, `dataCallback` becomes `self.transport.write` + and perhaps `finishCallback` becomes `self.transport.loseConnection()` + (although I'm not sure where the extra data goes in that case). This + could also allow this object to indicate to the receiver of data that + the stream was not completely received, an error case which should be + noticed. -exarkun + + @ivar dataCallback: A one-argument callable which will be invoked each + time application data is received. This callback is not reentrant. + + @ivar finishCallback: A one-argument callable which will be invoked when + the terminal chunk is received. 
It will be invoked with all bytes + which were delivered to this protocol which came after the terminal + chunk. + + @ivar length: Counter keeping track of how many more bytes in a chunk there + are to receive. + + @ivar state: One of C{'CHUNK_LENGTH'}, C{'CRLF'}, C{'TRAILER'}, + C{'BODY'}, or C{'FINISHED'}. For C{'CHUNK_LENGTH'}, data for the + chunk length line is currently being read. For C{'CRLF'}, the CR LF + pair which follows each chunk is being read. For C{'TRAILER'}, the CR + LF pair which follows the terminal 0-length chunk is currently being + read. For C{'BODY'}, the contents of a chunk are being read. For + C{'FINISHED'}, the last chunk has been completely read and no more + input is valid. + + @ivar _buffer: Accumulated received data for the current state. At each + state transition this is truncated at the front so that index 0 is + where the next state shall begin. + + @ivar _start: While in the C{'CHUNK_LENGTH'} state, tracks the index into + the buffer at which search for CRLF should resume. Resuming the search + at this position avoids doing quadratic work if the chunk length line + arrives over many calls to C{dataReceived}. + + Not used in any other state. + """ + + state = "CHUNK_LENGTH" + + def __init__( + self, + dataCallback: Callable[[bytes], None], + finishCallback: Callable[[bytes], None], + ) -> None: + self.dataCallback = dataCallback + self.finishCallback = finishCallback + self._buffer = bytearray() + self._start = 0 + + def _dataReceived_CHUNK_LENGTH(self) -> bool: + """ + Read the chunk size line, ignoring any extensions. + + @returns: C{True} once the line has been read and removed from + C{self._buffer}. C{False} when more data is required. + + @raises _MalformedChunkedDataError: when the chunk size cannot be + decoded or the length of the line exceeds L{maxChunkSizeLineLength}. + """ + eolIndex = self._buffer.find(b"\r\n", self._start) + + if eolIndex >= maxChunkSizeLineLength or ( + eolIndex == -1 and len(self._buffer) > maxChunkSizeLineLength + ): + raise _MalformedChunkedDataError( + "Chunk size line exceeds maximum of {} bytes.".format( + maxChunkSizeLineLength + ) + ) + + if eolIndex == -1: + # Restart the search upon receipt of more data at the start of the + # new data, minus one in case the last character of the buffer is + # CR. + self._start = len(self._buffer) - 1 + return False + + endOfLengthIndex = self._buffer.find(b";", 0, eolIndex) + if endOfLengthIndex == -1: + endOfLengthIndex = eolIndex + rawLength = self._buffer[0:endOfLengthIndex] + try: + length = _hexint(rawLength) + except ValueError: + raise _MalformedChunkedDataError("Chunk-size must be an integer.") + + ext = self._buffer[endOfLengthIndex + 1 : eolIndex] + if ext and ext.translate(None, _chunkExtChars) != b"": + raise _MalformedChunkedDataError( + f"Invalid characters in chunk extensions: {ext!r}." + ) + + if length == 0: + self.state = "TRAILER" + else: + self.state = "BODY" + + self.length = length + del self._buffer[0 : eolIndex + 2] + self._start = 0 + return True + + def _dataReceived_CRLF(self) -> bool: + """ + Await the carriage return and line feed characters that are the end of + chunk marker that follow the chunk data. + + @returns: C{True} when the CRLF have been read, otherwise C{False}. + + @raises _MalformedChunkedDataError: when anything other than CRLF are + received. 
+ """ + if len(self._buffer) < 2: + return False + + if not self._buffer.startswith(b"\r\n"): + raise _MalformedChunkedDataError("Chunk did not end with CRLF") + + self.state = "CHUNK_LENGTH" + del self._buffer[0:2] + return True + + def _dataReceived_TRAILER(self) -> bool: + """ + Await the carriage return and line feed characters that follow the + terminal zero-length chunk. Then invoke C{finishCallback} and switch to + state C{'FINISHED'}. + + @returns: C{False}, as there is either insufficient data to continue, + or no data remains. + + @raises _MalformedChunkedDataError: when anything other than CRLF is + received. + """ + if len(self._buffer) < 2: + return False + + if not self._buffer.startswith(b"\r\n"): + raise _MalformedChunkedDataError("Chunk did not end with CRLF") + + data = memoryview(self._buffer)[2:].tobytes() + del self._buffer[:] + self.state = "FINISHED" + self.finishCallback(data) + return False + + def _dataReceived_BODY(self) -> bool: + """ + Deliver any available chunk data to the C{dataCallback}. When all the + remaining data for the chunk arrives, switch to state C{'CRLF'}. + + @returns: C{True} to continue processing of any buffered data. + """ + if len(self._buffer) >= self.length: + chunk = memoryview(self._buffer)[: self.length].tobytes() + del self._buffer[: self.length] + self.state = "CRLF" + self.dataCallback(chunk) + else: + chunk = bytes(self._buffer) + self.length -= len(chunk) + del self._buffer[:] + self.dataCallback(chunk) + return True + + def _dataReceived_FINISHED(self) -> bool: + """ + Once C{finishCallback} has been invoked receipt of additional data + raises L{RuntimeError} because it represents a programming error in + the caller. + """ + raise RuntimeError( + "_ChunkedTransferDecoder.dataReceived called after last " + "chunk was processed" + ) + + def dataReceived(self, data: bytes) -> None: + """ + Interpret data from a request or response body which uses the + I{chunked} Transfer-Encoding. + """ + self._buffer += data + goOn = True + while goOn and self._buffer: + goOn = getattr(self, "_dataReceived_" + self.state)() + + def noMoreData(self) -> None: + """ + Verify that all data has been received. If it has not been, raise + L{_DataLoss}. + """ + if self.state != "FINISHED": + raise _DataLoss( + "Chunked decoder in %r state, still expecting more data to " + "get to 'FINISHED' state." % (self.state,) + ) + + +@implementer(interfaces.IPushProducer) +class _NoPushProducer: + """ + A no-op version of L{interfaces.IPushProducer}, used to abstract over the + possibility that a L{HTTPChannel} transport does not provide + L{IPushProducer}. + """ + + def pauseProducing(self): + """ + Pause producing data. + + Tells a producer that it has produced too much data to process for + the time being, and to stop until resumeProducing() is called. + """ + + def resumeProducing(self): + """ + Resume producing data. + + This tells a producer to re-add itself to the main loop and produce + more data for its consumer. + """ + + def registerProducer(self, producer, streaming): + """ + Register to receive data from a producer. + + @param producer: The producer to register. + @param streaming: Whether this is a streaming producer or not. + """ + + def unregisterProducer(self): + """ + Stop consuming data from a producer, without disconnecting. 
+ """ + + def stopProducing(self): + """ + IProducer.stopProducing + """ + + +@implementer(interfaces.ITransport, interfaces.IPushProducer, interfaces.IConsumer) +class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin): + """ + A receiver for HTTP requests. + + The L{HTTPChannel} provides L{interfaces.ITransport} and + L{interfaces.IConsumer} to the L{Request} objects it creates. It also + implements L{interfaces.IPushProducer} to C{self.transport}, allowing the + transport to pause it. + + @ivar MAX_LENGTH: Maximum length for initial request line and each line + from the header. + + @ivar _transferDecoder: L{None} or a decoder instance if the request body + uses the I{chunked} Transfer-Encoding. + @type _transferDecoder: L{_ChunkedTransferDecoder} + + @ivar maxHeaders: Maximum number of headers allowed per request. + @type maxHeaders: C{int} + + @ivar totalHeadersSize: Maximum bytes for request line plus all headers + from the request. + @type totalHeadersSize: C{int} + + @ivar _receivedHeaderSize: Bytes received so far for the header. + @type _receivedHeaderSize: C{int} + + @ivar _handlingRequest: Whether a request is currently being processed. + @type _handlingRequest: L{bool} + + @ivar _dataBuffer: Any data that has been received from the connection + while processing an outstanding request. + @type _dataBuffer: L{list} of L{bytes} + + @ivar _networkProducer: Either the transport, if it provides + L{interfaces.IPushProducer}, or a null implementation of + L{interfaces.IPushProducer}. Used to attempt to prevent the transport + from producing excess data when we're responding to a request. + @type _networkProducer: L{interfaces.IPushProducer} + + @ivar _requestProducer: If the L{Request} object or anything it calls + registers itself as an L{interfaces.IProducer}, it will be stored here. + This is used to create a producing pipeline: pause/resume producing + methods will be propagated from the C{transport}, through the + L{HTTPChannel} instance, to the c{_requestProducer}. + + The reason we proxy through the producing methods rather than the old + behaviour (where we literally just set the L{Request} object as the + producer on the transport) is because we want to be able to exert + backpressure on the client to prevent it from sending in arbitrarily + many requests without ever reading responses. Essentially, if the + client never reads our responses we will eventually stop reading its + requests. + + @type _requestProducer: L{interfaces.IPushProducer} + + @ivar _requestProducerStreaming: A boolean that tracks whether the producer + on the L{Request} side of this channel has registered itself as a + L{interfaces.IPushProducer} or an L{interfaces.IPullProducer}. + @type _requestProducerStreaming: L{bool} or L{None} + + @ivar _waitingForTransport: A boolean that tracks whether the transport has + asked us to stop producing. This is used to keep track of what we're + waiting for: if the transport has asked us to stop producing then we + don't want to unpause the transport until it asks us to produce again. + @type _waitingForTransport: L{bool} + + @ivar abortTimeout: The number of seconds to wait after we attempt to shut + the transport down cleanly to give up and forcibly terminate it. This + is only used when we time a connection out, to prevent errors causing + the FD to get leaked. If this is L{None}, we will wait forever. 
+ @type abortTimeout: L{int} + + @ivar _abortingCall: The L{twisted.internet.base.DelayedCall} that will be + used to forcibly close the transport if it doesn't close cleanly. + @type _abortingCall: L{twisted.internet.base.DelayedCall} + + @ivar _optimisticEagerReadSize: When a resource takes a long time to answer + a request (via L{twisted.web.server.NOT_DONE_YET}, hopefully one day by + a L{Deferred}), we would like to be able to let that resource know + about the underlying transport disappearing as promptly as possible, + via L{Request.notifyFinish}, and therefore via + C{self.requests[...].connectionLost()} on this L{HTTPChannel}. + + However, in order to simplify application logic, we implement + head-of-line blocking, and do not relay pipelined requests to the + application until the previous request has been answered. This means + that said application cannot dispose of any entity-body that comes in + from those subsequent requests, which may be arbitrarily large, and it + may need to be buffered in memory. + + To implement this tradeoff between prompt notification when possible + (in the most frequent case of non-pipelined requests) and correct + behavior when not (say, if a client sends a very long-running GET + request followed by a PUT request with a very large body) we will + continue reading pipelined requests into C{self._dataBuffer} up to a + given limit. + + C{_optimisticEagerReadSize} is the number of bytes we will accept from + the client and buffer before pausing the transport. + + This behavior has been in place since Twisted 17.9.0 . + + @type _optimisticEagerReadSize: L{int} + """ + + maxHeaders = 500 + totalHeadersSize = 16384 + abortTimeout = 15 + + length = 0 + persistent = 1 + __header = b"" + __first_line = 1 + __content = None + + # set in instances or subclasses + requestFactory = Request + + _savedTimeOut = None + _receivedHeaderCount = 0 + _receivedHeaderSize = 0 + _requestProducer = None + _requestProducerStreaming = None + _waitingForTransport = False + _abortingCall = None + _optimisticEagerReadSize = 0x4000 + _log = Logger() + + def __init__(self): + # the request queue + self.requests = [] + self._handlingRequest = False + self._dataBuffer = [] + self._transferDecoder = None + + def connectionMade(self): + self.setTimeout(self.timeOut) + self._networkProducer = interfaces.IPushProducer( + self.transport, _NoPushProducer() + ) + self._networkProducer.registerProducer(self, True) + + def lineReceived(self, line): + """ + Called for each line from request until the end of headers when + it enters binary mode. + """ + self.resetTimeout() + + self._receivedHeaderSize += len(line) + if self._receivedHeaderSize > self.totalHeadersSize: + self._respondToBadRequestAndDisconnect() + return + + if self.__first_line: + # if this connection is not persistent, drop any data which + # the client (illegally) sent after the last request. 
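+ # Replacing the callbacks with no-ops silently discards the rest of
+ # this connection's input; the channel is closed after the final
+ # response is written (see requestDone).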
+ if not self.persistent: + self.dataReceived = self.lineReceived = lambda *args: None + return + + # IE sends an extraneous empty line (\r\n) after a POST request; + # eat up such a line, but only ONCE + if not line and self.__first_line == 1: + self.__first_line = 2 + return + + # create a new Request object + if INonQueuedRequestFactory.providedBy(self.requestFactory): + request = self.requestFactory(self) + else: + request = self.requestFactory(self, len(self.requests)) + self.requests.append(request) + + self.__first_line = 0 + + parts = line.split() + if len(parts) != 3: + self._respondToBadRequestAndDisconnect() + return + command, request, version = parts + try: + command.decode("ascii") + except UnicodeDecodeError: + self._respondToBadRequestAndDisconnect() + return + + self._command = command + self._path = request + self._version = version + elif line == b"": + # End of headers. + if self.__header: + ok = self.headerReceived(self.__header) + # If the last header we got is invalid, we MUST NOT proceed + # with processing. We'll have sent a 400 anyway, so just stop. + if not ok: + return + self.__header = b"" + self.allHeadersReceived() + if self.length == 0: + self.allContentReceived() + else: + self.setRawMode() + elif line[0] in b" \t": + # Continuation of a multi line header. + self.__header += b" " + line.lstrip(b" \t") + # Regular header line. + # Processing of header line is delayed to allow accumulating multi + # line headers. + else: + if self.__header: + self.headerReceived(self.__header) + self.__header = line + + def _finishRequestBody(self, data): + self.allContentReceived() + self._dataBuffer.append(data) + + def _maybeChooseTransferDecoder(self, header, data): + """ + If the provided header is C{content-length} or + C{transfer-encoding}, choose the appropriate decoder if any. + + Returns L{True} if the request can proceed and L{False} if not. + """ + + def fail(): + self._respondToBadRequestAndDisconnect() + self.length = None + return False + + # Can this header determine the length? + if header == b"content-length": + if not data.isdigit(): + return fail() + try: + length = int(data) + except ValueError: + return fail() + newTransferDecoder = _IdentityTransferDecoder( + length, self.requests[-1].handleContentChunk, self._finishRequestBody + ) + elif header == b"transfer-encoding": + # XXX Rather poorly tested code block, apparently only exercised by + # test_chunkedEncoding + if data.lower() == b"chunked": + length = None + newTransferDecoder = _ChunkedTransferDecoder( + self.requests[-1].handleContentChunk, self._finishRequestBody + ) + elif data.lower() == b"identity": + return True + else: + return fail() + else: + # It's not a length related header, so exit + return True + + if self._transferDecoder is not None: + return fail() + else: + self.length = length + self._transferDecoder = newTransferDecoder + return True + + def headerReceived(self, line): + """ + Do pre-processing (for content-length) and store this header away. + Enforce the per-request header limit. + + @type line: C{bytes} + @param line: A line from the header section of a request, excluding the + line delimiter. + + @return: A flag indicating whether the header was valid. 
+ @rtype: L{bool} + """ + try: + header, data = line.split(b":", 1) + except ValueError: + self._respondToBadRequestAndDisconnect() + return False + + if not header or header[-1:].isspace(): + self._respondToBadRequestAndDisconnect() + return False + + header = header.lower() + data = data.strip(b" \t") + + if not self._maybeChooseTransferDecoder(header, data): + return False + + reqHeaders = self.requests[-1].requestHeaders + values = reqHeaders.getRawHeaders(header) + if values is not None: + values.append(data) + else: + reqHeaders.setRawHeaders(header, [data]) + + self._receivedHeaderCount += 1 + if self._receivedHeaderCount > self.maxHeaders: + self._respondToBadRequestAndDisconnect() + return False + + return True + + def allContentReceived(self): + command = self._command + path = self._path + version = self._version + + # reset ALL state variables, so we don't interfere with next request + self.length = 0 + self._receivedHeaderCount = 0 + self._receivedHeaderSize = 0 + self.__first_line = 1 + self._transferDecoder = None + del self._command, self._path, self._version + + # Disable the idle timeout, in case this request takes a long + # time to finish generating output. + if self.timeOut: + self._savedTimeOut = self.setTimeout(None) + + self._handlingRequest = True + + # We go into raw mode here even though we will be receiving lines next + # in the protocol; however, this data will be buffered and then passed + # back to line mode in the setLineMode call in requestDone. + self.setRawMode() + + req = self.requests[-1] + req.requestReceived(command, path, version) + + def rawDataReceived(self, data: bytes) -> None: + """ + This is called when this HTTP/1.1 parser is in raw mode rather than + line mode. + + It may be in raw mode for one of two reasons: + + 1. All the headers of a request have been received and this + L{HTTPChannel} is currently receiving its body. + + 2. The full content of a request has been received and is currently + being processed asynchronously, and this L{HTTPChannel} is + buffering the data of all subsequent requests to be parsed + later. + + In the second state, the data will be played back later. + + @note: This isn't really a public API, and should be invoked only by + L{LineReceiver}'s line parsing logic. If you wish to drive an + L{HTTPChannel} from a custom data source, call C{dataReceived} on + it directly. + + @see: L{LineReceive.rawDataReceived} + """ + if self._handlingRequest: + self._dataBuffer.append(data) + if ( + sum(map(len, self._dataBuffer)) > self._optimisticEagerReadSize + ) and not self._waitingForTransport: + # If we received more data than a small limit while processing + # the head-of-line request, apply TCP backpressure to our peer + # to get them to stop sending more request data until we're + # ready. See docstring for _optimisticEagerReadSize above. 
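+ # For persistent connections, requestDone() joins self._dataBuffer
+ # and replays it through setLineMode() once the current request has
+ # been answered.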
+ self._networkProducer.pauseProducing()
+ return
+
+ self.resetTimeout()
+
+ try:
+ self._transferDecoder.dataReceived(data)
+ except _MalformedChunkedDataError:
+ self._respondToBadRequestAndDisconnect()
+
+ def allHeadersReceived(self):
+ req = self.requests[-1]
+ req.parseCookies()
+ self.persistent = self.checkPersistence(req, self._version)
+ req.gotLength(self.length)
+ # Handle 'Expect: 100-continue' with automated 100 response code,
+ # a simplistic implementation of RFC 2616 section 8.2.3:
+ expectContinue = req.requestHeaders.getRawHeaders(b"expect")
+ if (
+ expectContinue
+ and expectContinue[0].lower() == b"100-continue"
+ and self._version == b"HTTP/1.1"
+ ):
+ self._send100Continue()
+
+ def checkPersistence(self, request, version):
+ """
+ Check if the channel should close or not.
+
+ @param request: The request most recently received over this channel
+ against which checks will be made to determine if this connection
+ can remain open after a matching response is returned.
+
+ @type version: C{bytes}
+ @param version: The version of the request.
+
+ @rtype: C{bool}
+ @return: A flag which, if C{True}, indicates that this connection may
+ remain open to receive another request; if C{False}, the connection
+ must be closed in order to indicate the completion of the response
+ to C{request}.
+ """
+ connection = request.requestHeaders.getRawHeaders(b"connection")
+ if connection:
+ tokens = [t.lower() for t in connection[0].split(b" ")]
+ else:
+ tokens = []
+
+ # Once any HTTP 0.9 or HTTP 1.0 request is received, the connection is
+ # no longer allowed to be persistent. At this point in processing the
+ # request, we don't yet know if it will be possible to set a
+ # Content-Length in the response. If it is not, then the connection
+ # will have to be closed to end an HTTP 0.9 or HTTP 1.0 response.
+
+ # If the checkPersistence call happened later, after the Content-Length
+ # has been determined (or determined not to be set), it would probably
+ # be possible to have persistent connections with HTTP 0.9 and HTTP 1.0.
+ # This may not be worth the effort, though. Just use HTTP 1.1, okay?
+
+ if version == b"HTTP/1.1":
+ if b"close" in tokens:
+ request.responseHeaders.setRawHeaders(b"connection", [b"close"])
+ return False
+ else:
+ return True
+ else:
+ return False
+
+ def requestDone(self, request):
+ """
+ Called by first request in queue when it is done.
+ """
+ if request != self.requests[0]:
+ raise TypeError
+ del self.requests[0]
+
+ # We should only resume the producer if we're not waiting for the
+ # transport.
+ if not self._waitingForTransport:
+ self._networkProducer.resumeProducing()
+
+ if self.persistent:
+ self._handlingRequest = False
+
+ if self._savedTimeOut:
+ self.setTimeout(self._savedTimeOut)
+
+ # Receive our buffered data, if any.
+ data = b"".join(self._dataBuffer)
+ self._dataBuffer = []
+ self.setLineMode(data)
+ else:
+ self.loseConnection()
+
+ def timeoutConnection(self):
+ self._log.info("Timing out client: {peer}", peer=str(self.transport.getPeer()))
+ if self.abortTimeout is not None:
+ # We use self.callLater because that's what TimeoutMixin does.
+ self._abortingCall = self.callLater(
+ self.abortTimeout, self.forceAbortClient
+ )
+ self.loseConnection()
+
+ def forceAbortClient(self):
+ """
+ Called if C{abortTimeout} seconds have passed since the timeout fired,
+ and the connection still hasn't gone away. This can really only happen
+ on extremely bad connections or when clients are maliciously attempting
+ to keep connections open.
+ """ + self._log.info( + "Forcibly timing out client: {peer}", peer=str(self.transport.getPeer()) + ) + # We want to lose track of the _abortingCall so that no-one tries to + # cancel it. + self._abortingCall = None + self.transport.abortConnection() + + def connectionLost(self, reason): + self.setTimeout(None) + for request in self.requests: + request.connectionLost(reason) + + # If we were going to force-close the transport, we don't have to now. + if self._abortingCall is not None: + self._abortingCall.cancel() + self._abortingCall = None + + def isSecure(self): + """ + Return L{True} if this channel is using a secure transport. + + Normally this method returns L{True} if this instance is using a + transport that implements L{interfaces.ISSLTransport}. + + @returns: L{True} if this request is secure + @rtype: C{bool} + """ + if interfaces.ISSLTransport(self.transport, None) is not None: + return True + return False + + def writeHeaders(self, version, code, reason, headers): + """ + Called by L{Request} objects to write a complete set of HTTP headers to + a transport. + + @param version: The HTTP version in use. + @type version: L{bytes} + + @param code: The HTTP status code to write. + @type code: L{bytes} + + @param reason: The HTTP reason phrase to write. + @type reason: L{bytes} + + @param headers: The headers to write to the transport. + @type headers: L{twisted.web.http_headers.Headers} + """ + sanitizedHeaders = Headers() + for name, value in headers: + sanitizedHeaders.addRawHeader(name, value) + + responseLine = version + b" " + code + b" " + reason + b"\r\n" + headerSequence = [responseLine] + headerSequence.extend( + name + b": " + value + b"\r\n" + for name, values in sanitizedHeaders.getAllRawHeaders() + for value in values + ) + headerSequence.append(b"\r\n") + self.transport.writeSequence(headerSequence) + + def write(self, data): + """ + Called by L{Request} objects to write response data. + + @param data: The data chunk to write to the stream. + @type data: L{bytes} + + @return: L{None} + """ + self.transport.write(data) + + def writeSequence(self, iovec): + """ + Write a list of strings to the HTTP response. + + @param iovec: A list of byte strings to write to the stream. + @type iovec: L{list} of L{bytes} + + @return: L{None} + """ + self.transport.writeSequence(iovec) + + def getPeer(self): + """ + Get the remote address of this connection. + + @return: An L{IAddress} provider. + """ + return self.transport.getPeer() + + def getHost(self): + """ + Get the local address of this connection. + + @return: An L{IAddress} provider. + """ + return self.transport.getHost() + + def loseConnection(self): + """ + Closes the connection. Will write any data that is pending to be sent + on the network, but if this response has not yet been written to the + network will not write anything. + + @return: L{None} + """ + self._networkProducer.unregisterProducer() + return self.transport.loseConnection() + + def registerProducer(self, producer, streaming): + """ + Register to receive data from a producer. + + This sets self to be a consumer for a producer. When this object runs + out of data (as when a send(2) call on a socket succeeds in moving the + last data from a userspace buffer into a kernelspace buffer), it will + ask the producer to resumeProducing(). + + For L{IPullProducer} providers, C{resumeProducing} will be called once + each time data is required. 
+
+ For L{IPushProducer} providers, C{pauseProducing} will be called
+ whenever the write buffer fills up and C{resumeProducing} will only be
+ called when it empties.
+
+ @type producer: L{IProducer} provider
+ @param producer: The L{IProducer} that will be producing data.
+
+ @type streaming: L{bool}
+ @param streaming: C{True} if C{producer} provides L{IPushProducer},
+ C{False} if C{producer} provides L{IPullProducer}.
+
+ @raise RuntimeError: If a producer is already registered.
+
+ @return: L{None}
+ """
+ if self._requestProducer is not None:
+ raise RuntimeError(
+ "Cannot register producer %s, because producer %s was never "
+ "unregistered." % (producer, self._requestProducer)
+ )
+
+ if not streaming:
+ producer = _PullToPush(producer, self)
+
+ self._requestProducer = producer
+ self._requestProducerStreaming = streaming
+
+ if not streaming:
+ producer.startStreaming()
+
+ def unregisterProducer(self):
+ """
+ Stop consuming data from a producer, without disconnecting.
+
+ @return: L{None}
+ """
+ if self._requestProducer is None:
+ return
+
+ if not self._requestProducerStreaming:
+ self._requestProducer.stopStreaming()
+
+ self._requestProducer = None
+ self._requestProducerStreaming = None
+
+ def stopProducing(self):
+ """
+ Stop producing data.
+
+ The HTTPChannel doesn't *actually* implement this, because the
+ assumption is that it will only be called just before C{loseConnection}
+ is called. There's nothing sensible we can do other than call
+ C{loseConnection} anyway.
+ """
+ if self._requestProducer is not None:
+ self._requestProducer.stopProducing()
+
+ def pauseProducing(self):
+ """
+ Pause producing data.
+
+ This will be called by the transport when the send buffers have been
+ filled up. We want to simultaneously pause the producing L{Request}
+ object and also pause our transport.
+
+ The logic behind pausing the transport is specifically to avoid issues
+ like https://twistedmatrix.com/trac/ticket/8868. In this case, our
+ inability to send does not prevent us handling more requests, which
+ means we increasingly queue up more responses in our send buffer
+ without end. The easiest way to handle this is to ensure that if we are
+ unable to send our responses, we will not read further data from the
+ connection until the client pulls some data out. This is a bit of a
+ blunt instrument, but it's ok.
+
+ Note that this potentially interacts with timeout handling in a
+ positive way. Once the transport is paused the client may run into a
+ timeout which will cause us to tear the connection down. That's a good
+ thing!
+ """
+ self._waitingForTransport = True
+
+ # The first step is to tell any producer we might currently have
+ # registered to stop producing. If we can slow our applications down
+ # we should.
+ if self._requestProducer is not None:
+ self._requestProducer.pauseProducing()
+
+ # The next step here is to pause our own transport, as discussed in the
+ # docstring.
+ if not self._handlingRequest:
+ self._networkProducer.pauseProducing()
+
+ def resumeProducing(self):
+ """
+ Resume producing data.
+
+ This will be called by the transport when the send buffer has dropped
+ enough to actually send more data. When this happens we can unpause any
+ outstanding L{Request} producers we have, and also unpause our
+ transport.
+ """ + self._waitingForTransport = False + + if self._requestProducer is not None: + self._requestProducer.resumeProducing() + + # We only want to resume the network producer if we're not currently + # waiting for a response to show up. + if not self._handlingRequest: + self._networkProducer.resumeProducing() + + def _send100Continue(self): + """ + Sends a 100 Continue response, used to signal to clients that further + processing will be performed. + """ + self.transport.write(b"HTTP/1.1 100 Continue\r\n\r\n") + + def _respondToBadRequestAndDisconnect(self): + """ + This is a quick and dirty way of responding to bad requests. + + As described by HTTP standard we should be patient and accept the + whole request from the client before sending a polite bad request + response, even in the case when clients send tons of data. + """ + self.transport.write(b"HTTP/1.1 400 Bad Request\r\n\r\n") + self.loseConnection() + + +def _escape(s): + """ + Return a string like python repr, but always escaped as if surrounding + quotes were double quotes. + + @param s: The string to escape. + @type s: L{bytes} or L{str} + + @return: An escaped string. + @rtype: L{str} + """ + if not isinstance(s, bytes): + s = s.encode("ascii") + + r = repr(s) + if not isinstance(r, str): + r = r.decode("ascii") + if r.startswith("b"): + r = r[1:] + if r.startswith("'"): + return r[1:-1].replace('"', '\\"').replace("\\'", "'") + return r[1:-1] + + +@provider(IAccessLogFormatter) +def combinedLogFormatter(timestamp, request): + """ + @return: A combined log formatted log line for the given request. + + @see: L{IAccessLogFormatter} + """ + clientAddr = request.getClientAddress() + if isinstance( + clientAddr, (address.IPv4Address, address.IPv6Address, _XForwardedForAddress) + ): + ip = clientAddr.host + else: + ip = b"-" + referrer = _escape(request.getHeader(b"referer") or b"-") + agent = _escape(request.getHeader(b"user-agent") or b"-") + line = ( + '"%(ip)s" - - %(timestamp)s "%(method)s %(uri)s %(protocol)s" ' + '%(code)d %(length)s "%(referrer)s" "%(agent)s"' + % dict( + ip=_escape(ip), + timestamp=timestamp, + method=_escape(request.method), + uri=_escape(request.uri), + protocol=_escape(request.clientproto), + code=request.code, + length=request.sentLength or "-", + referrer=referrer, + agent=agent, + ) + ) + return line + + +@implementer(interfaces.IAddress) +class _XForwardedForAddress: + """ + L{IAddress} which represents the client IP to log for a request, as gleaned + from an X-Forwarded-For header. + + @ivar host: An IP address or C{b"-"}. + @type host: L{bytes} + + @see: L{proxiedLogFormatter} + """ + + def __init__(self, host): + self.host = host + + +class _XForwardedForRequest(proxyForInterface(IRequest, "_request")): # type: ignore[misc] + """ + Add a layer on top of another request that only uses the value of an + X-Forwarded-For header as the result of C{getClientAddress}. + """ + + def getClientAddress(self): + """ + The client address (the first address) in the value of the + I{X-Forwarded-For header}. If the header is not present, the IP is + considered to be C{b"-"}. + + @return: L{_XForwardedForAddress} which wraps the client address as + expected by L{combinedLogFormatter}. + """ + host = ( + self._request.requestHeaders.getRawHeaders(b"x-forwarded-for", [b"-"])[0] + .split(b",")[0] + .strip() + ) + return _XForwardedForAddress(host) + + # These are missing from the interface. Forward them manually. + @property + def clientproto(self): + """ + @return: The protocol version in the request. 
+ @rtype: L{bytes}
+ """
+ return self._request.clientproto
+
+ @property
+ def code(self):
+ """
+ @return: The response code for the request.
+ @rtype: L{int}
+ """
+ return self._request.code
+
+ @property
+ def sentLength(self):
+ """
+ @return: The number of bytes sent in the response body.
+ @rtype: L{int}
+ """
+ return self._request.sentLength
+
+
+@provider(IAccessLogFormatter)
+def proxiedLogFormatter(timestamp, request):
+ """
+ @return: A combined log formatted log line for the given request but use
+ the value of the I{X-Forwarded-For} header as the value for the client
+ IP address.
+
+ @see: L{IAccessLogFormatter}
+ """
+ return combinedLogFormatter(timestamp, _XForwardedForRequest(request))
+
+
+class _GenericHTTPChannelProtocol(proxyForInterface(IProtocol, "_channel")): # type: ignore[misc]
+ """
+ A proxy object that wraps one of the HTTP protocol objects, and switches
+ between them depending on TLS negotiated protocol.
+
+ @ivar _negotiatedProtocol: The protocol negotiated with ALPN or NPN, if
+ any.
+ @type _negotiatedProtocol: Either a bytestring containing the ALPN token
+ for the negotiated protocol, or L{None} if no protocol has yet been
+ negotiated.
+
+ @ivar _channel: The object capable of behaving like a L{HTTPChannel} that
+ is backing this object. By default this is a L{HTTPChannel}, but if a
+ HTTP protocol upgrade takes place this may be a different channel
+ object. Must implement L{IProtocol}.
+ @type _channel: L{HTTPChannel}
+
+ @ivar _requestFactory: A callable to use to build L{IRequest} objects.
+ @type _requestFactory: L{IRequest}
+
+ @ivar _site: A reference to the creating L{twisted.web.server.Site} object.
+ @type _site: L{twisted.web.server.Site}
+
+ @ivar _factory: A reference to the creating L{HTTPFactory} object.
+ @type _factory: L{HTTPFactory}
+
+ @ivar _timeOut: A timeout value to pass to the backing channel.
+ @type _timeOut: L{int} or L{None}
+
+ @ivar _callLater: A value for the C{callLater} callback.
+ @type _callLater: L{callable}
+ """
+
+ _negotiatedProtocol = None
+ _requestFactory = Request
+ _factory = None
+ _site = None
+ _timeOut = None
+ _callLater = None
+
+ @property
+ def factory(self):
+ """
+ @see: L{_genericHTTPChannelProtocolFactory}
+ """
+ return self._channel.factory
+
+ @factory.setter
+ def factory(self, value):
+ self._factory = value
+ self._channel.factory = value
+
+ @property
+ def requestFactory(self):
+ """
+ A callable to use to build L{IRequest} objects.
+
+ Retrieves the object from the current backing channel.
+ """
+ return self._channel.requestFactory
+
+ @requestFactory.setter
+ def requestFactory(self, value):
+ """
+ A callable to use to build L{IRequest} objects.
+
+ Sets the object on the backing channel and also stores the value for
+ propagation to any new channel.
+
+ @param value: The new callable to use.
+ @type value: A L{callable} returning L{IRequest}
+ """
+ self._requestFactory = value
+ self._channel.requestFactory = value
+
+ @property
+ def site(self):
+ """
+ A reference to the creating L{twisted.web.server.Site} object.
+
+ Returns the site object from the backing channel.
+ """
+ return self._channel.site
+
+ @site.setter
+ def site(self, value):
+ """
+ A reference to the creating L{twisted.web.server.Site} object.
+
+ Sets the object on the backing channel and also stores the value for
+ propagation to any new channel.
+
+ @param value: The L{twisted.web.server.Site} object to set.
+ @type value: L{twisted.web.server.Site} + """ + self._site = value + self._channel.site = value + + @property + def timeOut(self): + """ + The idle timeout for the backing channel. + """ + return self._channel.timeOut + + @timeOut.setter + def timeOut(self, value): + """ + The idle timeout for the backing channel. + + Sets the idle timeout on both the backing channel and stores it for + propagation to any new backing channel. + + @param value: The timeout to set. + @type value: L{int} or L{float} + """ + self._timeOut = value + self._channel.timeOut = value + + @property + def callLater(self): + """ + A value for the C{callLater} callback. This callback is used by the + L{twisted.protocols.policies.TimeoutMixin} to handle timeouts. + """ + return self._channel.callLater + + @callLater.setter + def callLater(self, value): + """ + Sets the value for the C{callLater} callback. This callback is used by + the L{twisted.protocols.policies.TimeoutMixin} to handle timeouts. + + @param value: The new callback to use. + @type value: L{callable} + """ + self._callLater = value + self._channel.callLater = value + + def dataReceived(self, data): + """ + An override of L{IProtocol.dataReceived} that checks what protocol we're + using. + """ + if self._negotiatedProtocol is None: + try: + negotiatedProtocol = self._channel.transport.negotiatedProtocol + except AttributeError: + # Plaintext HTTP, always HTTP/1.1 + negotiatedProtocol = b"http/1.1" + + if negotiatedProtocol is None: + negotiatedProtocol = b"http/1.1" + + if negotiatedProtocol == b"h2": + if not H2_ENABLED: + raise ValueError("Negotiated HTTP/2 without support.") + + # We need to make sure that the HTTPChannel is unregistered + # from the transport so that the H2Connection can register + # itself if possible. + networkProducer = self._channel._networkProducer + networkProducer.unregisterProducer() + + # Cancel the old channel's timeout. + self._channel.setTimeout(None) + + transport = self._channel.transport + self._channel = H2Connection() + self._channel.requestFactory = self._requestFactory + self._channel.site = self._site + self._channel.factory = self._factory + self._channel.timeOut = self._timeOut + self._channel.callLater = self._callLater + self._channel.makeConnection(transport) + + # Register the H2Connection as the transport's + # producer, so that the transport can apply back + # pressure. + networkProducer.registerProducer(self._channel, True) + else: + # Only HTTP/2 and HTTP/1.1 are supported right now. + assert ( + negotiatedProtocol == b"http/1.1" + ), "Unsupported protocol negotiated" + + self._negotiatedProtocol = negotiatedProtocol + + return self._channel.dataReceived(data) + + +def _genericHTTPChannelProtocolFactory(self): + """ + Returns an appropriately initialized _GenericHTTPChannelProtocol. + """ + return _GenericHTTPChannelProtocol(HTTPChannel()) + + +class HTTPFactory(protocol.ServerFactory): + """ + Factory for HTTP server. + + @ivar _logDateTime: A cached datetime string for log messages, updated by + C{_logDateTimeCall}. + @type _logDateTime: C{str} + + @ivar _logDateTimeCall: A delayed call for the next update to the cached + log datetime string. + @type _logDateTimeCall: L{IDelayedCall} provided + + @ivar _logFormatter: See the C{logFormatter} parameter to L{__init__} + + @ivar _nativeize: A flag that indicates whether the log file being written + to wants native strings (C{True}) or bytes (C{False}). 
This is only to + support writing to L{twisted.python.log} which, unfortunately, works + with native strings. + + @ivar reactor: An L{IReactorTime} provider used to manage connection + timeouts and compute logging timestamps. + """ + + # We need to ignore the mypy error here, because + # _genericHTTPChannelProtocolFactory is a callable which returns a proxy + # to a Protocol, instead of a concrete Protocol object, as expected in + # the protocol.Factory interface + protocol = _genericHTTPChannelProtocolFactory # type: ignore[assignment] + + logPath = None + + timeOut = _REQUEST_TIMEOUT + + def __init__( + self, logPath=None, timeout=_REQUEST_TIMEOUT, logFormatter=None, reactor=None + ): + """ + @param logPath: File path to which access log messages will be written + or C{None} to disable logging. + @type logPath: L{str} or L{bytes} + + @param timeout: The initial value of L{timeOut}, which defines the idle + connection timeout in seconds, or C{None} to disable the idle + timeout. + @type timeout: L{float} + + @param logFormatter: An object to format requests into log lines for + the access log. L{combinedLogFormatter} when C{None} is passed. + @type logFormatter: L{IAccessLogFormatter} provider + + @param reactor: An L{IReactorTime} provider used to manage connection + timeouts and compute logging timestamps. Defaults to the global + reactor. + """ + if not reactor: + from twisted.internet import reactor + self.reactor = reactor + + if logPath is not None: + logPath = os.path.abspath(logPath) + self.logPath = logPath + self.timeOut = timeout + if logFormatter is None: + logFormatter = combinedLogFormatter + self._logFormatter = logFormatter + + # For storing the cached log datetime and the callback to update it + self._logDateTime = None + self._logDateTimeCall = None + + def _updateLogDateTime(self): + """ + Update log datetime periodically, so we aren't always recalculating it. + """ + self._logDateTime = datetimeToLogString(self.reactor.seconds()) + self._logDateTimeCall = self.reactor.callLater(1, self._updateLogDateTime) + + def buildProtocol(self, addr): + p = protocol.ServerFactory.buildProtocol(self, addr) + + # This is a bit of a hack to ensure that the HTTPChannel timeouts + # occur on the same reactor as the one we're using here. This could + # ideally be resolved by passing the reactor more generally to the + # HTTPChannel, but that won't work for the TimeoutMixin until we fix + # https://twistedmatrix.com/trac/ticket/8488 + p.callLater = self.reactor.callLater + + # timeOut needs to be on the Protocol instance cause + # TimeoutMixin expects it there + p.timeOut = self.timeOut + return p + + def startFactory(self): + """ + Set up request logging if necessary. + """ + if self._logDateTimeCall is None: + self._updateLogDateTime() + + if self.logPath: + self.logFile = self._openLogFile(self.logPath) + else: + self.logFile = log.logfile + + def stopFactory(self): + if hasattr(self, "logFile"): + if self.logFile != log.logfile: + self.logFile.close() + del self.logFile + + if self._logDateTimeCall is not None and self._logDateTimeCall.active(): + self._logDateTimeCall.cancel() + self._logDateTimeCall = None + + def _openLogFile(self, path): + """ + Override in subclasses, e.g. to use L{twisted.python.logfile}. + """ + f = open(path, "ab", 1) + return f + + def log(self, request): + """ + Write a line representing C{request} to the access log file. + + @param request: The request object about which to log. 
+ @type request: L{Request} + """ + try: + logFile = self.logFile + except AttributeError: + pass + else: + line = self._logFormatter(self._logDateTime, request) + "\n" + logFile.write(line.encode("utf8")) diff --git a/contrib/python/Twisted/py3/twisted/web/http_headers.py b/contrib/python/Twisted/py3/twisted/web/http_headers.py new file mode 100644 index 0000000000..f810f4bc2c --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/http_headers.py @@ -0,0 +1,295 @@ +# -*- test-case-name: twisted.web.test.test_http_headers -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +An API for storing HTTP header names and values. +""" + +from collections.abc import Sequence as _Sequence +from typing import ( + AnyStr, + Dict, + Iterator, + List, + Mapping, + Optional, + Sequence, + Tuple, + TypeVar, + Union, + overload, +) + +from twisted.python.compat import cmp, comparable + +_T = TypeVar("_T") + + +def _dashCapitalize(name: bytes) -> bytes: + """ + Return a byte string which is capitalized using '-' as a word separator. + + @param name: The name of the header to capitalize. + + @return: The given header capitalized using '-' as a word separator. + """ + return b"-".join([word.capitalize() for word in name.split(b"-")]) + + +def _sanitizeLinearWhitespace(headerComponent: bytes) -> bytes: + r""" + Replace linear whitespace (C{\n}, C{\r\n}, C{\r}) in a header key + or value with a single space. + + @param headerComponent: The header key or value to sanitize. + + @return: The sanitized header key or value. + """ + return b" ".join(headerComponent.splitlines()) + + +@comparable +class Headers: + """ + Stores HTTP headers in a key and multiple value format. + + When passed L{str}, header names (e.g. 'Content-Type') + are encoded using ISO-8859-1 and header values (e.g. + 'text/html;charset=utf-8') are encoded using UTF-8. Some methods that return + values will return them in the same type as the name given. + + If the header keys or values cannot be encoded or decoded using the rules + above, using just L{bytes} arguments to the methods of this class will + ensure no decoding or encoding is done, and L{Headers} will treat the keys + and values as opaque byte strings. + + @cvar _caseMappings: A L{dict} that maps lowercase header names + to their canonicalized representation. + + @ivar _rawHeaders: A L{dict} mapping header names as L{bytes} to L{list}s of + header values as L{bytes}. + """ + + _caseMappings = { + b"content-md5": b"Content-MD5", + b"dnt": b"DNT", + b"etag": b"ETag", + b"p3p": b"P3P", + b"te": b"TE", + b"www-authenticate": b"WWW-Authenticate", + b"x-xss-protection": b"X-XSS-Protection", + } + + def __init__( + self, + rawHeaders: Optional[Mapping[AnyStr, Sequence[AnyStr]]] = None, + ) -> None: + self._rawHeaders: Dict[bytes, List[bytes]] = {} + if rawHeaders is not None: + for name, values in rawHeaders.items(): + self.setRawHeaders(name, values) + + def __repr__(self) -> str: + """ + Return a string fully describing the headers set on this object. + """ + return "{}({!r})".format( + self.__class__.__name__, + self._rawHeaders, + ) + + def __cmp__(self, other): + """ + Define L{Headers} instances as being equal to each other if they have + the same raw headers. 
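A minimal sketch of this equality behaviour (the header names below are
arbitrary examples); names are lowercased on the way in, so the case used
when the headers were set does not affect comparison::

    from twisted.web.http_headers import Headers

    h1 = Headers({b"content-type": [b"text/html"]})
    h2 = Headers({b"Content-Type": [b"text/html"]})
    assert h1 == h2          # identical raw headers compare equal
    assert h1 != Headers()   # differing raw headers compare unequal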
+ """ + if isinstance(other, Headers): + return cmp( + sorted(self._rawHeaders.items()), sorted(other._rawHeaders.items()) + ) + return NotImplemented + + def _encodeName(self, name: Union[str, bytes]) -> bytes: + """ + Encode the name of a header (eg 'Content-Type') to an ISO-8859-1 encoded + bytestring if required. + + @param name: A HTTP header name + + @return: C{name}, encoded if required, lowercased + """ + if isinstance(name, str): + return name.lower().encode("iso-8859-1") + return name.lower() + + def copy(self): + """ + Return a copy of itself with the same headers set. + + @return: A new L{Headers} + """ + return self.__class__(self._rawHeaders) + + def hasHeader(self, name: AnyStr) -> bool: + """ + Check for the existence of a given header. + + @param name: The name of the HTTP header to check for. + + @return: C{True} if the header exists, otherwise C{False}. + """ + return self._encodeName(name) in self._rawHeaders + + def removeHeader(self, name: AnyStr) -> None: + """ + Remove the named header from this header object. + + @param name: The name of the HTTP header to remove. + + @return: L{None} + """ + self._rawHeaders.pop(self._encodeName(name), None) + + @overload + def setRawHeaders(self, name: Union[str, bytes], values: Sequence[bytes]) -> None: + ... + + @overload + def setRawHeaders(self, name: Union[str, bytes], values: Sequence[str]) -> None: + ... + + @overload + def setRawHeaders( + self, name: Union[str, bytes], values: Sequence[Union[str, bytes]] + ) -> None: + ... + + def setRawHeaders(self, name: Union[str, bytes], values: object) -> None: + """ + Sets the raw representation of the given header. + + @param name: The name of the HTTP header to set the values for. + + @param values: A list of strings each one being a header value of + the given name. + + @raise TypeError: Raised if C{values} is not a sequence of L{bytes} + or L{str}, or if C{name} is not L{bytes} or L{str}. + + @return: L{None} + """ + if not isinstance(values, _Sequence): + raise TypeError( + "Header entry %r should be sequence but found " + "instance of %r instead" % (name, type(values)) + ) + + if not isinstance(name, (bytes, str)): + raise TypeError( + f"Header name is an instance of {type(name)!r}, not bytes or str" + ) + + for count, value in enumerate(values): + if not isinstance(value, (bytes, str)): + raise TypeError( + "Header value at position %s is an instance of %r, not " + "bytes or str" + % ( + count, + type(value), + ) + ) + + _name = _sanitizeLinearWhitespace(self._encodeName(name)) + encodedValues: List[bytes] = [] + for v in values: + if isinstance(v, str): + _v = v.encode("utf8") + else: + _v = v + encodedValues.append(_sanitizeLinearWhitespace(_v)) + + self._rawHeaders[_name] = encodedValues + + def addRawHeader(self, name: Union[str, bytes], value: Union[str, bytes]) -> None: + """ + Add a new raw value for the given header. + + @param name: The name of the header for which to set the value. + + @param value: The value to set for the named header. 
+ """ + if not isinstance(name, (bytes, str)): + raise TypeError( + f"Header name is an instance of {type(name)!r}, not bytes or str" + ) + + if not isinstance(value, (bytes, str)): + raise TypeError( + "Header value is an instance of %r, not " + "bytes or str" % (type(value),) + ) + + self._rawHeaders.setdefault( + _sanitizeLinearWhitespace(self._encodeName(name)), [] + ).append( + _sanitizeLinearWhitespace( + value.encode("utf8") if isinstance(value, str) else value + ) + ) + + @overload + def getRawHeaders(self, name: AnyStr) -> Optional[Sequence[AnyStr]]: + ... + + @overload + def getRawHeaders(self, name: AnyStr, default: _T) -> Union[Sequence[AnyStr], _T]: + ... + + def getRawHeaders( + self, name: AnyStr, default: Optional[_T] = None + ) -> Union[Sequence[AnyStr], Optional[_T]]: + """ + Returns a sequence of headers matching the given name as the raw string + given. + + @param name: The name of the HTTP header to get the values of. + + @param default: The value to return if no header with the given C{name} + exists. + + @return: If the named header is present, a sequence of its + values. Otherwise, C{default}. + """ + encodedName = self._encodeName(name) + values = self._rawHeaders.get(encodedName, []) + if not values: + return default + + if isinstance(name, str): + return [v.decode("utf8") for v in values] + return values + + def getAllRawHeaders(self) -> Iterator[Tuple[bytes, Sequence[bytes]]]: + """ + Return an iterator of key, value pairs of all headers contained in this + object, as L{bytes}. The keys are capitalized in canonical + capitalization. + """ + for k, v in self._rawHeaders.items(): + yield self._canonicalNameCaps(k), v + + def _canonicalNameCaps(self, name: bytes) -> bytes: + """ + Return the canonical name for the given header. + + @param name: The all-lowercase header name to capitalize in its + canonical form. + + @return: The canonical name of the header. + """ + return self._caseMappings.get(name, _dashCapitalize(name)) + + +__all__ = ["Headers"] diff --git a/contrib/python/Twisted/py3/twisted/web/iweb.py b/contrib/python/Twisted/py3/twisted/web/iweb.py new file mode 100644 index 0000000000..1aeb152fd9 --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/iweb.py @@ -0,0 +1,830 @@ +# -*- test-case-name: twisted.web.test -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +Interface definitions for L{twisted.web}. + +@var UNKNOWN_LENGTH: An opaque object which may be used as the value of + L{IBodyProducer.length} to indicate that the length of the entity + body is not known in advance. +""" +from typing import TYPE_CHECKING, Callable, List, Optional + +from zope.interface import Attribute, Interface + +from twisted.cred.credentials import IUsernameDigestHash +from twisted.internet.defer import Deferred +from twisted.internet.interfaces import IPushProducer +from twisted.web.http_headers import Headers + +if TYPE_CHECKING: + from twisted.web.template import Flattenable, Tag + + +class IRequest(Interface): + """ + An HTTP request. + + @since: 9.0 + """ + + method = Attribute("A L{bytes} giving the HTTP method that was used.") + uri = Attribute( + "A L{bytes} giving the full encoded URI which was requested (including" + " query arguments)." + ) + path = Attribute( + "A L{bytes} giving the encoded query path of the request URI (not " + "including query arguments)." + ) + args = Attribute( + "A mapping of decoded query argument names as L{bytes} to " + "corresponding query argument values as L{list}s of L{bytes}. 
" + "For example, for a URI with C{foo=bar&foo=baz&quux=spam} " + "for its query part, C{args} will be C{{b'foo': [b'bar', b'baz'], " + "b'quux': [b'spam']}}." + ) + + prepath = Attribute( + "The URL path segments which have been processed during resource " + "traversal, as a list of L{bytes}." + ) + + postpath = Attribute( + "The URL path segments which have not (yet) been processed " + "during resource traversal, as a list of L{bytes}." + ) + + requestHeaders = Attribute( + "A L{http_headers.Headers} instance giving all received HTTP request " + "headers." + ) + + content = Attribute( + "A file-like object giving the request body. This may be a file on " + "disk, an L{io.BytesIO}, or some other type. The implementation is " + "free to decide on a per-request basis." + ) + + responseHeaders = Attribute( + "A L{http_headers.Headers} instance holding all HTTP response " + "headers to be sent." + ) + + def getHeader(key): + """ + Get an HTTP request header. + + @type key: L{bytes} or L{str} + @param key: The name of the header to get the value of. + + @rtype: L{bytes} or L{str} or L{None} + @return: The value of the specified header, or L{None} if that header + was not present in the request. The string type of the result + matches the type of C{key}. + """ + + def getCookie(key): + """ + Get a cookie that was sent from the network. + + @type key: L{bytes} + @param key: The name of the cookie to get. + + @rtype: L{bytes} or L{None} + @returns: The value of the specified cookie, or L{None} if that cookie + was not present in the request. + """ + + def getAllHeaders(): + """ + Return dictionary mapping the names of all received headers to the last + value received for each. + + Since this method does not return all header information, + C{requestHeaders.getAllRawHeaders()} may be preferred. + """ + + def getRequestHostname(): + """ + Get the hostname that the HTTP client passed in to the request. + + This will either use the C{Host:} header (if it is available; which, + for a spec-compliant request, it will be) or the IP address of the host + we are listening on if the header is unavailable. + + @note: This is the I{host portion} of the requested resource, which + means that: + + 1. it might be an IPv4 or IPv6 address, not just a DNS host + name, + + 2. there's no guarantee it's even a I{valid} host name or IP + address, since the C{Host:} header may be malformed, + + 3. it does not include the port number. + + @returns: the requested hostname + + @rtype: L{bytes} + """ + + def getHost(): + """ + Get my originally requesting transport's host. + + @return: An L{IAddress<twisted.internet.interfaces.IAddress>}. + """ + + def getClientAddress(): + """ + Return the address of the client who submitted this request. + + The address may not be a network address. Callers must check + its type before using it. + + @since: 18.4 + + @return: the client's address. + @rtype: an L{IAddress} provider. + """ + + def getClientIP(): + """ + Return the IP address of the client who submitted this request. + + This method is B{deprecated}. See L{getClientAddress} instead. + + @returns: the client IP address or L{None} if the request was submitted + over a transport where IP addresses do not make sense. + @rtype: L{str} or L{None} + """ + + def getUser(): + """ + Return the HTTP user sent with this request, if any. + + If no user was supplied, return the empty string. + + @returns: the HTTP user, if any + @rtype: L{str} + """ + + def getPassword(): + """ + Return the HTTP password sent with this request, if any. 
+ + If no password was supplied, return the empty string. + + @returns: the HTTP password, if any + @rtype: L{str} + """ + + def isSecure(): + """ + Return True if this request is using a secure transport. + + Normally this method returns True if this request's HTTPChannel + instance is using a transport that implements ISSLTransport. + + This will also return True if setHost() has been called + with ssl=True. + + @returns: True if this request is secure + @rtype: C{bool} + """ + + def getSession(sessionInterface=None): + """ + Look up the session associated with this request or create a new one if + there is not one. + + @return: The L{Session} instance identified by the session cookie in + the request, or the C{sessionInterface} component of that session + if C{sessionInterface} is specified. + """ + + def URLPath(): + """ + @return: A L{URLPath<twisted.python.urlpath.URLPath>} instance + which identifies the URL for which this request is. + """ + + def prePathURL(): + """ + At any time during resource traversal or resource rendering, + returns an absolute URL to the most nested resource which has + yet been reached. + + @see: {twisted.web.server.Request.prepath} + + @return: An absolute URL. + @rtype: L{bytes} + """ + + def rememberRootURL(): + """ + Remember the currently-processed part of the URL for later + recalling. + """ + + def getRootURL(): + """ + Get a previously-remembered URL. + + @return: An absolute URL. + @rtype: L{bytes} + """ + + # Methods for outgoing response + def finish(): + """ + Indicate that the response to this request is complete. + """ + + def write(data): + """ + Write some data to the body of the response to this request. Response + headers are written the first time this method is called, after which + new response headers may not be added. + + @param data: Bytes of the response body. + @type data: L{bytes} + """ + + def addCookie( + k, + v, + expires=None, + domain=None, + path=None, + max_age=None, + comment=None, + secure=None, + ): + """ + Set an outgoing HTTP cookie. + + In general, you should consider using sessions instead of cookies, see + L{twisted.web.server.Request.getSession} and the + L{twisted.web.server.Session} class for details. + """ + + def setResponseCode(code, message=None): + """ + Set the HTTP response code. + + @type code: L{int} + @type message: L{bytes} + """ + + def setHeader(k, v): + """ + Set an HTTP response header. Overrides any previously set values for + this header. + + @type k: L{bytes} or L{str} + @param k: The name of the header for which to set the value. + + @type v: L{bytes} or L{str} + @param v: The value to set for the named header. A L{str} will be + UTF-8 encoded, which may not interoperable with other + implementations. Avoid passing non-ASCII characters if possible. + """ + + def redirect(url): + """ + Utility function that does a redirect. + + The request should have finish() called after this. + """ + + def setLastModified(when): + """ + Set the C{Last-Modified} time for the response to this request. + + If I am called more than once, I ignore attempts to set Last-Modified + earlier, only replacing the Last-Modified time if it is to a later + value. + + If I am a conditional request, I may modify my response code to + L{NOT_MODIFIED<http.NOT_MODIFIED>} if appropriate for the time given. + + @param when: The last time the resource being returned was modified, in + seconds since the epoch. 
+ @type when: L{int} or L{float} + + @return: If I am a C{If-Modified-Since} conditional request and the time + given is not newer than the condition, I return + L{CACHED<http.CACHED>} to indicate that you should write no body. + Otherwise, I return a false value. + """ + + def setETag(etag): + """ + Set an C{entity tag} for the outgoing response. + + That's "entity tag" as in the HTTP/1.1 I{ETag} header, "used for + comparing two or more entities from the same requested resource." + + If I am a conditional request, I may modify my response code to + L{NOT_MODIFIED<http.NOT_MODIFIED>} or + L{PRECONDITION_FAILED<http.PRECONDITION_FAILED>}, if appropriate for the + tag given. + + @param etag: The entity tag for the resource being returned. + @type etag: L{str} + + @return: If I am a C{If-None-Match} conditional request and the tag + matches one in the request, I return L{CACHED<http.CACHED>} to + indicate that you should write no body. Otherwise, I return a + false value. + """ + + def setHost(host, port, ssl=0): + """ + Change the host and port the request thinks it's using. + + This method is useful for working with reverse HTTP proxies (e.g. both + Squid and Apache's mod_proxy can do this), when the address the HTTP + client is using is different than the one we're listening on. + + For example, Apache may be listening on https://www.example.com, and + then forwarding requests to http://localhost:8080, but we don't want + HTML produced by Twisted to say 'http://localhost:8080', they should + say 'https://www.example.com', so we do:: + + request.setHost('www.example.com', 443, ssl=1) + """ + + +class INonQueuedRequestFactory(Interface): + """ + A factory of L{IRequest} objects that does not take a ``queued`` parameter. + """ + + def __call__(channel): + """ + Create an L{IRequest} that is operating on the given channel. There + must only be one L{IRequest} object processing at any given time on a + channel. + + @param channel: A L{twisted.web.http.HTTPChannel} object. + @type channel: L{twisted.web.http.HTTPChannel} + + @return: A request object. + @rtype: L{IRequest} + """ + + +class IAccessLogFormatter(Interface): + """ + An object which can represent an HTTP request as a line of text for + inclusion in an access log file. + """ + + def __call__(timestamp, request): + """ + Generate a line for the access log. + + @param timestamp: The time at which the request was completed in the + standard format for access logs. + @type timestamp: L{unicode} + + @param request: The request object about which to log. + @type request: L{twisted.web.server.Request} + + @return: One line describing the request without a trailing newline. + @rtype: L{unicode} + """ + + +class ICredentialFactory(Interface): + """ + A credential factory defines a way to generate a particular kind of + authentication challenge and a way to interpret the responses to these + challenges. It creates + L{ICredentials<twisted.cred.credentials.ICredentials>} providers from + responses. These objects will be used with L{twisted.cred} to authenticate + an authorize requests. + """ + + scheme = Attribute( + "A L{str} giving the name of the authentication scheme with which " + "this factory is associated. For example, C{'basic'} or C{'digest'}." + ) + + def getChallenge(request): + """ + Generate a new challenge to be sent to a client. + + @type request: L{twisted.web.http.Request} + @param request: The request the response to which this challenge will + be included. 
+ + @rtype: L{dict} + @return: A mapping from L{str} challenge fields to associated L{str} + values. + """ + + def decode(response, request): + """ + Create a credentials object from the given response. + + @type response: L{str} + @param response: scheme specific response string + + @type request: L{twisted.web.http.Request} + @param request: The request being processed (from which the response + was taken). + + @raise twisted.cred.error.LoginFailed: If the response is invalid. + + @rtype: L{twisted.cred.credentials.ICredentials} provider + @return: The credentials represented by the given response. + """ + + +class IBodyProducer(IPushProducer): + """ + Objects which provide L{IBodyProducer} write bytes to an object which + provides L{IConsumer<twisted.internet.interfaces.IConsumer>} by calling its + C{write} method repeatedly. + + L{IBodyProducer} providers may start producing as soon as they have an + L{IConsumer<twisted.internet.interfaces.IConsumer>} provider. That is, they + should not wait for a C{resumeProducing} call to begin writing data. + + L{IConsumer.unregisterProducer<twisted.internet.interfaces.IConsumer.unregisterProducer>} + must not be called. Instead, the + L{Deferred<twisted.internet.defer.Deferred>} returned from C{startProducing} + must be fired when all bytes have been written. + + L{IConsumer.write<twisted.internet.interfaces.IConsumer.write>} may + synchronously invoke any of C{pauseProducing}, C{resumeProducing}, or + C{stopProducing}. These methods must be implemented with this in mind. + + @since: 9.0 + """ + + # Despite the restrictions above and the additional requirements of + # stopProducing documented below, this interface still needs to be an + # IPushProducer subclass. Providers of it will be passed to IConsumer + # providers which only know about IPushProducer and IPullProducer, not + # about this interface. This interface needs to remain close enough to one + # of those interfaces for consumers to work with it. + + length = Attribute( + """ + C{length} is a L{int} indicating how many bytes in total this + L{IBodyProducer} will write to the consumer or L{UNKNOWN_LENGTH} + if this is not known in advance. + """ + ) + + def startProducing(consumer): + """ + Start producing to the given + L{IConsumer<twisted.internet.interfaces.IConsumer>} provider. + + @return: A L{Deferred<twisted.internet.defer.Deferred>} which stops + production of data when L{Deferred.cancel} is called, and which + fires with L{None} when all bytes have been produced or with a + L{Failure<twisted.python.failure.Failure>} if there is any problem + before all bytes have been produced. + """ + + def stopProducing(): + """ + In addition to the standard behavior of + L{IProducer.stopProducing<twisted.internet.interfaces.IProducer.stopProducing>} + (stop producing data), make sure the + L{Deferred<twisted.internet.defer.Deferred>} returned by + C{startProducing} is never fired. + """ + + +class IRenderable(Interface): + """ + An L{IRenderable} is an object that may be rendered by the + L{twisted.web.template} templating system. + """ + + def lookupRenderMethod( + name: str, + ) -> Callable[[Optional[IRequest], "Tag"], "Flattenable"]: + """ + Look up and return the render method associated with the given name. + + @param name: The value of a render directive encountered in the + document returned by a call to L{IRenderable.render}. 
+ + @return: A two-argument callable which will be invoked with the request + being responded to and the tag object on which the render directive + was encountered. + """ + + def render(request: Optional[IRequest]) -> "Flattenable": + """ + Get the document for this L{IRenderable}. + + @param request: The request in response to which this method is being + invoked. + + @return: An object which can be flattened. + """ + + +class ITemplateLoader(Interface): + """ + A loader for templates; something usable as a value for + L{twisted.web.template.Element}'s C{loader} attribute. + """ + + def load() -> List["Flattenable"]: + """ + Load a template suitable for rendering. + + @return: a L{list} of flattenable objects, such as byte and unicode + strings, L{twisted.web.template.Element}s and L{IRenderable} providers. + """ + + +class IResponse(Interface): + """ + An object representing an HTTP response received from an HTTP server. + + @since: 11.1 + """ + + version = Attribute( + "A three-tuple describing the protocol and protocol version " + "of the response. The first element is of type L{str}, the second " + "and third are of type L{int}. For example, C{(b'HTTP', 1, 1)}." + ) + + code = Attribute("The HTTP status code of this response, as a L{int}.") + + phrase = Attribute("The HTTP reason phrase of this response, as a L{str}.") + + headers = Attribute("The HTTP response L{Headers} of this response.") + + length = Attribute( + "The L{int} number of bytes expected to be in the body of this " + "response or L{UNKNOWN_LENGTH} if the server did not indicate how " + "many bytes to expect. For I{HEAD} responses, this will be 0; if " + "the response includes a I{Content-Length} header, it will be " + "available in C{headers}." + ) + + request = Attribute("The L{IClientRequest} that resulted in this response.") + + previousResponse = Attribute( + "The previous L{IResponse} from a redirect, or L{None} if there was no " + "previous response. This can be used to walk the response or request " + "history for redirections." + ) + + def deliverBody(protocol): + """ + Register an L{IProtocol<twisted.internet.interfaces.IProtocol>} provider + to receive the response body. + + The protocol will be connected to a transport which provides + L{IPushProducer}. The protocol's C{connectionLost} method will be + called with: + + - ResponseDone, which indicates that all bytes from the response + have been successfully delivered. + + - PotentialDataLoss, which indicates that it cannot be determined + if the entire response body has been delivered. This only occurs + when making requests to HTTP servers which do not set + I{Content-Length} or a I{Transfer-Encoding} in the response. + + - ResponseFailed, which indicates that some bytes from the response + were lost. The C{reasons} attribute of the exception may provide + more specific indications as to why. + """ + + def setPreviousResponse(response): + """ + Set the reference to the previous L{IResponse}. + + The value of the previous response can be read via + L{IResponse.previousResponse}. + """ + + +class _IRequestEncoder(Interface): + """ + An object encoding data passed to L{IRequest.write}, for example for + compression purpose. + + @since: 12.3 + """ + + def encode(data): + """ + Encode the data given and return the result. + + @param data: The content to encode. + @type data: L{str} + + @return: The encoded data. + @rtype: L{str} + """ + + def finish(): + """ + Callback called when the request is closing. 
+ + @return: If necessary, the pending data accumulated from previous + C{encode} calls. + @rtype: L{str} + """ + + +class _IRequestEncoderFactory(Interface): + """ + A factory for returing L{_IRequestEncoder} instances. + + @since: 12.3 + """ + + def encoderForRequest(request): + """ + If applicable, returns a L{_IRequestEncoder} instance which will encode + the request. + """ + + +class IClientRequest(Interface): + """ + An object representing an HTTP request to make to an HTTP server. + + @since: 13.1 + """ + + method = Attribute( + "The HTTP method for this request, as L{bytes}. For example: " + "C{b'GET'}, C{b'HEAD'}, C{b'POST'}, etc." + ) + + absoluteURI = Attribute( + "The absolute URI of the requested resource, as L{bytes}; or L{None} " + "if the absolute URI cannot be determined." + ) + + headers = Attribute( + "Headers to be sent to the server, as " + "a L{twisted.web.http_headers.Headers} instance." + ) + + +class IAgent(Interface): + """ + An agent makes HTTP requests. + + The way in which requests are issued is left up to each implementation. + Some may issue them directly to the server indicated by the net location + portion of the request URL. Others may use a proxy specified by system + configuration. + + Processing of responses is also left very widely specified. An + implementation may perform no special handling of responses, or it may + implement redirect following or content negotiation, it may implement a + cookie store or automatically respond to authentication challenges. It may + implement many other unforeseen behaviors as well. + + It is also intended that L{IAgent} implementations be composable. An + implementation which provides cookie handling features should re-use an + implementation that provides connection pooling and this combination could + be used by an implementation which adds content negotiation functionality. + Some implementations will be completely self-contained, such as those which + actually perform the network operations to send and receive requests, but + most or all other implementations should implement a small number of new + features (perhaps one new feature) and delegate the rest of the + request/response machinery to another implementation. + + This allows for great flexibility in the behavior an L{IAgent} will + provide. For example, an L{IAgent} with web browser-like behavior could be + obtained by combining a number of (hypothetical) implementations:: + + baseAgent = Agent(reactor) + decode = ContentDecoderAgent(baseAgent, [(b"gzip", GzipDecoder())]) + cookie = CookieAgent(decode, diskStore.cookie) + authenticate = AuthenticateAgent( + cookie, [diskStore.credentials, GtkAuthInterface()]) + cache = CacheAgent(authenticate, diskStore.cache) + redirect = BrowserLikeRedirectAgent(cache, limit=10) + + doSomeRequests(cache) + """ + + def request( + method: bytes, + uri: bytes, + headers: Optional[Headers] = None, + bodyProducer: Optional[IBodyProducer] = None, + ) -> Deferred[IResponse]: + """ + Request the resource at the given location. + + @param method: The request method to use, such as C{b"GET"}, C{b"HEAD"}, + C{b"PUT"}, C{b"POST"}, etc. + + @param uri: The location of the resource to request. This should be an + absolute URI but some implementations may support relative URIs + (with absolute or relative paths). I{HTTP} and I{HTTPS} are the + schemes most likely to be supported but others may be as well. + + @param headers: The headers to send with the request (or L{None} to + send no extra headers). 
An implementation may add its own headers + to this (for example for client identification or content + negotiation). + + @param bodyProducer: An object which can generate bytes to make up the + body of this request (for example, the properly encoded contents of + a file for a file upload). Or, L{None} if the request is to have + no body. + + @return: A L{Deferred} that fires with an L{IResponse} provider when + the header of the response has been received (regardless of the + response status code) or with a L{Failure} if there is any problem + which prevents that response from being received (including + problems that prevent the request from being sent). + """ + + +class IPolicyForHTTPS(Interface): + """ + An L{IPolicyForHTTPS} provides a policy for verifying the certificates of + HTTPS connections, in the form of a L{client connection creator + <twisted.internet.interfaces.IOpenSSLClientConnectionCreator>} per network + location. + + @since: 14.0 + """ + + def creatorForNetloc(hostname, port): + """ + Create a L{client connection creator + <twisted.internet.interfaces.IOpenSSLClientConnectionCreator>} + appropriate for the given URL "netloc"; i.e. hostname and port number + pair. + + @param hostname: The name of the requested remote host. + @type hostname: L{bytes} + + @param port: The number of the requested remote port. + @type port: L{int} + + @return: A client connection creator expressing the security + requirements for the given remote host. + @rtype: L{client connection creator + <twisted.internet.interfaces.IOpenSSLClientConnectionCreator>} + """ + + +class IAgentEndpointFactory(Interface): + """ + An L{IAgentEndpointFactory} provides a way of constructing an endpoint + used for outgoing Agent requests. This is useful in the case of needing to + proxy outgoing connections, or to otherwise vary the transport used. + + @since: 15.0 + """ + + def endpointForURI(uri): + """ + Construct and return an L{IStreamClientEndpoint} for the outgoing + request's connection. + + @param uri: The URI of the request. + @type uri: L{twisted.web.client.URI} + + @return: An endpoint which will have its C{connect} method called to + issue the request. + @rtype: an L{IStreamClientEndpoint} provider + + @raises twisted.internet.error.SchemeNotSupported: If the given + URI's scheme cannot be handled by this factory. + """ + + +UNKNOWN_LENGTH = "twisted.web.iweb.UNKNOWN_LENGTH" + +__all__ = [ + "IUsernameDigestHash", + "ICredentialFactory", + "IRequest", + "IBodyProducer", + "IRenderable", + "IResponse", + "_IRequestEncoder", + "_IRequestEncoderFactory", + "IClientRequest", + "UNKNOWN_LENGTH", +] diff --git a/contrib/python/Twisted/py3/twisted/web/microdom.py b/contrib/python/Twisted/py3/twisted/web/microdom.py new file mode 100644 index 0000000000..b80db5394e --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/microdom.py @@ -0,0 +1,1217 @@ +# -*- test-case-name: twisted.web.test.test_xml -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. +""" +Micro Document Object Model: a partial DOM implementation with SUX. + +This is an implementation of what we consider to be the useful subset of the +DOM. The chief advantage of this library is that, not being burdened with +standards compliance, it can remain very stable between versions. We can also +implement utility 'pythonic' ways to access and mutate the XML tree. + +Since this has not subjected to a serious trial by fire, it is not recommended +to use this outside of Twisted applications. 
However, it seems to work just
+fine for the documentation generator, which parses a fairly representative
+sample of XML.
+
+Microdom mainly focuses on working with HTML and XHTML.
+
+This module is now deprecated.
+"""
+from __future__ import annotations
+
+# System Imports
+import re
+import warnings
+from io import BytesIO, StringIO
+
+from incremental import Version, getVersionString
+
+# Twisted Imports
+from twisted.python.compat import ioType
+from twisted.python.util import InsensitiveDict
+from twisted.web.sux import ParseError, XMLParser
+
+warningString = "twisted.web.microdom was deprecated at {}".format(
+ getVersionString(Version("Twisted", 23, 10, 0))
+)
+warnings.warn(warningString, DeprecationWarning, stacklevel=3)
+
+
+def getElementsByTagName(iNode, name):
+ """
+ Return a list of all child elements of C{iNode} with a name matching
+ C{name}.
+
+ Note that this implementation does not conform to the DOM Level 1 Core
+ specification because it may return C{iNode}.
+
+ @param iNode: An element at which to begin searching. If C{iNode} has a
+ name matching C{name}, it will be included in the result.
+
+ @param name: A C{str} giving the name of the elements to return.
+
+ @return: A C{list} of direct or indirect child elements of C{iNode} with
+ the name C{name}. This may include C{iNode}.
+ """
+ matches = []
+ matches_append = matches.append # faster lookup. don't do this at home
+ slice = [iNode]
+ while len(slice) > 0:
+ c = slice.pop(0)
+ if c.nodeName == name:
+ matches_append(c)
+ slice[:0] = c.childNodes
+ return matches
+
+
+def getElementsByTagNameNoCase(iNode, name):
+ name = name.lower()
+ matches = []
+ matches_append = matches.append
+ slice = [iNode]
+ while len(slice) > 0:
+ c = slice.pop(0)
+ if c.nodeName.lower() == name:
+ matches_append(c)
+ slice[:0] = c.childNodes
+ return matches
+
+
+def _streamWriteWrapper(stream):
+ if ioType(stream) == bytes:
+
+ def w(s):
+ if isinstance(s, str):
+ s = s.encode("utf-8")
+ stream.write(s)
+
+ else:
+
+ def w(s):
+ if isinstance(s, bytes):
+ s = s.decode("utf-8")
+ stream.write(s)
+
+ return w
+
+
+# order is important
+HTML_ESCAPE_CHARS = (
+ ("&", "&amp;"), # don't add any entities before this one
+ ("<", "&lt;"),
+ (">", "&gt;"),
+ ('"', "&quot;"),
+)
+REV_HTML_ESCAPE_CHARS = list(HTML_ESCAPE_CHARS)
+REV_HTML_ESCAPE_CHARS.reverse()
+
+XML_ESCAPE_CHARS = HTML_ESCAPE_CHARS + (("'", "&apos;"),)
+REV_XML_ESCAPE_CHARS = list(XML_ESCAPE_CHARS)
+REV_XML_ESCAPE_CHARS.reverse()
+
+
+def unescape(text, chars=REV_HTML_ESCAPE_CHARS):
+ """
+ Perform the exact opposite of 'escape'.
+ """
+ for s, h in chars:
+ text = text.replace(h, s)
+ return text
+
+
+def escape(text, chars=HTML_ESCAPE_CHARS):
+ """
+ Escape a few XML special chars with XML entities.
+ """
+ for s, h in chars:
+ text = text.replace(s, h)
+ return text
+
+
+class MismatchedTags(Exception):
+ def __init__(self, filename, expect, got, endLine, endCol, begLine, begCol):
+ (
+ self.filename,
+ self.expect,
+ self.got,
+ self.begLine,
+ self.begCol,
+ self.endLine,
+ self.endCol,
+ ) = (filename, expect, got, begLine, begCol, endLine, endCol)
+
+ def __str__(self) -> str:
+ return (
+ "expected </%s>, got </%s> line: %s col: %s, "
+ "began line: %s col: %s"
+ % (
+ self.expect,
+ self.got,
+ self.endLine,
+ self.endCol,
+ self.begLine,
+ self.begCol,
+ )
+ )
+
+
+class Node:
+ nodeName = "Node"
+
+ def __init__(self, parentNode=None):
+ self.parentNode = parentNode
+ self.childNodes = []
+
+ def isEqualToNode(self, other):
+ """
+ Compare this node to C{other}.
If the nodes have the same number of + children and corresponding children are equal to each other, return + C{True}, otherwise return C{False}. + + @type other: L{Node} + @rtype: C{bool} + """ + if len(self.childNodes) != len(other.childNodes): + return False + for a, b in zip(self.childNodes, other.childNodes): + if not a.isEqualToNode(b): + return False + return True + + def writexml( + self, + stream, + indent="", + addindent="", + newl="", + strip=0, + nsprefixes={}, + namespace="", + ): + raise NotImplementedError() + + def toxml( + self, indent="", addindent="", newl="", strip=0, nsprefixes={}, namespace="" + ): + s = StringIO() + self.writexml(s, indent, addindent, newl, strip, nsprefixes, namespace) + rv = s.getvalue() + return rv + + def writeprettyxml(self, stream, indent="", addindent=" ", newl="\n", strip=0): + return self.writexml(stream, indent, addindent, newl, strip) + + def toprettyxml(self, indent="", addindent=" ", newl="\n", strip=0): + return self.toxml(indent, addindent, newl, strip) + + def cloneNode(self, deep=0, parent=None): + raise NotImplementedError() + + def hasChildNodes(self): + if self.childNodes: + return 1 + else: + return 0 + + def appendChild(self, child): + """ + Make the given L{Node} the last child of this node. + + @param child: The L{Node} which will become a child of this node. + + @raise TypeError: If C{child} is not a C{Node} instance. + """ + if not isinstance(child, Node): + raise TypeError("expected Node instance") + self.childNodes.append(child) + child.parentNode = self + + def insertBefore(self, new, ref): + """ + Make the given L{Node} C{new} a child of this node which comes before + the L{Node} C{ref}. + + @param new: A L{Node} which will become a child of this node. + + @param ref: A L{Node} which is already a child of this node which + C{new} will be inserted before. + + @raise TypeError: If C{new} or C{ref} is not a C{Node} instance. + + @return: C{new} + """ + if not isinstance(new, Node) or not isinstance(ref, Node): + raise TypeError("expected Node instance") + i = self.childNodes.index(ref) + new.parentNode = self + self.childNodes.insert(i, new) + return new + + def removeChild(self, child): + """ + Remove the given L{Node} from this node's children. + + @param child: A L{Node} which is a child of this node which will no + longer be a child of this node after this method is called. + + @raise TypeError: If C{child} is not a C{Node} instance. + + @return: C{child} + """ + if not isinstance(child, Node): + raise TypeError("expected Node instance") + if child in self.childNodes: + self.childNodes.remove(child) + child.parentNode = None + return child + + def replaceChild(self, newChild, oldChild): + """ + Replace a L{Node} which is already a child of this node with a + different node. + + @param newChild: A L{Node} which will be made a child of this node. + + @param oldChild: A L{Node} which is a child of this node which will + give up its position to C{newChild}. + + @raise TypeError: If C{newChild} or C{oldChild} is not a C{Node} + instance. + + @raise ValueError: If C{oldChild} is not a child of this C{Node}. 
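A minimal sketch of these child-manipulation methods (the element names are
arbitrary examples; importing the module emits a C{DeprecationWarning})::

    from twisted.web.microdom import Element, Text

    root = Element("ul")
    first = Element("li")
    first.appendChild(Text("one"))
    root.appendChild(first)

    second = Element("li")
    root.insertBefore(second, first)   # children are now [second, first]

    third = Element("li")
    root.replaceChild(third, first)    # children are now [second, third]
    assert first.parentNode is None    # the replaced child is detached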
+ """ + if not isinstance(newChild, Node) or not isinstance(oldChild, Node): + raise TypeError("expected Node instance") + if oldChild.parentNode is not self: + raise ValueError("oldChild is not a child of this node") + self.childNodes[self.childNodes.index(oldChild)] = newChild + oldChild.parentNode = None + newChild.parentNode = self + + def lastChild(self): + return self.childNodes[-1] + + def firstChild(self): + if len(self.childNodes): + return self.childNodes[0] + return None + + # def get_ownerDocument(self): + # """This doesn't really get the owner document; microdom nodes + # don't even have one necessarily. This gets the root node, + # which is usually what you really meant. + # *NOT DOM COMPLIANT.* + # """ + # node=self + # while (node.parentNode): node=node.parentNode + # return node + # ownerDocument=node.get_ownerDocument() + # leaving commented for discussion; see also domhelpers.getParents(node) + + +class Document(Node): + def __init__(self, documentElement=None): + Node.__init__(self) + if documentElement: + self.appendChild(documentElement) + + def cloneNode(self, deep=0, parent=None): + d = Document() + d.doctype = self.doctype + if deep: + newEl = self.documentElement.cloneNode(1, self) + else: + newEl = self.documentElement + d.appendChild(newEl) + return d + + doctype: None | str = None + + def isEqualToDocument(self, n): + return (self.doctype == n.doctype) and Node.isEqualToNode(self, n) + + isEqualToNode = isEqualToDocument + + @property + def documentElement(self): + return self.childNodes[0] + + def appendChild(self, child): + """ + Make the given L{Node} the I{document element} of this L{Document}. + + @param child: The L{Node} to make into this L{Document}'s document + element. + + @raise ValueError: If this document already has a document element. + """ + if self.childNodes: + raise ValueError("Only one element per document.") + Node.appendChild(self, child) + + def writexml( + self, + stream, + indent="", + addindent="", + newl="", + strip=0, + nsprefixes={}, + namespace="", + ): + w = _streamWriteWrapper(stream) + + w('<?xml version="1.0"?>' + newl) + if self.doctype: + w(f"<!DOCTYPE {self.doctype}>{newl}") + self.documentElement.writexml( + stream, indent, addindent, newl, strip, nsprefixes, namespace + ) + + # of dubious utility (?) 
+ def createElement(self, name, **kw): + return Element(name, **kw) + + def createTextNode(self, text): + return Text(text) + + def createComment(self, text): + return Comment(text) + + def getElementsByTagName(self, name): + if self.documentElement.caseInsensitive: + return getElementsByTagNameNoCase(self, name) + return getElementsByTagName(self, name) + + def getElementById(self, id): + childNodes = self.childNodes[:] + while childNodes: + node = childNodes.pop(0) + if node.childNodes: + childNodes.extend(node.childNodes) + if hasattr(node, "getAttribute") and node.getAttribute("id") == id: + return node + + +class EntityReference(Node): + def __init__(self, eref, parentNode=None): + Node.__init__(self, parentNode) + self.eref = eref + self.nodeValue = self.data = "&" + eref + ";" + + def isEqualToEntityReference(self, n): + if not isinstance(n, EntityReference): + return 0 + return (self.eref == n.eref) and (self.nodeValue == n.nodeValue) + + isEqualToNode = isEqualToEntityReference + + def writexml( + self, + stream, + indent="", + addindent="", + newl="", + strip=0, + nsprefixes={}, + namespace="", + ): + w = _streamWriteWrapper(stream) + w("" + self.nodeValue) + + def cloneNode(self, deep=0, parent=None): + return EntityReference(self.eref, parent) + + +class CharacterData(Node): + def __init__(self, data, parentNode=None): + Node.__init__(self, parentNode) + self.value = self.data = self.nodeValue = data + + def isEqualToCharacterData(self, n): + return self.value == n.value + + isEqualToNode = isEqualToCharacterData + + +class Comment(CharacterData): + """ + A comment node. + """ + + def writexml( + self, + stream, + indent="", + addindent="", + newl="", + strip=0, + nsprefixes={}, + namespace="", + ): + w = _streamWriteWrapper(stream) + val = self.data + w(f"<!--{val}-->") + + def cloneNode(self, deep=0, parent=None): + return Comment(self.nodeValue, parent) + + +class Text(CharacterData): + def __init__(self, data, parentNode=None, raw=0): + CharacterData.__init__(self, data, parentNode) + self.raw = raw + + def isEqualToNode(self, other): + """ + Compare this text to C{text}. If the underlying values and the C{raw} + flag are the same, return C{True}, otherwise return C{False}. + """ + return CharacterData.isEqualToNode(self, other) and self.raw == other.raw + + def cloneNode(self, deep=0, parent=None): + return Text(self.nodeValue, parent, self.raw) + + def writexml( + self, + stream, + indent="", + addindent="", + newl="", + strip=0, + nsprefixes={}, + namespace="", + ): + w = _streamWriteWrapper(stream) + if self.raw: + val = self.nodeValue + if not isinstance(val, str): + val = str(self.nodeValue) + else: + v = self.nodeValue + if not isinstance(v, str): + v = str(v) + if strip: + v = " ".join(v.split()) + val = escape(v) + w(val) + + def __repr__(self) -> str: + return "Text(%s" % repr(self.nodeValue) + ")" + + +class CDATASection(CharacterData): + def cloneNode(self, deep=0, parent=None): + return CDATASection(self.nodeValue, parent) + + def writexml( + self, + stream, + indent="", + addindent="", + newl="", + strip=0, + nsprefixes={}, + namespace="", + ): + w = _streamWriteWrapper(stream) + w("<![CDATA[") + w("" + self.nodeValue) + w("]]>") + + +def _genprefix(): + i = 0 + while True: + yield "p" + str(i) + i = i + 1 + + +genprefix = _genprefix() + + +class _Attr(CharacterData): + "Support class for getAttributeNode." 
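# An illustrative usage sketch (the helper name, tag names and text below are
# arbitrary examples, not from the upstream module): it builds a Document by
# hand and serializes it with toxml().  Element is defined later in this
# module, which is fine because the name is only resolved when the helper is
# called.
def _exampleMicrodomDocument():
    doc = Document()
    html = Element("html")
    body = Element("body")
    body.appendChild(Text("hello & goodbye"))  # '&' is escaped to '&amp;' on output
    html.appendChild(body)
    doc.appendChild(html)  # a Document accepts exactly one document element
    return doc.toxml()
    # -> '<?xml version="1.0"?><html><body>hello &amp; goodbye</body></html>'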
+ + +class Element(Node): + preserveCase = 0 + caseInsensitive = 1 + nsprefixes = None + + def __init__( + self, + tagName, + attributes=None, + parentNode=None, + filename=None, + markpos=None, + caseInsensitive=1, + preserveCase=0, + namespace=None, + ): + Node.__init__(self, parentNode) + self.preserveCase = preserveCase or not caseInsensitive + self.caseInsensitive = caseInsensitive + if not preserveCase: + tagName = tagName.lower() + if attributes is None: + self.attributes = {} + else: + self.attributes = attributes + for k, v in self.attributes.items(): + self.attributes[k] = unescape(v) + + if caseInsensitive: + self.attributes = InsensitiveDict(self.attributes, preserve=preserveCase) + + self.endTagName = self.nodeName = self.tagName = tagName + self._filename = filename + self._markpos = markpos + self.namespace = namespace + + def addPrefixes(self, pfxs): + if self.nsprefixes is None: + self.nsprefixes = pfxs + else: + self.nsprefixes.update(pfxs) + + def endTag(self, endTagName): + if not self.preserveCase: + endTagName = endTagName.lower() + self.endTagName = endTagName + + def isEqualToElement(self, n): + if self.caseInsensitive: + return (self.attributes == n.attributes) and ( + self.nodeName.lower() == n.nodeName.lower() + ) + return (self.attributes == n.attributes) and (self.nodeName == n.nodeName) + + def isEqualToNode(self, other): + """ + Compare this element to C{other}. If the C{nodeName}, C{namespace}, + C{attributes}, and C{childNodes} are all the same, return C{True}, + otherwise return C{False}. + """ + return ( + self.nodeName.lower() == other.nodeName.lower() + and self.namespace == other.namespace + and self.attributes == other.attributes + and Node.isEqualToNode(self, other) + ) + + def cloneNode(self, deep=0, parent=None): + clone = Element( + self.tagName, + parentNode=parent, + namespace=self.namespace, + preserveCase=self.preserveCase, + caseInsensitive=self.caseInsensitive, + ) + clone.attributes.update(self.attributes) + if deep: + clone.childNodes = [child.cloneNode(1, clone) for child in self.childNodes] + else: + clone.childNodes = [] + return clone + + def getElementsByTagName(self, name): + if self.caseInsensitive: + return getElementsByTagNameNoCase(self, name) + return getElementsByTagName(self, name) + + def hasAttributes(self): + return 1 + + def getAttribute(self, name, default=None): + return self.attributes.get(name, default) + + def getAttributeNS(self, ns, name, default=None): + nsk = (ns, name) + if nsk in self.attributes: + return self.attributes[nsk] + if ns == self.namespace: + return self.attributes.get(name, default) + return default + + def getAttributeNode(self, name): + return _Attr(self.getAttribute(name), self) + + def setAttribute(self, name, attr): + self.attributes[name] = attr + + def removeAttribute(self, name): + if name in self.attributes: + del self.attributes[name] + + def hasAttribute(self, name): + return name in self.attributes + + def writexml( + self, + stream, + indent="", + addindent="", + newl="", + strip=0, + nsprefixes={}, + namespace="", + ): + """ + Serialize this L{Element} to the given stream. + + @param stream: A file-like object to which this L{Element} will be + written. + + @param nsprefixes: A C{dict} mapping namespace URIs as C{str} to + prefixes as C{str}. This defines the prefixes which are already in + scope in the document at the point at which this L{Element} exists. + This is essentially an implementation detail for namespace support. + Applications should not try to use it. 
+ + @param namespace: The namespace URI as a C{str} which is the default at + the point in the document at which this L{Element} exists. This is + essentially an implementation detail for namespace support. + Applications should not try to use it. + """ + # write beginning + ALLOWSINGLETON = ( + "img", + "br", + "hr", + "base", + "meta", + "link", + "param", + "area", + "input", + "col", + "basefont", + "isindex", + "frame", + ) + BLOCKELEMENTS = ( + "html", + "head", + "body", + "noscript", + "ins", + "del", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "script", + "ul", + "ol", + "dl", + "pre", + "hr", + "blockquote", + "address", + "p", + "div", + "fieldset", + "table", + "tr", + "form", + "object", + "fieldset", + "applet", + "map", + ) + FORMATNICELY = ("tr", "ul", "ol", "head") + + # this should never be necessary unless people start + # changing .tagName on the fly(?) + if not self.preserveCase: + self.endTagName = self.tagName + + w = _streamWriteWrapper(stream) + if self.nsprefixes: + newprefixes = self.nsprefixes.copy() + for ns in nsprefixes.keys(): + if ns in newprefixes: + del newprefixes[ns] + else: + newprefixes = {} + + begin = ["<"] + if self.tagName in BLOCKELEMENTS: + begin = [newl, indent] + begin + bext = begin.extend + writeattr = lambda _atr, _val: bext((" ", _atr, '="', escape(_val), '"')) + + # Make a local for tracking what end tag will be used. If namespace + # prefixes are involved, this will be changed to account for that + # before it's actually used. + endTagName = self.endTagName + + if namespace != self.namespace and self.namespace is not None: + # If the current default namespace is not the namespace of this tag + # (and this tag has a namespace at all) then we'll write out + # something related to namespaces. + if self.namespace in nsprefixes: + # This tag's namespace already has a prefix bound to it. Use + # that prefix. + prefix = nsprefixes[self.namespace] + bext(prefix + ":" + self.tagName) + # Also make sure we use it for the end tag. + endTagName = prefix + ":" + self.endTagName + else: + # This tag's namespace has no prefix bound to it. Change the + # default namespace to this tag's namespace so we don't need + # prefixes. Alternatively, we could add a new prefix binding. + # I'm not sure why the code was written one way rather than the + # other. -exarkun + bext(self.tagName) + writeattr("xmlns", self.namespace) + # The default namespace just changed. Make sure any children + # know about this. + namespace = self.namespace + else: + # This tag has no namespace or its namespace is already the default + # namespace. Nothing extra to do here. 
+ bext(self.tagName) + + j = "".join + for attr, val in sorted(self.attributes.items()): + if isinstance(attr, tuple): + ns, key = attr + if ns in nsprefixes: + prefix = nsprefixes[ns] + else: + prefix = next(genprefix) + newprefixes[ns] = prefix + assert val is not None + writeattr(prefix + ":" + key, val) + else: + assert val is not None + writeattr(attr, val) + if newprefixes: + for ns, prefix in newprefixes.items(): + if prefix: + writeattr("xmlns:" + prefix, ns) + newprefixes.update(nsprefixes) + downprefixes = newprefixes + else: + downprefixes = nsprefixes + w(j(begin)) + if self.childNodes: + w(">") + newindent = indent + addindent + for child in self.childNodes: + if self.tagName in BLOCKELEMENTS and self.tagName in FORMATNICELY: + w(j((newl, newindent))) + child.writexml( + stream, newindent, addindent, newl, strip, downprefixes, namespace + ) + if self.tagName in BLOCKELEMENTS: + w(j((newl, indent))) + w(j(("</", endTagName, ">"))) + elif self.tagName.lower() not in ALLOWSINGLETON: + w(j(("></", endTagName, ">"))) + else: + w(" />") + + def __repr__(self) -> str: + rep = "Element(%s" % repr(self.nodeName) + if self.attributes: + rep += f", attributes={self.attributes!r}" + if self._filename: + rep += f", filename={self._filename!r}" + if self._markpos: + rep += f", markpos={self._markpos!r}" + return rep + ")" + + def __str__(self) -> str: + rep = "<" + self.nodeName + if self._filename or self._markpos: + rep += " (" + if self._filename: + rep += repr(self._filename) + if self._markpos: + rep += " line %s column %s" % self._markpos + if self._filename or self._markpos: + rep += ")" + for item in self.attributes.items(): + rep += " %s=%r" % item + if self.hasChildNodes(): + rep += " >...</%s>" % self.nodeName + else: + rep += " />" + return rep + + +def _unescapeDict(d): + dd = {} + for k, v in d.items(): + dd[k] = unescape(v) + return dd + + +def _reverseDict(d): + dd = {} + for k, v in d.items(): + dd[v] = k + return dd + + +class MicroDOMParser(XMLParser): + # <dash> glyph: a quick scan thru the DTD says BODY, AREA, LINK, IMG, HR, + # P, DT, DD, LI, INPUT, OPTION, THEAD, TFOOT, TBODY, COLGROUP, COL, TR, TH, + # TD, HEAD, BASE, META, HTML all have optional closing tags + + soonClosers = "area link br img hr input base meta".split() + laterClosers = { + "p": ["p", "dt"], + "dt": ["dt", "dd"], + "dd": ["dt", "dd"], + "li": ["li"], + "tbody": ["thead", "tfoot", "tbody"], + "thead": ["thead", "tfoot", "tbody"], + "tfoot": ["thead", "tfoot", "tbody"], + "colgroup": ["colgroup"], + "col": ["col"], + "tr": ["tr"], + "td": ["td"], + "th": ["th"], + "head": ["body"], + "title": ["head", "body"], # this looks wrong... 
+ "option": ["option"], + } + + def __init__( + self, + beExtremelyLenient=0, + caseInsensitive=1, + preserveCase=0, + soonClosers=soonClosers, + laterClosers=laterClosers, + ): + self.elementstack = [] + d = {"xmlns": "xmlns", "": None} + dr = _reverseDict(d) + self.nsstack = [(d, None, dr)] + self.documents = [] + self._mddoctype = None + self.beExtremelyLenient = beExtremelyLenient + self.caseInsensitive = caseInsensitive + self.preserveCase = preserveCase or not caseInsensitive + self.soonClosers = soonClosers + self.laterClosers = laterClosers + # self.indentlevel = 0 + + def shouldPreserveSpace(self): + for edx in range(len(self.elementstack)): + el = self.elementstack[-edx] + if el.tagName == "pre" or el.getAttribute("xml:space", "") == "preserve": + return 1 + return 0 + + def _getparent(self): + if self.elementstack: + return self.elementstack[-1] + else: + return None + + COMMENT = re.compile(r"\s*/[/*]\s*") + + def _fixScriptElement(self, el): + # this deals with case where there is comment or CDATA inside + # <script> tag and we want to do the right thing with it + if not self.beExtremelyLenient or not len(el.childNodes) == 1: + return + c = el.firstChild() + if isinstance(c, Text): + # deal with nasty people who do stuff like: + # <script> // <!-- + # x = 1; + # // --></script> + # tidy does this, for example. + prefix = "" + oldvalue = c.value + match = self.COMMENT.match(oldvalue) + if match: + prefix = match.group() + oldvalue = oldvalue[len(prefix) :] + + # now see if contents are actual node and comment or CDATA + try: + e = parseString("<a>%s</a>" % oldvalue).childNodes[0] + except (ParseError, MismatchedTags): + return + if len(e.childNodes) != 1: + return + e = e.firstChild() + if isinstance(e, (CDATASection, Comment)): + el.childNodes = [] + if prefix: + el.childNodes.append(Text(prefix)) + el.childNodes.append(e) + + def gotDoctype(self, doctype): + self._mddoctype = doctype + + def gotTagStart(self, name, attributes): + # print ' '*self.indentlevel, 'start tag',name + # self.indentlevel += 1 + parent = self._getparent() + if self.beExtremelyLenient and isinstance(parent, Element): + parentName = parent.tagName + myName = name + if self.caseInsensitive: + parentName = parentName.lower() + myName = myName.lower() + if myName in self.laterClosers.get(parentName, []): + self.gotTagEnd(parent.tagName) + parent = self._getparent() + attributes = _unescapeDict(attributes) + namespaces = self.nsstack[-1][0] + newspaces = {} + keysToDelete = [] + for k, v in attributes.items(): + if k.startswith("xmlns"): + spacenames = k.split(":", 1) + if len(spacenames) == 2: + newspaces[spacenames[1]] = v + else: + newspaces[""] = v + keysToDelete.append(k) + for k in keysToDelete: + del attributes[k] + if newspaces: + namespaces = namespaces.copy() + namespaces.update(newspaces) + keysToDelete = [] + for k, v in attributes.items(): + ksplit = k.split(":", 1) + if len(ksplit) == 2: + pfx, tv = ksplit + if pfx != "xml" and pfx in namespaces: + attributes[namespaces[pfx], tv] = v + keysToDelete.append(k) + for k in keysToDelete: + del attributes[k] + el = Element( + name, + attributes, + parent, + self.filename, + self.saveMark(), + caseInsensitive=self.caseInsensitive, + preserveCase=self.preserveCase, + namespace=namespaces.get(""), + ) + revspaces = _reverseDict(newspaces) + el.addPrefixes(revspaces) + + if newspaces: + rscopy = self.nsstack[-1][2].copy() + rscopy.update(revspaces) + self.nsstack.append((namespaces, el, rscopy)) + self.elementstack.append(el) + if parent: + 
parent.appendChild(el) + if self.beExtremelyLenient and el.tagName in self.soonClosers: + self.gotTagEnd(name) + + def _gotStandalone(self, factory, data): + parent = self._getparent() + te = factory(data, parent) + if parent: + parent.appendChild(te) + elif self.beExtremelyLenient: + self.documents.append(te) + + def gotText(self, data): + if data.strip() or self.shouldPreserveSpace(): + self._gotStandalone(Text, data) + + def gotComment(self, data): + self._gotStandalone(Comment, data) + + def gotEntityReference(self, entityRef): + self._gotStandalone(EntityReference, entityRef) + + def gotCData(self, cdata): + self._gotStandalone(CDATASection, cdata) + + def gotTagEnd(self, name): + # print ' '*self.indentlevel, 'end tag',name + # self.indentlevel -= 1 + if not self.elementstack: + if self.beExtremelyLenient: + return + raise MismatchedTags( + *((self.filename, "NOTHING", name) + self.saveMark() + (0, 0)) + ) + el = self.elementstack.pop() + pfxdix = self.nsstack[-1][2] + if self.nsstack[-1][1] is el: + nstuple = self.nsstack.pop() + else: + nstuple = None + if self.caseInsensitive: + tn = el.tagName.lower() + cname = name.lower() + else: + tn = el.tagName + cname = name + + nsplit = name.split(":", 1) + if len(nsplit) == 2: + pfx, newname = nsplit + ns = pfxdix.get(pfx, None) + if ns is not None: + if el.namespace != ns: + if not self.beExtremelyLenient: + raise MismatchedTags( + *( + (self.filename, el.tagName, name) + + self.saveMark() + + el._markpos + ) + ) + if not (tn == cname): + if self.beExtremelyLenient: + if self.elementstack: + lastEl = self.elementstack[0] + for idx in range(len(self.elementstack)): + if self.elementstack[-(idx + 1)].tagName == cname: + self.elementstack[-(idx + 1)].endTag(name) + break + else: + # this was a garbage close tag; wait for a real one + self.elementstack.append(el) + if nstuple is not None: + self.nsstack.append(nstuple) + return + del self.elementstack[-(idx + 1) :] + if not self.elementstack: + self.documents.append(lastEl) + return + else: + raise MismatchedTags( + *((self.filename, el.tagName, name) + self.saveMark() + el._markpos) + ) + el.endTag(name) + if not self.elementstack: + self.documents.append(el) + if self.beExtremelyLenient and el.tagName == "script": + self._fixScriptElement(el) + + def connectionLost(self, reason): + XMLParser.connectionLost(self, reason) # This can cause more events! + if self.elementstack: + if self.beExtremelyLenient: + self.documents.append(self.elementstack[0]) + else: + raise MismatchedTags( + *( + (self.filename, self.elementstack[-1], "END_OF_FILE") + + self.saveMark() + + self.elementstack[-1]._markpos + ) + ) + + +def parse(readable, *args, **kwargs): + """ + Parse HTML or XML readable. 
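As a quick illustration of the lenient parsing that MicroDOMParser implements, a minimal sketch (the markup string is an arbitrary example; parseString is defined just below)::

    from twisted.web.microdom import parseString

    # beExtremelyLenient accepts HTML-style implicit closers, so the second
    # <li> below closes the first one instead of raising MismatchedTags.
    doc = parseString(b"<ul><li>one<li>two</ul>", beExtremelyLenient=1)
    ul = doc.documentElement
    print(ul.tagName, len(ul.childNodes))  # -> ul 2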
+ """ + if not hasattr(readable, "read"): + readable = open(readable, "rb") + mdp = MicroDOMParser(*args, **kwargs) + mdp.filename = getattr(readable, "name", "<xmlfile />") + mdp.makeConnection(None) + if hasattr(readable, "getvalue"): + mdp.dataReceived(readable.getvalue()) + else: + r = readable.read(1024) + while r: + mdp.dataReceived(r) + r = readable.read(1024) + mdp.connectionLost(None) + + if not mdp.documents: + raise ParseError(mdp.filename, 0, 0, "No top-level Nodes in document") + + if mdp.beExtremelyLenient: + if len(mdp.documents) == 1: + d = mdp.documents[0] + if not isinstance(d, Element): + el = Element("html") + el.appendChild(d) + d = el + else: + d = Element("html") + for child in mdp.documents: + d.appendChild(child) + else: + d = mdp.documents[0] + doc = Document(d) + doc.doctype = mdp._mddoctype + return doc + + +def parseString(st, *args, **kw): + if isinstance(st, str): + # this isn't particularly ideal, but it does work. + return parse(BytesIO(st.encode("UTF-16")), *args, **kw) + return parse(BytesIO(st), *args, **kw) + + +def parseXML(readable): + """ + Parse an XML readable object. + """ + return parse(readable, caseInsensitive=0, preserveCase=1) + + +def parseXMLString(st): + """ + Parse an XML readable object. + """ + return parseString(st, caseInsensitive=0, preserveCase=1) + + +class lmx: + """ + Easy creation of XML. + """ + + def __init__(self, node="div"): + if isinstance(node, str): + node = Element(node) + self.node = node + + def __getattr__(self, name): + if name[0] == "_": + raise AttributeError("no private attrs") + return lambda **kw: self.add(name, **kw) + + def __setitem__(self, key, val): + self.node.setAttribute(key, val) + + def __getitem__(self, key): + return self.node.getAttribute(key) + + def text(self, txt, raw=0): + nn = Text(txt, raw=raw) + self.node.appendChild(nn) + return self + + def add(self, tagName, **kw): + newNode = Element(tagName, caseInsensitive=0, preserveCase=0) + self.node.appendChild(newNode) + xf = lmx(newNode) + for k, v in kw.items(): + if k[0] == "_": + k = k[1:] + xf[k] = v + return xf diff --git a/contrib/python/Twisted/py3/twisted/web/newsfragments/.gitignore b/contrib/python/Twisted/py3/twisted/web/newsfragments/.gitignore new file mode 100644 index 0000000000..f935021a8f --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/newsfragments/.gitignore @@ -0,0 +1 @@ +!.gitignore diff --git a/contrib/python/Twisted/py3/twisted/web/pages.py b/contrib/python/Twisted/py3/twisted/web/pages.py new file mode 100644 index 0000000000..54ea1c431b --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/pages.py @@ -0,0 +1,134 @@ +# -*- test-case-name: twisted.web.test.test_pages -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +Utility implementations of L{IResource}. +""" + +__all__ = ( + "errorPage", + "notFound", + "forbidden", +) + +from typing import cast + +from twisted.web import http +from twisted.web.iweb import IRenderable, IRequest +from twisted.web.resource import IResource, Resource +from twisted.web.template import renderElement, tags + + +class _ErrorPage(Resource): + """ + L{_ErrorPage} is a resource that responds to all requests with a particular + (parameterized) HTTP status code and an HTML body containing some + descriptive text. This is useful for rendering simple error pages. + + @see: L{twisted.web.pages.errorPage} + + @ivar _code: An integer HTTP status code which will be used for the + response. 
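The helpers this module exposes are normally grafted into a resource tree with putChild(); a minimal sketch (the paths and the 418 page are arbitrary examples)::

    from twisted.web import pages
    from twisted.web.resource import Resource

    root = Resource()
    # Everything under /missing renders a properly escaped 404 page.
    root.putChild(b"missing", pages.notFound())
    # errorPage() builds the same kind of resource for any other status code.
    root.putChild(b"teapot", pages.errorPage(418, "I'm a teapot", "No coffee served here."))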
+ + @ivar _brief: A short string which will be included in the response body as + the page title. + + @ivar _detail: A longer string which will be included in the response body. + """ + + def __init__(self, code: int, brief: str, detail: str) -> None: + super().__init__() + self._code: int = code + self._brief: str = brief + self._detail: str = detail + + def render(self, request: IRequest) -> object: + """ + Respond to all requests with the given HTTP status code and an HTML + document containing the explanatory strings. + """ + request.setResponseCode(self._code) + request.setHeader(b"content-type", b"text/html; charset=utf-8") + return renderElement( + request, + # cast because the type annotations here seem off; Tag isn't an + # IRenderable but also probably should be? See + # https://github.com/twisted/twisted/issues/4982 + cast( + IRenderable, + tags.html( + tags.head(tags.title(f"{self._code} - {self._brief}")), + tags.body(tags.h1(self._brief), tags.p(self._detail)), + ), + ), + ) + + def getChild(self, path: bytes, request: IRequest) -> Resource: + """ + Handle all requests for which L{_ErrorPage} lacks a child by returning + this error page. + + @param path: A path segment. + + @param request: HTTP request + """ + return self + + +def errorPage(code: int, brief: str, detail: str) -> IResource: + """ + Build a resource that responds to all requests with a particular HTTP + status code and an HTML body containing some descriptive text. This is + useful for rendering simple error pages. + + The resource dynamically handles all paths below it. Use + L{IResource.putChild()} to override a specific path. + + @param code: An integer HTTP status code which will be used for the + response. + + @param brief: A short string which will be included in the response + body as the page title. + + @param detail: A longer string which will be included in the + response body. + + @returns: An L{IResource} + """ + return _ErrorPage(code, brief, detail) + + +def notFound( + brief: str = "No Such Resource", + message: str = "Sorry. No luck finding that resource.", +) -> IResource: + """ + Generate an L{IResource} with a 404 Not Found status code. + + @see: L{twisted.web.pages.errorPage} + + @param brief: A short string displayed as the page title. + + @param brief: A longer string displayed in the page body. + + @returns: An L{IResource} + """ + return _ErrorPage(http.NOT_FOUND, brief, message) + + +def forbidden( + brief: str = "Forbidden Resource", message: str = "Sorry, resource is forbidden." +) -> IResource: + """ + Generate an L{IResource} with a 403 Forbidden status code. + + @see: L{twisted.web.pages.errorPage} + + @param brief: A short string displayed as the page title. + + @param brief: A longer string displayed in the page body. + + @returns: An L{IResource} + """ + return _ErrorPage(http.FORBIDDEN, brief, message) diff --git a/contrib/python/Twisted/py3/twisted/web/proxy.py b/contrib/python/Twisted/py3/twisted/web/proxy.py new file mode 100644 index 0000000000..e31ec7a65d --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/proxy.py @@ -0,0 +1,296 @@ +# -*- test-case-name: twisted.web.test.test_proxy -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +Simplistic HTTP proxy support. + +This comes in two main variants - the Proxy and the ReverseProxy. + +When a Proxy is in use, a browser trying to connect to a server (say, +www.yahoo.com) will be intercepted by the Proxy, and the proxy will covertly +connect to the server, and return the result. 
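A forward proxy built from the classes below needs nothing more than an HTTP factory whose protocol is Proxy; a sketch (port 8080 is an arbitrary choice)::

    from twisted.internet import reactor
    from twisted.web import http, proxy

    factory = http.HTTPFactory()
    factory.protocol = proxy.Proxy
    # Browsers configured to use localhost:8080 as their HTTP proxy now have
    # their requests relayed through ProxyRequest/ProxyClient.
    reactor.listenTCP(8080, factory)
    reactor.run()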
+ +When a ReverseProxy is in use, the client connects directly to the ReverseProxy +(say, www.yahoo.com) which farms off the request to one of a pool of servers, +and returns the result. + +Normally, a Proxy is used on the client end of an Internet connection, while a +ReverseProxy is used on the server end. +""" + +from urllib.parse import quote as urlquote, urlparse, urlunparse + +from twisted.internet import reactor +from twisted.internet.protocol import ClientFactory +from twisted.web.http import _QUEUED_SENTINEL, HTTPChannel, HTTPClient, Request +from twisted.web.resource import Resource +from twisted.web.server import NOT_DONE_YET + + +class ProxyClient(HTTPClient): + """ + Used by ProxyClientFactory to implement a simple web proxy. + + @ivar _finished: A flag which indicates whether or not the original request + has been finished yet. + """ + + _finished = False + + def __init__(self, command, rest, version, headers, data, father): + self.father = father + self.command = command + self.rest = rest + if b"proxy-connection" in headers: + del headers[b"proxy-connection"] + headers[b"connection"] = b"close" + headers.pop(b"keep-alive", None) + self.headers = headers + self.data = data + + def connectionMade(self): + self.sendCommand(self.command, self.rest) + for header, value in self.headers.items(): + self.sendHeader(header, value) + self.endHeaders() + self.transport.write(self.data) + + def handleStatus(self, version, code, message): + self.father.setResponseCode(int(code), message) + + def handleHeader(self, key, value): + # t.web.server.Request sets default values for these headers in its + # 'process' method. When these headers are received from the remote + # server, they ought to override the defaults, rather than append to + # them. + if key.lower() in [b"server", b"date", b"content-type"]: + self.father.responseHeaders.setRawHeaders(key, [value]) + else: + self.father.responseHeaders.addRawHeader(key, value) + + def handleResponsePart(self, buffer): + self.father.write(buffer) + + def handleResponseEnd(self): + """ + Finish the original request, indicating that the response has been + completely written to it, and disconnect the outgoing transport. + """ + if not self._finished: + self._finished = True + self.father.finish() + self.transport.loseConnection() + + +class ProxyClientFactory(ClientFactory): + """ + Used by ProxyRequest to implement a simple web proxy. + """ + + # Type is wrong. See: https://twistedmatrix.com/trac/ticket/10006 + protocol = ProxyClient # type: ignore[assignment] + + def __init__(self, command, rest, version, headers, data, father): + self.father = father + self.command = command + self.rest = rest + self.headers = headers + self.data = data + self.version = version + + def buildProtocol(self, addr): + return self.protocol( + self.command, self.rest, self.version, self.headers, self.data, self.father + ) + + def clientConnectionFailed(self, connector, reason): + """ + Report a connection failure in a response to the incoming request as + an error. + """ + self.father.setResponseCode(501, b"Gateway error") + self.father.responseHeaders.addRawHeader(b"Content-Type", b"text/html") + self.father.write(b"<H1>Could not connect</H1>") + self.father.finish() + + +class ProxyRequest(Request): + """ + Used by Proxy to implement a simple web proxy. + + @ivar reactor: the reactor used to create connections. 
+ @type reactor: object providing L{twisted.internet.interfaces.IReactorTCP} + """ + + protocols = {b"http": ProxyClientFactory} + ports = {b"http": 80} + + def __init__(self, channel, queued=_QUEUED_SENTINEL, reactor=reactor): + Request.__init__(self, channel, queued) + self.reactor = reactor + + def process(self): + parsed = urlparse(self.uri) + protocol = parsed[0] + host = parsed[1].decode("ascii") + port = self.ports[protocol] + if ":" in host: + host, port = host.split(":") + port = int(port) + rest = urlunparse((b"", b"") + parsed[2:]) + if not rest: + rest = rest + b"/" + class_ = self.protocols[protocol] + headers = self.getAllHeaders().copy() + if b"host" not in headers: + headers[b"host"] = host.encode("ascii") + self.content.seek(0, 0) + s = self.content.read() + clientFactory = class_(self.method, rest, self.clientproto, headers, s, self) + self.reactor.connectTCP(host, port, clientFactory) + + +class Proxy(HTTPChannel): + """ + This class implements a simple web proxy. + + Since it inherits from L{twisted.web.http.HTTPChannel}, to use it you + should do something like this:: + + from twisted.web import http + f = http.HTTPFactory() + f.protocol = Proxy + + Make the HTTPFactory a listener on a port as per usual, and you have + a fully-functioning web proxy! + """ + + requestFactory = ProxyRequest + + +class ReverseProxyRequest(Request): + """ + Used by ReverseProxy to implement a simple reverse proxy. + + @ivar proxyClientFactoryClass: a proxy client factory class, used to create + new connections. + @type proxyClientFactoryClass: L{ClientFactory} + + @ivar reactor: the reactor used to create connections. + @type reactor: object providing L{twisted.internet.interfaces.IReactorTCP} + """ + + proxyClientFactoryClass = ProxyClientFactory + + def __init__(self, channel, queued=_QUEUED_SENTINEL, reactor=reactor): + Request.__init__(self, channel, queued) + self.reactor = reactor + + def process(self): + """ + Handle this request by connecting to the proxied server and forwarding + it there, then forwarding the response back as the response to this + request. + """ + self.requestHeaders.setRawHeaders(b"host", [self.factory.host.encode("ascii")]) + clientFactory = self.proxyClientFactoryClass( + self.method, + self.uri, + self.clientproto, + self.getAllHeaders(), + self.content.read(), + self, + ) + self.reactor.connectTCP(self.factory.host, self.factory.port, clientFactory) + + +class ReverseProxy(HTTPChannel): + """ + Implements a simple reverse proxy. + + For details of usage, see the file examples/reverse-proxy.py. + """ + + requestFactory = ReverseProxyRequest + + +class ReverseProxyResource(Resource): + """ + Resource that renders the results gotten from another server + + Put this resource in the tree to cause everything below it to be relayed + to a different server. + + @ivar proxyClientFactoryClass: a proxy client factory class, used to create + new connections. + @type proxyClientFactoryClass: L{ClientFactory} + + @ivar reactor: the reactor used to create connections. + @type reactor: object providing L{twisted.internet.interfaces.IReactorTCP} + """ + + proxyClientFactoryClass = ProxyClientFactory + + def __init__(self, host, port, path, reactor=reactor): + """ + @param host: the host of the web server to proxy. + @type host: C{str} + + @param port: the port of the web server to proxy. + @type port: C{port} + + @param path: the base path to fetch data from. Note that you shouldn't + put any trailing slashes in it, it will be added automatically in + request. 
For example, if you put B{/foo}, a request on B{/bar} will + be proxied to B{/foo/bar}. Any required encoding of special + characters (such as " " or "/") should have been done already. + + @type path: C{bytes} + """ + Resource.__init__(self) + self.host = host + self.port = port + self.path = path + self.reactor = reactor + + def getChild(self, path, request): + """ + Create and return a proxy resource with the same proxy configuration + as this one, except that its path also contains the segment given by + C{path} at the end. + """ + return ReverseProxyResource( + self.host, + self.port, + self.path + b"/" + urlquote(path, safe=b"").encode("utf-8"), + self.reactor, + ) + + def render(self, request): + """ + Render a request by forwarding it to the proxied server. + """ + # RFC 2616 tells us that we can omit the port if it's the default port, + # but we have to provide it otherwise + if self.port == 80: + host = self.host + else: + host = "%s:%d" % (self.host, self.port) + request.requestHeaders.setRawHeaders(b"host", [host.encode("ascii")]) + request.content.seek(0, 0) + qs = urlparse(request.uri)[4] + if qs: + rest = self.path + b"?" + qs + else: + rest = self.path + clientFactory = self.proxyClientFactoryClass( + request.method, + rest, + request.clientproto, + request.getAllHeaders(), + request.content.read(), + request, + ) + self.reactor.connectTCP(self.host, self.port, clientFactory) + return NOT_DONE_YET diff --git a/contrib/python/Twisted/py3/twisted/web/resource.py b/contrib/python/Twisted/py3/twisted/web/resource.py new file mode 100644 index 0000000000..33b172cdbf --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/resource.py @@ -0,0 +1,458 @@ +# -*- test-case-name: twisted.web.test.test_web, twisted.web.test.test_resource -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +Implementation of the lowest-level Resource class. + +See L{twisted.web.pages} for some utility implementations. +""" + + +__all__ = [ + "IResource", + "getChildForRequest", + "Resource", + "ErrorPage", + "NoResource", + "ForbiddenResource", + "EncodingResourceWrapper", +] + +import warnings + +from zope.interface import Attribute, Interface, implementer + +from incremental import Version + +from twisted.python.compat import nativeString +from twisted.python.components import proxyForInterface +from twisted.python.deprecate import deprecatedModuleAttribute +from twisted.python.reflect import prefixedMethodNames +from twisted.web._responses import FORBIDDEN, NOT_FOUND +from twisted.web.error import UnsupportedMethod + + +class IResource(Interface): + """ + A web resource. + """ + + isLeaf = Attribute( + """ + Signal if this IResource implementor is a "leaf node" or not. If True, + getChildWithDefault will not be called on this Resource. + """ + ) + + def getChildWithDefault(name, request): + """ + Return a child with the given name for the given request. + This is the external interface used by the Resource publishing + machinery. If implementing IResource without subclassing + Resource, it must be provided. However, if subclassing Resource, + getChild overridden instead. + + @param name: A single path component from a requested URL. For example, + a request for I{http://example.com/foo/bar} will result in calls to + this method with C{b"foo"} and C{b"bar"} as values for this + argument. + @type name: C{bytes} + + @param request: A representation of all of the information about the + request that is being made for this child. 
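Tying the two files together: a ReverseProxyResource is usually attached to a resource tree with putChild() so that only part of the URL space is relayed; a sketch (host, ports and path prefix are arbitrary examples)::

    from twisted.internet import reactor
    from twisted.web.proxy import ReverseProxyResource
    from twisted.web.resource import Resource
    from twisted.web.server import Site

    root = Resource()
    # Requests for /app/... are forwarded to 127.0.0.1:8080/app/...
    root.putChild(b"app", ReverseProxyResource("127.0.0.1", 8080, b"/app"))
    reactor.listenTCP(8081, Site(root))
    reactor.run()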
+ @type request: L{twisted.web.server.Request} + """ + + def putChild(path: bytes, child: "IResource") -> None: + """ + Put a child L{IResource} implementor at the given path. + + @param path: A single path component, to be interpreted relative to the + path this resource is found at, at which to put the given child. + For example, if resource A can be found at I{http://example.com/foo} + then a call like C{A.putChild(b"bar", B)} will make resource B + available at I{http://example.com/foo/bar}. + + The path component is I{not} URL-encoded -- pass C{b'foo bar'} + rather than C{b'foo%20bar'}. + """ + + def render(request): + """ + Render a request. This is called on the leaf resource for a request. + + @return: Either C{server.NOT_DONE_YET} to indicate an asynchronous or a + C{bytes} instance to write as the response to the request. If + C{NOT_DONE_YET} is returned, at some point later (for example, in a + Deferred callback) call C{request.write(b"<html>")} to write data to + the request, and C{request.finish()} to send the data to the + browser. + + @raise twisted.web.error.UnsupportedMethod: If the HTTP verb + requested is not supported by this resource. + """ + + +def getChildForRequest(resource, request): + """ + Traverse resource tree to find who will handle the request. + """ + while request.postpath and not resource.isLeaf: + pathElement = request.postpath.pop(0) + request.prepath.append(pathElement) + resource = resource.getChildWithDefault(pathElement, request) + return resource + + +@implementer(IResource) +class Resource: + """ + Define a web-accessible resource. + + This serves two main purposes: one is to provide a standard representation + for what HTTP specification calls an 'entity', and the other is to provide + an abstract directory structure for URL retrieval. + """ + + entityType = IResource + + server = None + + def __init__(self): + """ + Initialize. + """ + self.children = {} + + isLeaf = 0 + + ### Abstract Collection Interface + + def listStaticNames(self): + return list(self.children.keys()) + + def listStaticEntities(self): + return list(self.children.items()) + + def listNames(self): + return list(self.listStaticNames()) + self.listDynamicNames() + + def listEntities(self): + return list(self.listStaticEntities()) + self.listDynamicEntities() + + def listDynamicNames(self): + return [] + + def listDynamicEntities(self, request=None): + return [] + + def getStaticEntity(self, name): + return self.children.get(name) + + def getDynamicEntity(self, name, request): + if name not in self.children: + return self.getChild(name, request) + else: + return None + + def delEntity(self, name): + del self.children[name] + + def reallyPutEntity(self, name, entity): + self.children[name] = entity + + # Concrete HTTP interface + + def getChild(self, path, request): + """ + Retrieve a 'child' resource from me. + + Implement this to create dynamic resource generation -- resources which + are always available may be registered with self.putChild(). + + This will not be called if the class-level variable 'isLeaf' is set in + your subclass; instead, the 'postpath' attribute of the request will be + left as a list of the remaining path elements. + + For example, the URL /foo/bar/baz will normally be:: + + | site.resource.getChild('foo').getChild('bar').getChild('baz'). + + However, if the resource returned by 'bar' has isLeaf set to true, then + the getChild call will never be made on it. 
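A minimal sketch of the dynamic-child pattern this docstring describes (the Greeter/GreetingChild names are invented for illustration)::

    from twisted.web.resource import Resource

    class GreetingChild(Resource):
        isLeaf = True

        def __init__(self, name):
            Resource.__init__(self)
            self._name = name

        def render_GET(self, request):
            return b"Hello, " + self._name + b"!"

    class Greeter(Resource):
        def getChild(self, path, request):
            # Called only for path segments not registered via putChild().
            return GreetingChild(path)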
+ + Parameters and return value have the same meaning and requirements as + those defined by L{IResource.getChildWithDefault}. + """ + return _UnsafeNoResource() + + def getChildWithDefault(self, path, request): + """ + Retrieve a static or dynamically generated child resource from me. + + First checks if a resource was added manually by putChild, and then + call getChild to check for dynamic resources. Only override if you want + to affect behaviour of all child lookups, rather than just dynamic + ones. + + This will check to see if I have a pre-registered child resource of the + given name, and call getChild if I do not. + + @see: L{IResource.getChildWithDefault} + """ + if path in self.children: + return self.children[path] + return self.getChild(path, request) + + def getChildForRequest(self, request): + """ + Deprecated in favor of L{getChildForRequest}. + + @see: L{twisted.web.resource.getChildForRequest}. + """ + warnings.warn( + "Please use module level getChildForRequest.", DeprecationWarning, 2 + ) + return getChildForRequest(self, request) + + def putChild(self, path: bytes, child: IResource) -> None: + """ + Register a static child. + + You almost certainly don't want '/' in your path. If you + intended to have the root of a folder, e.g. /foo/, you want + path to be ''. + + @param path: A single path component. + + @param child: The child resource to register. + + @see: L{IResource.putChild} + """ + if not isinstance(path, bytes): + raise TypeError(f"Path segment must be bytes, but {path!r} is {type(path)}") + + self.children[path] = child + # IResource is incomplete and doesn't mention this server attribute, see + # https://github.com/twisted/twisted/issues/11717 + child.server = self.server # type: ignore[attr-defined] + + def render(self, request): + """ + Render a given resource. See L{IResource}'s render method. + + I delegate to methods of self with the form 'render_METHOD' + where METHOD is the HTTP that was used to make the + request. Examples: render_GET, render_HEAD, render_POST, and + so on. Generally you should implement those methods instead of + overriding this one. + + render_METHOD methods are expected to return a byte string which will be + the rendered page, unless the return value is C{server.NOT_DONE_YET}, in + which case it is this class's responsibility to write the results using + C{request.write(data)} and then call C{request.finish()}. + + Old code that overrides render() directly is likewise expected + to return a byte string or NOT_DONE_YET. + + @see: L{IResource.render} + """ + m = getattr(self, "render_" + nativeString(request.method), None) + if not m: + try: + allowedMethods = self.allowedMethods + except AttributeError: + allowedMethods = _computeAllowedMethods(self) + raise UnsupportedMethod(allowedMethods) + return m(request) + + def render_HEAD(self, request): + """ + Default handling of HEAD method. + + I just return self.render_GET(request). When method is HEAD, + the framework will handle this correctly. + """ + return self.render_GET(request) + + +def _computeAllowedMethods(resource): + """ + Compute the allowed methods on a C{Resource} based on defined render_FOO + methods. Used when raising C{UnsupportedMethod} but C{Resource} does + not define C{allowedMethods} attribute. 
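For example, a resource that only defines render_POST advertises its capabilities through this helper (a sketch; the SubmitOnly class is invented)::

    from twisted.web.resource import Resource

    class SubmitOnly(Resource):
        isLeaf = True

        def render_POST(self, request):
            return b"received %d bytes" % (len(request.content.read()),)

    # A GET request makes Resource.render() raise UnsupportedMethod with the
    # methods computed from the render_* names (POST plus the inherited
    # render_HEAD); twisted.web.server.Request.render() turns that into a
    # 405 response carrying an Allow header.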
+ """ + allowedMethods = [] + for name in prefixedMethodNames(resource.__class__, "render_"): + # Potentially there should be an API for encode('ascii') in this + # situation - an API for taking a Python native string (bytes on Python + # 2, text on Python 3) and returning a socket-compatible string type. + allowedMethods.append(name.encode("ascii")) + return allowedMethods + + +class _UnsafeErrorPage(Resource): + """ + L{_UnsafeErrorPage}, publicly available via the deprecated alias + C{ErrorPage}, is a resource which responds with a particular + (parameterized) status and a body consisting of HTML containing some + descriptive text. This is useful for rendering simple error pages. + + Deprecated in Twisted 22.10.0 because it permits HTML injection; use + L{twisted.web.pages.errorPage} instead. + + @ivar template: A native string which will have a dictionary interpolated + into it to generate the response body. The dictionary has the following + keys: + + - C{"code"}: The status code passed to L{_UnsafeErrorPage.__init__}. + - C{"brief"}: The brief description passed to + L{_UnsafeErrorPage.__init__}. + - C{"detail"}: The detailed description passed to + L{_UnsafeErrorPage.__init__}. + + @ivar code: An integer status code which will be used for the response. + @type code: C{int} + + @ivar brief: A short string which will be included in the response body as + the page title. + @type brief: C{str} + + @ivar detail: A longer string which will be included in the response body. + @type detail: C{str} + """ + + template = """ +<html> + <head><title>%(code)s - %(brief)s</title></head> + <body> + <h1>%(brief)s</h1> + <p>%(detail)s</p> + </body> +</html> +""" + + def __init__(self, status, brief, detail): + Resource.__init__(self) + self.code = status + self.brief = brief + self.detail = detail + + def render(self, request): + request.setResponseCode(self.code) + request.setHeader(b"content-type", b"text/html; charset=utf-8") + interpolated = self.template % dict( + code=self.code, brief=self.brief, detail=self.detail + ) + if isinstance(interpolated, str): + return interpolated.encode("utf-8") + return interpolated + + def getChild(self, chnam, request): + return self + + +class _UnsafeNoResource(_UnsafeErrorPage): + """ + L{_UnsafeNoResource}, publicly available via the deprecated alias + C{NoResource}, is a specialization of L{_UnsafeErrorPage} which + returns the HTTP response code I{NOT FOUND}. + + Deprecated in Twisted 22.10.0 because it permits HTML injection; use + L{twisted.web.pages.notFound} instead. + """ + + def __init__(self, message="Sorry. No luck finding that resource."): + _UnsafeErrorPage.__init__(self, NOT_FOUND, "No Such Resource", message) + + +class _UnsafeForbiddenResource(_UnsafeErrorPage): + """ + L{_UnsafeForbiddenResource}, publicly available via the deprecated alias + C{ForbiddenResource} is a specialization of L{_UnsafeErrorPage} which + returns the I{FORBIDDEN} HTTP response code. + + Deprecated in Twisted 22.10.0 because it permits HTML injection; use + L{twisted.web.pages.forbidden} instead. + """ + + def __init__(self, message="Sorry, resource is forbidden."): + _UnsafeErrorPage.__init__(self, FORBIDDEN, "Forbidden Resource", message) + + +# Deliberately undocumented public aliases. See GHSA-vg46-2rrj-3647. 
+ErrorPage = _UnsafeErrorPage +NoResource = _UnsafeNoResource +ForbiddenResource = _UnsafeForbiddenResource + +deprecatedModuleAttribute( + Version("Twisted", 22, 10, 0), + "Use twisted.web.pages.errorPage instead, which properly escapes HTML.", + __name__, + "ErrorPage", +) + +deprecatedModuleAttribute( + Version("Twisted", 22, 10, 0), + "Use twisted.web.pages.notFound instead, which properly escapes HTML.", + __name__, + "NoResource", +) + +deprecatedModuleAttribute( + Version("Twisted", 22, 10, 0), + "Use twisted.web.pages.forbidden instead, which properly escapes HTML.", + __name__, + "ForbiddenResource", +) + + +class _IEncodingResource(Interface): + """ + A resource which knows about L{_IRequestEncoderFactory}. + + @since: 12.3 + """ + + def getEncoder(request): + """ + Parse the request and return an encoder if applicable, using + L{_IRequestEncoderFactory.encoderForRequest}. + + @return: A L{_IRequestEncoder}, or L{None}. + """ + + +@implementer(_IEncodingResource) +class EncodingResourceWrapper(proxyForInterface(IResource)): # type: ignore[misc] + """ + Wrap a L{IResource}, potentially applying an encoding to the response body + generated. + + Note that the returned children resources won't be wrapped, so you have to + explicitly wrap them if you want the encoding to be applied. + + @ivar encoders: A list of + L{_IRequestEncoderFactory<twisted.web.iweb._IRequestEncoderFactory>} + returning L{_IRequestEncoder<twisted.web.iweb._IRequestEncoder>} that + may transform the data passed to C{Request.write}. The list must be + sorted in order of priority: the first encoder factory handling the + request will prevent the others from doing the same. + @type encoders: C{list}. + + @since: 12.3 + """ + + def __init__(self, original, encoders): + super().__init__(original) + self._encoders = encoders + + def getEncoder(self, request): + """ + Browser the list of encoders looking for one applicable encoder. + """ + for encoderFactory in self._encoders: + encoder = encoderFactory.encoderForRequest(request) + if encoder is not None: + return encoder diff --git a/contrib/python/Twisted/py3/twisted/web/rewrite.py b/contrib/python/Twisted/py3/twisted/web/rewrite.py new file mode 100644 index 0000000000..73f3c45b68 --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/rewrite.py @@ -0,0 +1,55 @@ +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +# +from twisted.web import resource + + +class RewriterResource(resource.Resource): + def __init__(self, orig, *rewriteRules): + resource.Resource.__init__(self) + self.resource = orig + self.rewriteRules = list(rewriteRules) + + def _rewrite(self, request): + for rewriteRule in self.rewriteRules: + rewriteRule(request) + + def getChild(self, path, request): + request.postpath.insert(0, path) + request.prepath.pop() + self._rewrite(request) + path = request.postpath.pop(0) + request.prepath.append(path) + return self.resource.getChildWithDefault(path, request) + + def render(self, request): + self._rewrite(request) + return self.resource.render(request) + + +def tildeToUsers(request): + if request.postpath and request.postpath[0][:1] == "~": + request.postpath[:1] = ["users", request.postpath[0][1:]] + request.path = "/" + "/".join(request.prepath + request.postpath) + + +def alias(aliasPath, sourcePath): + """ + I am not a very good aliaser. But I'm the best I can be. 
If I'm + aliasing to a Resource that generates links, and it uses any parts + of request.prepath to do so, the links will not be relative to the + aliased path, but rather to the aliased-to path. That I can't + alias static.File directory listings that nicely. However, I can + still be useful, as many resources will play nice. + """ + sourcePath = sourcePath.split("/") + aliasPath = aliasPath.split("/") + + def rewriter(request): + if request.postpath[: len(aliasPath)] == aliasPath: + after = request.postpath[len(aliasPath) :] + request.postpath = sourcePath + after + request.path = "/" + "/".join(request.prepath + request.postpath) + + return rewriter diff --git a/contrib/python/Twisted/py3/twisted/web/script.py b/contrib/python/Twisted/py3/twisted/web/script.py new file mode 100644 index 0000000000..bc4a90f748 --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/script.py @@ -0,0 +1,193 @@ +# -*- test-case-name: twisted.web.test.test_script -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +I contain PythonScript, which is a very simple python script resource. +""" + + +import os +import traceback +from io import StringIO + +from twisted import copyright +from twisted.python.compat import execfile, networkString +from twisted.python.filepath import _coerceToFilesystemEncoding +from twisted.web import http, resource, server, static, util + +rpyNoResource = """<p>You forgot to assign to the variable "resource" in your script. For example:</p> +<pre> +# MyCoolWebApp.rpy + +import mygreatresource + +resource = mygreatresource.MyGreatResource() +</pre> +""" + + +class AlreadyCached(Exception): + """ + This exception is raised when a path has already been cached. + """ + + +class CacheScanner: + def __init__(self, path, registry): + self.path = path + self.registry = registry + self.doCache = 0 + + def cache(self): + c = self.registry.getCachedPath(self.path) + if c is not None: + raise AlreadyCached(c) + self.recache() + + def recache(self): + self.doCache = 1 + + +noRsrc = resource._UnsafeErrorPage(500, "Whoops! Internal Error", rpyNoResource) + + +def ResourceScript(path, registry): + """ + I am a normal py file which must define a 'resource' global, which should + be an instance of (a subclass of) web.resource.Resource; it will be + renderred. + """ + cs = CacheScanner(path, registry) + glob = { + "__file__": _coerceToFilesystemEncoding("", path), + "resource": noRsrc, + "registry": registry, + "cache": cs.cache, + "recache": cs.recache, + } + try: + execfile(path, glob, glob) + except AlreadyCached as ac: + return ac.args[0] + rsrc = glob["resource"] + if cs.doCache and rsrc is not noRsrc: + registry.cachePath(path, rsrc) + return rsrc + + +def ResourceTemplate(path, registry): + from quixote import ptl_compile # type: ignore[import] + + glob = { + "__file__": _coerceToFilesystemEncoding("", path), + "resource": resource._UnsafeErrorPage( + 500, "Whoops! 
Internal Error", rpyNoResource + ), + "registry": registry, + } + + with open(path) as f: # Not closed by quixote as of 2.9.1 + e = ptl_compile.compile_template(f, path) + code = compile(e, "<source>", "exec") + eval(code, glob, glob) + return glob["resource"] + + +class ResourceScriptWrapper(resource.Resource): + def __init__(self, path, registry=None): + resource.Resource.__init__(self) + self.path = path + self.registry = registry or static.Registry() + + def render(self, request): + res = ResourceScript(self.path, self.registry) + return res.render(request) + + def getChildWithDefault(self, path, request): + res = ResourceScript(self.path, self.registry) + return res.getChildWithDefault(path, request) + + +class ResourceScriptDirectory(resource.Resource): + """ + L{ResourceScriptDirectory} is a resource which serves scripts from a + filesystem directory. File children of a L{ResourceScriptDirectory} will + be served using L{ResourceScript}. Directory children will be served using + another L{ResourceScriptDirectory}. + + @ivar path: A C{str} giving the filesystem path in which children will be + looked up. + + @ivar registry: A L{static.Registry} instance which will be used to decide + how to interpret scripts found as children of this resource. + """ + + def __init__(self, pathname, registry=None): + resource.Resource.__init__(self) + self.path = pathname + self.registry = registry or static.Registry() + + def getChild(self, path, request): + fn = os.path.join(self.path, path) + + if os.path.isdir(fn): + return ResourceScriptDirectory(fn, self.registry) + if os.path.exists(fn): + return ResourceScript(fn, self.registry) + return resource._UnsafeNoResource() + + def render(self, request): + return resource._UnsafeNoResource().render(request) + + +class PythonScript(resource.Resource): + """ + I am an extremely simple dynamic resource; an embedded python script. + + This will execute a file (usually of the extension '.epy') as Python code, + internal to the webserver. + """ + + isLeaf = True + + def __init__(self, filename, registry): + """ + Initialize me with a script name. + """ + self.filename = filename + self.registry = registry + + def render(self, request): + """ + Render me to a web client. + + Load my file, execute it in a special namespace (with 'request' and + '__file__' global vars) and finish the request. Output to the web-page + will NOT be handled with print - standard output goes to the log - but + with request.write. + """ + request.setHeader( + b"x-powered-by", networkString("Twisted/%s" % copyright.version) + ) + namespace = { + "request": request, + "__file__": _coerceToFilesystemEncoding("", self.filename), + "registry": self.registry, + } + try: + execfile(self.filename, namespace, namespace) + except OSError as e: + if e.errno == 2: # file not found + request.setResponseCode(http.NOT_FOUND) + request.write( + resource._UnsafeNoResource("File not found.").render(request) + ) + except BaseException: + io = StringIO() + traceback.print_exc(file=io) + output = util._PRE(io.getvalue()) + output = output.encode("utf8") + request.write(output) + request.finish() + return server.NOT_DONE_YET diff --git a/contrib/python/Twisted/py3/twisted/web/server.py b/contrib/python/Twisted/py3/twisted/web/server.py new file mode 100644 index 0000000000..e8e01ec781 --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/server.py @@ -0,0 +1,906 @@ +# -*- test-case-name: twisted.web.test.test_web -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. 
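The .rpy scripts handled above are typically published through a static.File with a processor mapping; a sketch (the document root is an arbitrary example)::

    from twisted.internet import reactor
    from twisted.web import script, server, static

    root = static.File("/srv/www/htdocs")
    # Files ending in .rpy are executed by ResourceScript instead of being
    # served verbatim; each script must bind an IResource to `resource`.
    root.processors = {".rpy": script.ResourceScript}
    reactor.listenTCP(8080, server.Site(root))
    reactor.run()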
+ +""" +This is a web server which integrates with the twisted.internet infrastructure. + +@var NOT_DONE_YET: A token value which L{twisted.web.resource.IResource.render} + implementations can return to indicate that the application will later call + C{.write} and C{.finish} to complete the request, and that the HTTP + connection should be left open. +@type NOT_DONE_YET: Opaque; do not depend on any particular type for this + value. +""" + + +import copy +import os +import re +import zlib +from binascii import hexlify +from html import escape +from typing import List, Optional +from urllib.parse import quote as _quote + +from zope.interface import implementer + +from incremental import Version + +from twisted import copyright +from twisted.internet import address, interfaces +from twisted.internet.error import AlreadyCalled, AlreadyCancelled +from twisted.logger import Logger +from twisted.python import components, failure, reflect +from twisted.python.compat import nativeString, networkString +from twisted.python.deprecate import deprecatedModuleAttribute +from twisted.spread.pb import Copyable, ViewPoint +from twisted.web import http, iweb, resource, util +from twisted.web.error import UnsupportedMethod +from twisted.web.http import unquote + +NOT_DONE_YET = 1 + +__all__ = [ + "supportedMethods", + "Request", + "Session", + "Site", + "version", + "NOT_DONE_YET", + "GzipEncoderFactory", +] + + +# backwards compatibility +deprecatedModuleAttribute( + Version("Twisted", 12, 1, 0), + "Please use twisted.web.http.datetimeToString instead", + "twisted.web.server", + "date_time_string", +) +deprecatedModuleAttribute( + Version("Twisted", 12, 1, 0), + "Please use twisted.web.http.stringToDatetime instead", + "twisted.web.server", + "string_date_time", +) +date_time_string = http.datetimeToString +string_date_time = http.stringToDatetime + +# Support for other methods may be implemented on a per-resource basis. +supportedMethods = (b"GET", b"HEAD", b"POST") + + +def quote(string, *args, **kwargs): + return _quote(string.decode("charmap"), *args, **kwargs).encode("charmap") + + +def _addressToTuple(addr): + if isinstance(addr, address.IPv4Address): + return ("INET", addr.host, addr.port) + elif isinstance(addr, address.UNIXAddress): + return ("UNIX", addr.name) + else: + return tuple(addr) + + +@implementer(iweb.IRequest) +class Request(Copyable, http.Request, components.Componentized): + """ + An HTTP request. + + @ivar defaultContentType: A L{bytes} giving the default I{Content-Type} + value to send in responses if no other value is set. L{None} disables + the default. + + @ivar _insecureSession: The L{Session} object representing state that will + be transmitted over plain-text HTTP. + + @ivar _secureSession: The L{Session} object representing the state that + will be transmitted only over HTTPS. 
+ """ + + defaultContentType = b"text/html" + + site = None + appRootURL = None + prepath: Optional[List[bytes]] = None + postpath: Optional[List[bytes]] = None + __pychecker__ = "unusednames=issuer" + _inFakeHead = False + _encoder = None + _log = Logger() + + def __init__(self, *args, **kw): + http.Request.__init__(self, *args, **kw) + components.Componentized.__init__(self) + + def getStateToCopyFor(self, issuer): + x = self.__dict__.copy() + del x["transport"] + # XXX refactor this attribute out; it's from protocol + # del x['server'] + del x["channel"] + del x["content"] + del x["site"] + self.content.seek(0, 0) + x["content_data"] = self.content.read() + x["remote"] = ViewPoint(issuer, self) + + # Address objects aren't jellyable + x["host"] = _addressToTuple(x["host"]) + x["client"] = _addressToTuple(x["client"]) + + # Header objects also aren't jellyable. + x["requestHeaders"] = list(x["requestHeaders"].getAllRawHeaders()) + + return x + + # HTML generation helpers + + def sibLink(self, name): + """ + Return the text that links to a sibling of the requested resource. + + @param name: The sibling resource + @type name: C{bytes} + + @return: A relative URL. + @rtype: C{bytes} + """ + if self.postpath: + return (len(self.postpath) * b"../") + name + else: + return name + + def childLink(self, name): + """ + Return the text that links to a child of the requested resource. + + @param name: The child resource + @type name: C{bytes} + + @return: A relative URL. + @rtype: C{bytes} + """ + lpp = len(self.postpath) + if lpp > 1: + return ((lpp - 1) * b"../") + name + elif lpp == 1: + return name + else: # lpp == 0 + if len(self.prepath) and self.prepath[-1]: + return self.prepath[-1] + b"/" + name + else: + return name + + def gotLength(self, length): + """ + Called when HTTP channel got length of content in this request. + + This method is not intended for users. + + @param length: The length of the request body, as indicated by the + request headers. L{None} if the request headers do not indicate a + length. + """ + try: + getContentFile = self.channel.site.getContentFile + except AttributeError: + http.Request.gotLength(self, length) + else: + self.content = getContentFile(length) + + def process(self): + """ + Process a request. + + Find the addressed resource in this request's L{Site}, + and call L{self.render()<Request.render()>} with it. + + @see: L{Site.getResourceFor()} + """ + + # get site from channel + self.site = self.channel.site + + # set various default headers + self.setHeader(b"server", version) + self.setHeader(b"date", http.datetimeToString()) + + # Resource Identification + self.prepath = [] + self.postpath = list(map(unquote, self.path[1:].split(b"/"))) + + # Short-circuit for requests whose path is '*'. + if self.path == b"*": + self._handleStar() + return + + try: + resrc = self.site.getResourceFor(self) + if resource._IEncodingResource.providedBy(resrc): + encoder = resrc.getEncoder(self) + if encoder is not None: + self._encoder = encoder + self.render(resrc) + except BaseException: + self.processingFailed(failure.Failure()) + + def write(self, data): + """ + Write data to the transport (if not responding to a HEAD request). + + @param data: A string to write to the response. + @type data: L{bytes} + """ + if not self.startedWriting: + # Before doing the first write, check to see if a default + # Content-Type header should be supplied. We omit it on + # NOT_MODIFIED and NO_CONTENT responses. 
We also omit it if there + # is a Content-Length header set to 0, as empty bodies don't need + # a content-type. + needsCT = self.code not in (http.NOT_MODIFIED, http.NO_CONTENT) + contentType = self.responseHeaders.getRawHeaders(b"content-type") + contentLength = self.responseHeaders.getRawHeaders(b"content-length") + contentLengthZero = contentLength and (contentLength[0] == b"0") + + if ( + needsCT + and contentType is None + and self.defaultContentType is not None + and not contentLengthZero + ): + self.responseHeaders.setRawHeaders( + b"content-type", [self.defaultContentType] + ) + + # Only let the write happen if we're not generating a HEAD response by + # faking out the request method. Note, if we are doing that, + # startedWriting will never be true, and the above logic may run + # multiple times. It will only actually change the responseHeaders + # once though, so it's still okay. + if not self._inFakeHead: + if self._encoder: + data = self._encoder.encode(data) + http.Request.write(self, data) + + def finish(self): + """ + Override C{http.Request.finish} for possible encoding. + """ + if self._encoder: + data = self._encoder.finish() + if data: + http.Request.write(self, data) + return http.Request.finish(self) + + def render(self, resrc): + """ + Ask a resource to render itself. + + If the resource does not support the requested method, + generate a C{NOT IMPLEMENTED} or C{NOT ALLOWED} response. + + @param resrc: The resource to render. + @type resrc: L{twisted.web.resource.IResource} + + @see: L{IResource.render()<twisted.web.resource.IResource.render()>} + """ + try: + body = resrc.render(self) + except UnsupportedMethod as e: + allowedMethods = e.allowedMethods + if (self.method == b"HEAD") and (b"GET" in allowedMethods): + # We must support HEAD (RFC 2616, 5.1.1). If the + # resource doesn't, fake it by giving the resource + # a 'GET' request and then return only the headers, + # not the body. + self._log.info( + "Using GET to fake a HEAD request for {resrc}", resrc=resrc + ) + self.method = b"GET" + self._inFakeHead = True + body = resrc.render(self) + + if body is NOT_DONE_YET: + self._log.info( + "Tried to fake a HEAD request for {resrc}, but " + "it got away from me.", + resrc=resrc, + ) + # Oh well, I guess we won't include the content length. + else: + self.setHeader(b"content-length", b"%d" % (len(body),)) + + self._inFakeHead = False + self.method = b"HEAD" + self.write(b"") + self.finish() + return + + if self.method in (supportedMethods): + # We MUST include an Allow header + # (RFC 2616, 10.4.6 and 14.7) + self.setHeader(b"Allow", b", ".join(allowedMethods)) + s = ( + """Your browser approached me (at %(URI)s) with""" + """ the method "%(method)s". I only allow""" + """ the method%(plural)s %(allowed)s here.""" + % { + "URI": escape(nativeString(self.uri)), + "method": nativeString(self.method), + "plural": ((len(allowedMethods) > 1) and "s") or "", + "allowed": ", ".join([nativeString(x) for x in allowedMethods]), + } + ) + epage = resource._UnsafeErrorPage( + http.NOT_ALLOWED, "Method Not Allowed", s + ) + body = epage.render(self) + else: + epage = resource._UnsafeErrorPage( + http.NOT_IMPLEMENTED, + "Huh?", + "I don't know how to treat a %s request." 
+ % (escape(self.method.decode("charmap")),), + ) + body = epage.render(self) + # end except UnsupportedMethod + + if body is NOT_DONE_YET: + return + if not isinstance(body, bytes): + body = resource._UnsafeErrorPage( + http.INTERNAL_SERVER_ERROR, + "Request did not return bytes", + "Request: " + # GHSA-vg46-2rrj-3647 note: _PRE does HTML-escape the input. + + util._PRE(reflect.safe_repr(self)) + + "<br />" + + "Resource: " + + util._PRE(reflect.safe_repr(resrc)) + + "<br />" + + "Value: " + + util._PRE(reflect.safe_repr(body)), + ).render(self) + + if self.method == b"HEAD": + if len(body) > 0: + # This is a Bad Thing (RFC 2616, 9.4) + self._log.info( + "Warning: HEAD request {slf} for resource {resrc} is" + " returning a message body. I think I'll eat it.", + slf=self, + resrc=resrc, + ) + self.setHeader(b"content-length", b"%d" % (len(body),)) + self.write(b"") + else: + self.setHeader(b"content-length", b"%d" % (len(body),)) + self.write(body) + self.finish() + + def processingFailed(self, reason): + """ + Finish this request with an indication that processing failed and + possibly display a traceback. + + @param reason: Reason this request has failed. + @type reason: L{twisted.python.failure.Failure} + + @return: The reason passed to this method. + @rtype: L{twisted.python.failure.Failure} + """ + self._log.failure("", failure=reason) + if self.site.displayTracebacks: + body = ( + b"<html><head><title>web.Server Traceback" + b" (most recent call last)</title></head>" + b"<body><b>web.Server Traceback" + b" (most recent call last):</b>\n\n" + + util.formatFailure(reason) + + b"\n\n</body></html>\n" + ) + else: + body = ( + b"<html><head><title>Processing Failed" + b"</title></head><body>" + b"<b>Processing Failed</b></body></html>" + ) + + self.setResponseCode(http.INTERNAL_SERVER_ERROR) + self.setHeader(b"content-type", b"text/html") + self.setHeader(b"content-length", b"%d" % (len(body),)) + self.write(body) + self.finish() + return reason + + def view_write(self, issuer, data): + """Remote version of write; same interface.""" + self.write(data) + + def view_finish(self, issuer): + """Remote version of finish; same interface.""" + self.finish() + + def view_addCookie(self, issuer, k, v, **kwargs): + """Remote version of addCookie; same interface.""" + self.addCookie(k, v, **kwargs) + + def view_setHeader(self, issuer, k, v): + """Remote version of setHeader; same interface.""" + self.setHeader(k, v) + + def view_setLastModified(self, issuer, when): + """Remote version of setLastModified; same interface.""" + self.setLastModified(when) + + def view_setETag(self, issuer, tag): + """Remote version of setETag; same interface.""" + self.setETag(tag) + + def view_setResponseCode(self, issuer, code, message=None): + """ + Remote version of setResponseCode; same interface. + """ + self.setResponseCode(code, message) + + def view_registerProducer(self, issuer, producer, streaming): + """Remote version of registerProducer; same interface. + (requires a remote producer.) + """ + self.registerProducer(_RemoteProducerWrapper(producer), streaming) + + def view_unregisterProducer(self, issuer): + self.unregisterProducer() + + ### these calls remain local + + _secureSession = None + _insecureSession = None + + @property + def session(self): + """ + If a session has already been created or looked up with + L{Request.getSession}, this will return that object. 
(This will always + be the session that matches the security of the request; so if + C{forceNotSecure} is used on a secure request, this will not return + that session.) + + @return: the session attribute + @rtype: L{Session} or L{None} + """ + if self.isSecure(): + return self._secureSession + else: + return self._insecureSession + + def getSession(self, sessionInterface=None, forceNotSecure=False): + """ + Check if there is a session cookie, and if not, create it. + + By default, the cookie with be secure for HTTPS requests and not secure + for HTTP requests. If for some reason you need access to the insecure + cookie from a secure request you can set C{forceNotSecure = True}. + + @param forceNotSecure: Should we retrieve a session that will be + transmitted over HTTP, even if this L{Request} was delivered over + HTTPS? + @type forceNotSecure: L{bool} + """ + # Make sure we aren't creating a secure session on a non-secure page + secure = self.isSecure() and not forceNotSecure + + if not secure: + cookieString = b"TWISTED_SESSION" + sessionAttribute = "_insecureSession" + else: + cookieString = b"TWISTED_SECURE_SESSION" + sessionAttribute = "_secureSession" + + session = getattr(self, sessionAttribute) + + if session is not None: + # We have a previously created session. + try: + # Refresh the session, to keep it alive. + session.touch() + except (AlreadyCalled, AlreadyCancelled): + # Session has already expired. + session = None + + if session is None: + # No session was created yet for this request. + cookiename = b"_".join([cookieString] + self.sitepath) + sessionCookie = self.getCookie(cookiename) + if sessionCookie: + try: + session = self.site.getSession(sessionCookie) + except KeyError: + pass + # if it still hasn't been set, fix it up. + if not session: + session = self.site.makeSession() + self.addCookie(cookiename, session.uid, path=b"/", secure=secure) + + setattr(self, sessionAttribute, session) + + if sessionInterface: + return session.getComponent(sessionInterface) + + return session + + def _prePathURL(self, prepath): + port = self.getHost().port + if self.isSecure(): + default = 443 + else: + default = 80 + if port == default: + hostport = "" + else: + hostport = ":%d" % port + prefix = networkString( + "http%s://%s%s/" + % ( + self.isSecure() and "s" or "", + nativeString(self.getRequestHostname()), + hostport, + ) + ) + path = b"/".join([quote(segment, safe=b"") for segment in prepath]) + return prefix + path + + def prePathURL(self): + return self._prePathURL(self.prepath) + + def URLPath(self): + from twisted.python import urlpath + + return urlpath.URLPath.fromRequest(self) + + def rememberRootURL(self): + """ + Remember the currently-processed part of the URL for later + recalling. + """ + url = self._prePathURL(self.prepath[:-1]) + self.appRootURL = url + + def getRootURL(self): + """ + Get a previously-remembered URL. + + @return: An absolute URL. + @rtype: L{bytes} + """ + return self.appRootURL + + def _handleStar(self): + """ + Handle receiving a request whose path is '*'. + + RFC 7231 defines an OPTIONS * request as being something that a client + can send as a low-effort way to probe server capabilities or readiness. + Rather than bother the user with this, we simply fast-path it back to + an empty 200 OK. Any non-OPTIONS verb gets a 405 Method Not Allowed + telling the client they can only use OPTIONS. 
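The session machinery described above (Request.getSession and the secure/insecure cookie split) is normally used from a resource; a minimal sketch (the Counter class and its visits attribute are invented)::

    from twisted.web.resource import Resource

    class Counter(Resource):
        isLeaf = True

        def render_GET(self, request):
            # Sets the TWISTED_SESSION (or TWISTED_SECURE_SESSION) cookie on
            # first use and returns the same Session object on later requests.
            session = request.getSession()
            session.visits = getattr(session, "visits", 0) + 1
            return b"visit #%d\n" % (session.visits,)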
+ """ + if self.method == b"OPTIONS": + self.setResponseCode(http.OK) + else: + self.setResponseCode(http.NOT_ALLOWED) + self.setHeader(b"Allow", b"OPTIONS") + + # RFC 7231 says we MUST set content-length 0 when responding to this + # with no body. + self.setHeader(b"Content-Length", b"0") + self.finish() + + +@implementer(iweb._IRequestEncoderFactory) +class GzipEncoderFactory: + """ + @cvar compressLevel: The compression level used by the compressor, default + to 9 (highest). + + @since: 12.3 + """ + + _gzipCheckRegex = re.compile(rb"(:?^|[\s,])gzip(:?$|[\s,])") + compressLevel = 9 + + def encoderForRequest(self, request): + """ + Check the headers if the client accepts gzip encoding, and encodes the + request if so. + """ + acceptHeaders = b",".join( + request.requestHeaders.getRawHeaders(b"accept-encoding", []) + ) + if self._gzipCheckRegex.search(acceptHeaders): + encoding = request.responseHeaders.getRawHeaders(b"content-encoding") + if encoding: + encoding = b",".join(encoding + [b"gzip"]) + else: + encoding = b"gzip" + + request.responseHeaders.setRawHeaders(b"content-encoding", [encoding]) + return _GzipEncoder(self.compressLevel, request) + + +@implementer(iweb._IRequestEncoder) +class _GzipEncoder: + """ + An encoder which supports gzip. + + @ivar _zlibCompressor: The zlib compressor instance used to compress the + stream. + + @ivar _request: A reference to the originating request. + + @since: 12.3 + """ + + _zlibCompressor = None + + def __init__(self, compressLevel, request): + self._zlibCompressor = zlib.compressobj( + compressLevel, zlib.DEFLATED, 16 + zlib.MAX_WBITS + ) + self._request = request + + def encode(self, data): + """ + Write to the request, automatically compressing data on the fly. + """ + if not self._request.startedWriting: + # Remove the content-length header, we can't honor it + # because we compress on the fly. + self._request.responseHeaders.removeHeader(b"content-length") + return self._zlibCompressor.compress(data) + + def finish(self): + """ + Finish handling the request request, flushing any data from the zlib + buffer. + """ + remain = self._zlibCompressor.flush() + self._zlibCompressor = None + return remain + + +class _RemoteProducerWrapper: + def __init__(self, remote): + self.resumeProducing = remote.remoteMethod("resumeProducing") + self.pauseProducing = remote.remoteMethod("pauseProducing") + self.stopProducing = remote.remoteMethod("stopProducing") + + +class Session(components.Componentized): + """ + A user's session with a system. + + This utility class contains no functionality, but is used to + represent a session. + + @ivar site: The L{Site} that generated the session. + @type site: L{Site} + + @ivar uid: A unique identifier for the session. + @type uid: L{bytes} + + @ivar _reactor: An object providing L{IReactorTime} to use for scheduling + expiration. + + @ivar sessionTimeout: Time after last modification the session will expire, + in seconds. + @type sessionTimeout: L{float} + + @ivar lastModified: Time the C{touch()} method was last called (or time the + session was created). A UNIX timestamp as returned by + L{IReactorTime.seconds()}. + @type lastModified: L{float} + """ + + sessionTimeout = 900 + + _expireCall = None + + def __init__(self, site, uid, reactor=None): + """ + Initialize a session with a unique ID for that session. + + @param reactor: L{IReactorTime} used to schedule expiration of the + session. If C{None}, the reactor associated with I{site} is used. 
+ """ + super().__init__() + + if reactor is None: + reactor = site.reactor + self._reactor = reactor + + self.site = site + self.uid = uid + self.expireCallbacks = [] + self.touch() + self.sessionNamespaces = {} + + def startCheckingExpiration(self): + """ + Start expiration tracking. + + @return: L{None} + """ + self._expireCall = self._reactor.callLater(self.sessionTimeout, self.expire) + + def notifyOnExpire(self, callback): + """ + Call this callback when the session expires or logs out. + """ + self.expireCallbacks.append(callback) + + def expire(self): + """ + Expire/logout of the session. + """ + del self.site.sessions[self.uid] + for c in self.expireCallbacks: + c() + self.expireCallbacks = [] + if self._expireCall and self._expireCall.active(): + self._expireCall.cancel() + # Break reference cycle. + self._expireCall = None + + def touch(self): + """ + Mark the session as modified, which resets expiration timer. + """ + self.lastModified = self._reactor.seconds() + if self._expireCall is not None: + self._expireCall.reset(self.sessionTimeout) + + +version = networkString(f"TwistedWeb/{copyright.version}") + + +@implementer(interfaces.IProtocolNegotiationFactory) +class Site(http.HTTPFactory): + """ + A web site: manage log, sessions, and resources. + + @ivar requestFactory: A factory which is called with (channel) + and creates L{Request} instances. Default to L{Request}. + + @ivar displayTracebacks: If set, unhandled exceptions raised during + rendering are returned to the client as HTML. Default to C{False}. + + @ivar sessionFactory: factory for sessions objects. Default to L{Session}. + + @ivar sessions: Mapping of session IDs to objects returned by + C{sessionFactory}. + @type sessions: L{dict} mapping L{bytes} to L{Session} given the default + C{sessionFactory} + + @ivar counter: The number of sessions that have been generated. + @type counter: L{int} + + @ivar sessionCheckTime: Deprecated and unused. See + L{Session.sessionTimeout} instead. + """ + + counter = 0 + requestFactory = Request + displayTracebacks = False + sessionFactory = Session + sessionCheckTime = 1800 + _entropy = os.urandom + + def __init__(self, resource, requestFactory=None, *args, **kwargs): + """ + @param resource: The root of the resource hierarchy. All request + traversal for requests received by this factory will begin at this + resource. + @type resource: L{IResource} provider + @param requestFactory: Overwrite for default requestFactory. + @type requestFactory: C{callable} or C{class}. + + @see: L{twisted.web.http.HTTPFactory.__init__} + """ + super().__init__(*args, **kwargs) + self.sessions = {} + self.resource = resource + if requestFactory is not None: + self.requestFactory = requestFactory + + def _openLogFile(self, path): + from twisted.python import logfile + + return logfile.LogFile(os.path.basename(path), os.path.dirname(path)) + + def __getstate__(self): + d = self.__dict__.copy() + d["sessions"] = {} + return d + + def _mkuid(self): + """ + (internal) Generate an opaque, unique ID for a user's session. + """ + self.counter = self.counter + 1 + return hexlify(self._entropy(32)) + + def makeSession(self): + """ + Generate a new Session instance, and store it for future reference. + """ + uid = self._mkuid() + session = self.sessions[uid] = self.sessionFactory(self, uid) + session.startCheckingExpiration() + return session + + def getSession(self, uid): + """ + Get a previously generated session. + + @param uid: Unique ID of the session. + @type uid: L{bytes}. 
+ + @raise KeyError: If the session is not found. + """ + return self.sessions[uid] + + def buildProtocol(self, addr): + """ + Generate a channel attached to this site. + """ + channel = super().buildProtocol(addr) + channel.requestFactory = self.requestFactory + channel.site = self + return channel + + isLeaf = 0 + + def render(self, request): + """ + Redirect because a Site is always a directory. + """ + request.redirect(request.prePathURL() + b"/") + request.finish() + + def getChildWithDefault(self, pathEl, request): + """ + Emulate a resource's getChild method. + """ + request.site = self + return self.resource.getChildWithDefault(pathEl, request) + + def getResourceFor(self, request): + """ + Get a resource for a request. + + This iterates through the resource hierarchy, calling + getChildWithDefault on each resource it finds for a path element, + stopping when it hits an element where isLeaf is true. + """ + request.site = self + # Sitepath is used to determine cookie names between distributed + # servers and disconnected sites. + request.sitepath = copy.copy(request.prepath) + return resource.getChildForRequest(self.resource, request) + + # IProtocolNegotiationFactory + def acceptableProtocols(self): + """ + Protocols this server can speak. + """ + baseProtocols = [b"http/1.1"] + + if http.H2_ENABLED: + baseProtocols.insert(0, b"h2") + + return baseProtocols diff --git a/contrib/python/Twisted/py3/twisted/web/soap.py b/contrib/python/Twisted/py3/twisted/web/soap.py new file mode 100644 index 0000000000..c60bc92b91 --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/soap.py @@ -0,0 +1,166 @@ +# -*- test-case-name: twisted.web.test.test_soap -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + + +""" +SOAP support for twisted.web. + +Requires SOAPpy 0.10.1 or later. + +Maintainer: Itamar Shtull-Trauring + +Future plans: +SOAPContext support of some kind. +Pluggable method lookup policies. +""" + +# SOAPpy +import SOAPpy # type: ignore[import] + +from twisted.internet import defer + +# twisted imports +from twisted.web import client, resource, server + + +class SOAPPublisher(resource.Resource): + """Publish SOAP methods. + + By default, publish methods beginning with 'soap_'. If the method + has an attribute 'useKeywords', it well get the arguments passed + as keyword args. + """ + + isLeaf = 1 + + # override to change the encoding used for responses + encoding = "UTF-8" + + def lookupFunction(self, functionName): + """Lookup published SOAP function. + + Override in subclasses. Default behaviour - publish methods + starting with soap_. + + @return: callable or None if not found. 
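As an illustrative sketch of publishing a method the way this docstring describes (not part of the diff; the class name, method and port are invented, and the old SOAPpy dependency must be installed):

    from twisted.internet import reactor
    from twisted.web import server, soap

    class Calculator(soap.SOAPPublisher):
        def soap_add(self, a, b):
            # exposed to SOAP clients as the remote method "add"
            return a + b

    reactor.listenTCP(8080, server.Site(Calculator()))
    reactor.run()

A matching client call, per the Proxy docstring further down, would be soap.Proxy("http://localhost:8080/").callRemote("add", 1, 2), which returns a Deferred firing with the result.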
+ """ + return getattr(self, "soap_%s" % functionName, None) + + def render(self, request): + """Handle a SOAP command.""" + data = request.content.read() + + p, header, body, attrs = SOAPpy.parseSOAPRPC(data, 1, 1, 1) + + methodName, args, kwargs = p._name, p._aslist, p._asdict + + # deal with changes in SOAPpy 0.11 + if callable(args): + args = args() + if callable(kwargs): + kwargs = kwargs() + + function = self.lookupFunction(methodName) + + if not function: + self._methodNotFound(request, methodName) + return server.NOT_DONE_YET + else: + if hasattr(function, "useKeywords"): + keywords = {} + for k, v in kwargs.items(): + keywords[str(k)] = v + d = defer.maybeDeferred(function, **keywords) + else: + d = defer.maybeDeferred(function, *args) + + d.addCallback(self._gotResult, request, methodName) + d.addErrback(self._gotError, request, methodName) + return server.NOT_DONE_YET + + def _methodNotFound(self, request, methodName): + response = SOAPpy.buildSOAP( + SOAPpy.faultType( + "%s:Client" % SOAPpy.NS.ENV_T, "Method %s not found" % methodName + ), + encoding=self.encoding, + ) + self._sendResponse(request, response, status=500) + + def _gotResult(self, result, request, methodName): + if not isinstance(result, SOAPpy.voidType): + result = {"Result": result} + response = SOAPpy.buildSOAP( + kw={"%sResponse" % methodName: result}, encoding=self.encoding + ) + self._sendResponse(request, response) + + def _gotError(self, failure, request, methodName): + e = failure.value + if isinstance(e, SOAPpy.faultType): + fault = e + else: + fault = SOAPpy.faultType( + "%s:Server" % SOAPpy.NS.ENV_T, "Method %s failed." % methodName + ) + response = SOAPpy.buildSOAP(fault, encoding=self.encoding) + self._sendResponse(request, response, status=500) + + def _sendResponse(self, request, response, status=200): + request.setResponseCode(status) + + if self.encoding is not None: + mimeType = 'text/xml; charset="%s"' % self.encoding + else: + mimeType = "text/xml" + request.setHeader("Content-type", mimeType) + request.setHeader("Content-length", str(len(response))) + request.write(response) + request.finish() + + +class Proxy: + """A Proxy for making remote SOAP calls. + + Pass the URL of the remote SOAP server to the constructor. + + Use proxy.callRemote('foobar', 1, 2) to call remote method + 'foobar' with args 1 and 2, proxy.callRemote('foobar', x=1) + will call foobar with named argument 'x'. + """ + + # at some point this should have encoding etc. kwargs + def __init__(self, url, namespace=None, header=None): + self.url = url + self.namespace = namespace + self.header = header + + def _cbGotResult(self, result): + result = SOAPpy.parseSOAPRPC(result) + if hasattr(result, "Result"): + return result.Result + elif len(result) == 1: + ## SOAPpy 0.11.6 wraps the return results in a containing structure. + ## This check added to make Proxy behaviour emulate SOAPProxy, which + ## flattens the structure by default. + ## This behaviour is OK because even singleton lists are wrapped in + ## another singleton structType, which is almost always useless. 
+ return result[0] + else: + return result + + def callRemote(self, method, *args, **kwargs): + payload = SOAPpy.buildSOAP( + args=args, + kw=kwargs, + method=method, + header=self.header, + namespace=self.namespace, + ) + return client.getPage( + self.url, + postdata=payload, + method="POST", + headers={"content-type": "text/xml", "SOAPAction": method}, + ).addCallback(self._cbGotResult) diff --git a/contrib/python/Twisted/py3/twisted/web/static.py b/contrib/python/Twisted/py3/twisted/web/static.py new file mode 100644 index 0000000000..aeffd03fb1 --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/static.py @@ -0,0 +1,1078 @@ +# -*- test-case-name: twisted.web.test.test_static -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +Static resources for L{twisted.web}. +""" +from __future__ import annotations + +import errno +import itertools +import mimetypes +import os +import time +import warnings +from html import escape +from typing import Any, Callable, Dict, Sequence +from urllib.parse import quote, unquote + +from zope.interface import implementer + +from incremental import Version +from typing_extensions import Literal + +from twisted.internet import abstract, interfaces +from twisted.python import components, filepath, log +from twisted.python.compat import nativeString, networkString +from twisted.python.deprecate import deprecated +from twisted.python.runtime import platformType +from twisted.python.url import URL +from twisted.python.util import InsensitiveDict +from twisted.web import http, resource, server +from twisted.web.util import redirectTo + +dangerousPathError = resource._UnsafeNoResource("Invalid request URL.") + + +def isDangerous(path): + return path == b".." or b"/" in path or networkString(os.sep) in path + + +class Data(resource.Resource): + """ + This is a static, in-memory resource. + """ + + def __init__(self, data, type): + """ + @param data: The bytes that make up this data resource. + @type data: L{bytes} + + @param type: A native string giving the Internet media type for this + content. + @type type: L{str} + """ + resource.Resource.__init__(self) + self.data = data + self.type = type + + def render_GET(self, request): + request.setHeader(b"content-type", networkString(self.type)) + request.setHeader(b"content-length", b"%d" % (len(self.data),)) + if request.method == b"HEAD": + return b"" + return self.data + + render_HEAD = render_GET + + +@deprecated(Version("Twisted", 16, 0, 0)) +def addSlash(request): + """ + Add a trailing slash to C{request}'s URI. Deprecated, do not use. + """ + return _addSlash(request) + + +def _addSlash(request): + """ + Add a trailing slash to C{request}'s URI. + + @param request: The incoming request to add the ending slash to. + @type request: An object conforming to L{twisted.web.iweb.IRequest} + + @return: A URI with a trailing slash, with query and fragment preserved. 
+ @rtype: L{bytes} + """ + url = URL.fromText(request.uri.decode("ascii")) + # Add an empty path segment at the end, so that it adds a trailing slash + url = url.replace(path=list(url.path) + [""]) + return url.asText().encode("ascii") + + +class Redirect(resource.Resource): + def __init__(self, request): + resource.Resource.__init__(self) + self.url = _addSlash(request) + + def render(self, request): + return redirectTo(self.url, request) + + +class Registry(components.Componentized): + """ + I am a Componentized object that will be made available to internal Twisted + file-based dynamic web content such as .rpy and .epy scripts. + """ + + def __init__(self): + components.Componentized.__init__(self) + self._pathCache = {} + + def cachePath(self, path, rsrc): + self._pathCache[path] = rsrc + + def getCachedPath(self, path): + return self._pathCache.get(path) + + +def loadMimeTypes(mimetype_locations=None, init=mimetypes.init): + """ + Produces a mapping of extensions (with leading dot) to MIME types. + + It does this by calling the C{init} function of the L{mimetypes} module. + This will have the side effect of modifying the global MIME types cache + in that module. + + Multiple file locations containing mime-types can be passed as a list. + The files will be sourced in that order, overriding mime-types from the + files sourced beforehand, but only if a new entry explicitly overrides + the current entry. + + @param mimetype_locations: Optional. List of paths to C{mime.types} style + files that should be used. + @type mimetype_locations: iterable of paths or L{None} + @param init: The init function to call. Defaults to the global C{init} + function of the C{mimetypes} module. For internal use (testing) only. + @type init: callable + """ + init(mimetype_locations) + mimetypes.types_map.update( + { + ".conf": "text/plain", + ".diff": "text/plain", + ".flac": "audio/x-flac", + ".java": "text/plain", + ".oz": "text/x-oz", + ".swf": "application/x-shockwave-flash", + ".wml": "text/vnd.wap.wml", + ".xul": "application/vnd.mozilla.xul+xml", + ".patch": "text/plain", + } + ) + return mimetypes.types_map + + +def getTypeAndEncoding(filename, types, encodings, defaultType): + p, ext = filepath.FilePath(filename).splitext() + ext = filepath._coerceToFilesystemEncoding("", ext.lower()) + if ext in encodings: + enc = encodings[ext] + ext = os.path.splitext(p)[1].lower() + else: + enc = None + type = types.get(ext, defaultType) + return type, enc + + +class File(resource.Resource, filepath.FilePath[str]): + """ + File is a resource that represents a plain non-interpreted file + (although it can look for an extension like .rpy or .cgi and hand the + file to a processor for interpretation if you wish). Its constructor + takes a file path. + + Alternatively, you can give a directory path to the constructor. In this + case the resource will represent that directory, and its children will + be files underneath that directory. This provides access to an entire + filesystem tree with a single Resource. + + If you map the URL 'http://server/FILE' to a resource created as + File('/tmp'), then http://server/FILE/ will return an HTML-formatted + listing of the /tmp/ directory, and http://server/FILE/foo/bar.html will + return the contents of /tmp/foo/bar.html . + + @cvar childNotFound: L{Resource} used to render 404 Not Found error pages. + @cvar forbidden: L{Resource} used to render 403 Forbidden error pages. 
+ + @ivar contentTypes: a mapping of extensions to MIME types used to set the + default value for the Content-Type header. + It is initialized with the values returned by L{loadMimeTypes}. + @type contentTypes: C{dict} + + @ivar contentEncodings: a mapping of extensions to encoding types used to + set default value for the Content-Encoding header. + @type contentEncodings: C{dict} + """ + + contentTypes = loadMimeTypes() + + contentEncodings = {".gz": "gzip", ".bz2": "bzip2"} + + processors: Dict[str, Callable[[str, Any], Data]] = {} + + indexNames = ["index", "index.html", "index.htm", "index.rpy"] + + type = None + + def __init__( + self, + path: str, + defaultType: str = "text/html", + ignoredExts: Sequence[str] = (), + registry: Registry | None = None, + allowExt: Literal[0] = 0, + ) -> None: + """ + Create a file with the given path. + + @param path: The filename of the file from which this L{File} will + serve data. + @type path: C{str} + + @param defaultType: A I{major/minor}-style MIME type specifier + indicating the I{Content-Type} with which this L{File}'s data + will be served if a MIME type cannot be determined based on + C{path}'s extension. + @type defaultType: C{str} + + @param ignoredExts: A sequence giving the extensions of paths in the + filesystem which will be ignored for the purposes of child + lookup. For example, if C{ignoredExts} is C{(".bar",)} and + C{path} is a directory containing a file named C{"foo.bar"}, a + request for the C{"foo"} child of this resource will succeed + with a L{File} pointing to C{"foo.bar"}. + + @param registry: The registry object being used to handle this + request. If L{None}, one will be created. + @type registry: L{Registry} + + @param allowExt: Ignored parameter, only present for backwards + compatibility. Do not pass a value for this parameter. + """ + resource.Resource.__init__(self) + filepath.FilePath.__init__(self, path) + self.defaultType = defaultType + if ignoredExts in (0, 1) or allowExt: + warnings.warn("ignoredExts should receive a list, not a boolean") + if ignoredExts or allowExt: + self.ignoredExts = ["*"] + else: + self.ignoredExts = [] + else: + self.ignoredExts = list(ignoredExts) + self.registry = registry or Registry() + + def ignoreExt(self, ext): + """Ignore the given extension. + + Serve file.ext if file is requested + """ + self.ignoredExts.append(ext) + + childNotFound = resource._UnsafeNoResource("File not found.") + forbidden = resource._UnsafeForbiddenResource() + + def directoryListing(self): + """ + Return a resource that generates an HTML listing of the + directory this path represents. + + @return: A resource that renders the directory to HTML. + @rtype: L{DirectoryLister} + """ + path = self.path + names = self.listNames() + return DirectoryLister( + path, names, self.contentTypes, self.contentEncodings, self.defaultType + ) + + def getChild(self, path, request): + """ + If this L{File}"s path refers to a directory, return a L{File} + referring to the file named C{path} in that directory. + + If C{path} is the empty string, return a L{DirectoryLister} + instead. + + @param path: The current path segment. + @type path: L{bytes} + + @param request: The incoming request. + @type request: An that provides L{twisted.web.iweb.IRequest}. + + @return: A resource representing the requested file or + directory, or L{NoResource} if the path cannot be + accessed. + @rtype: An object that provides L{resource.IResource}. 
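As an illustrative sketch of the directory-serving behaviour described in the class docstring above (not part of the diff; the path and port are placeholders):

    from twisted.internet import reactor
    from twisted.web.server import Site
    from twisted.web.static import File

    root = File("/srv/www/htdocs")   # a directory: its children are the files below it
    root.ignoreExt(".html")          # so /page also resolves to page.html
    reactor.listenTCP(8080, Site(root))
    reactor.run()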
+ """ + if isinstance(path, bytes): + try: + # Request calls urllib.unquote on each path segment, + # leaving us with raw bytes. + path = path.decode("utf-8") + except UnicodeDecodeError: + log.err(None, f"Could not decode path segment as utf-8: {path!r}") + return self.childNotFound + + self.restat(reraise=False) + + if not self.isdir(): + return self.childNotFound + + if path: + try: + fpath = self.child(path) + except filepath.InsecurePath: + return self.childNotFound + else: + fpath = self.childSearchPreauth(*self.indexNames) + if fpath is None: + return self.directoryListing() + + if not fpath.exists(): + fpath = fpath.siblingExtensionSearch(*self.ignoredExts) + if fpath is None: + return self.childNotFound + + extension = fpath.splitext()[1] + if platformType == "win32": + # don't want .RPY to be different than .rpy, since that would allow + # source disclosure. + processor = InsensitiveDict(self.processors).get(extension) + else: + processor = self.processors.get(extension) + if processor: + return resource.IResource(processor(fpath.path, self.registry)) + return self.createSimilarFile(fpath.path) + + # methods to allow subclasses to e.g. decrypt files on the fly: + def openForReading(self): + """Open a file and return it.""" + return self.open() + + def getFileSize(self): + """Return file size.""" + return self.getsize() + + def _parseRangeHeader(self, range): + """ + Parse the value of a Range header into (start, stop) pairs. + + In a given pair, either of start or stop can be None, signifying that + no value was provided, but not both. + + @return: A list C{[(start, stop)]} of pairs of length at least one. + + @raise ValueError: if the header is syntactically invalid or if the + Bytes-Unit is anything other than "bytes'. + """ + try: + kind, value = range.split(b"=", 1) + except ValueError: + raise ValueError("Missing '=' separator") + kind = kind.strip() + if kind != b"bytes": + raise ValueError(f"Unsupported Bytes-Unit: {kind!r}") + unparsedRanges = list(filter(None, map(bytes.strip, value.split(b",")))) + parsedRanges = [] + for byteRange in unparsedRanges: + try: + start, end = byteRange.split(b"-", 1) + except ValueError: + raise ValueError(f"Invalid Byte-Range: {byteRange!r}") + if start: + try: + start = int(start) + except ValueError: + raise ValueError(f"Invalid Byte-Range: {byteRange!r}") + else: + start = None + if end: + try: + end = int(end) + except ValueError: + raise ValueError(f"Invalid Byte-Range: {byteRange!r}") + else: + end = None + if start is not None: + if end is not None and start > end: + # Start must be less than or equal to end or it is invalid. + raise ValueError(f"Invalid Byte-Range: {byteRange!r}") + elif end is None: + # One or both of start and end must be specified. Omitting + # both is invalid. + raise ValueError(f"Invalid Byte-Range: {byteRange!r}") + parsedRanges.append((start, end)) + return parsedRanges + + def _rangeToOffsetAndSize(self, start, end): + """ + Convert a start and end from a Range header to an offset and size. + + This method checks that the resulting range overlaps with the resource + being served (and so has the value of C{getFileSize()} as an indirect + input). + + Either but not both of start or end can be L{None}: + + - Omitted start means that the end value is actually a start value + relative to the end of the resource. + + - Omitted end means the end of the resource should be the end of + the range. + + End is interpreted as inclusive, as per RFC 2616. 
+ + If this range doesn't overlap with any of this resource, C{(0, 0)} is + returned, which is not otherwise a valid return value. + + @param start: The start value from the header, or L{None} if one was + not present. + @param end: The end value from the header, or L{None} if one was not + present. + @return: C{(offset, size)} where offset is how far into this resource + the range begins and size is how long the range is, + or C{(0, 0)} if the range does not overlap this resource. + """ + size = self.getFileSize() + if start is None: + start = size - end + end = size + elif end is None: + end = size + elif end < size: + end += 1 + elif end > size: + end = size + if start >= size: + start = end = 0 + return start, (end - start) + + def _contentRange(self, offset, size): + """ + Return a string suitable for the value of a Content-Range header for a + range with the given offset and size. + + The offset and size are not sanity checked in any way. + + @param offset: How far into this resource the range begins. + @param size: How long the range is. + @return: The value as appropriate for the value of a Content-Range + header. + """ + return networkString( + "bytes %d-%d/%d" % (offset, offset + size - 1, self.getFileSize()) + ) + + def _doSingleRangeRequest(self, request, startAndEnd): + """ + Set up the response for Range headers that specify a single range. + + This method checks if the request is satisfiable and sets the response + code and Content-Range header appropriately. The return value + indicates which part of the resource to return. + + @param request: The Request object. + @param startAndEnd: A 2-tuple of start of the byte range as specified by + the header and the end of the byte range as specified by the header. + At most one of the start and end may be L{None}. + @return: A 2-tuple of the offset and size of the range to return. + offset == size == 0 indicates that the request is not satisfiable. + """ + start, end = startAndEnd + offset, size = self._rangeToOffsetAndSize(start, end) + if offset == size == 0: + # This range doesn't overlap with any of this resource, so the + # request is unsatisfiable. + request.setResponseCode(http.REQUESTED_RANGE_NOT_SATISFIABLE) + request.setHeader( + b"content-range", networkString("bytes */%d" % (self.getFileSize(),)) + ) + else: + request.setResponseCode(http.PARTIAL_CONTENT) + request.setHeader(b"content-range", self._contentRange(offset, size)) + return offset, size + + def _doMultipleRangeRequest(self, request, byteRanges): + """ + Set up the response for Range headers that specify multiple ranges. + + This method checks if the request is satisfiable and sets the response + code and Content-Type and Content-Length headers appropriately. The + return value, which is a little complicated, indicates which parts of + the resource to return and the boundaries that should separate the + parts. + + In detail, the return value, C{rangeInfo}, is a + list of 3-tuples C{(partSeparator, partOffset, partSize)}. The + response to this request should be, for each element of C{rangeInfo}, + C{partSeparator} followed by C{partSize} bytes of the resource + starting at C{partOffset}. Each C{partSeparator} includes the + MIME-style boundary and the part-specific Content-type and + Content-range headers.
It is convenient to return the separator as a + concrete string from this method, because this method needs to compute + the number of bytes that will make up the response to be able to set + the Content-Length header of the response accurately. + + @param request: The Request object. + @param byteRanges: A list of C{(start, end)} values as specified by + the header. For each range, at most one of C{start} and C{end} + may be L{None}. + @return: See above. + """ + matchingRangeFound = False + rangeInfo = [] + contentLength = 0 + boundary = networkString(f"{int(time.time() * 1000000):x}{os.getpid():x}") + if self.type: + contentType = self.type + else: + contentType = b"bytes" # It's what Apache does... + for start, end in byteRanges: + partOffset, partSize = self._rangeToOffsetAndSize(start, end) + if partOffset == partSize == 0: + continue + contentLength += partSize + matchingRangeFound = True + partContentRange = self._contentRange(partOffset, partSize) + partSeparator = networkString( + ( + "\r\n" + "--%s\r\n" + "Content-type: %s\r\n" + "Content-range: %s\r\n" + "\r\n" + ) + % ( + nativeString(boundary), + nativeString(contentType), + nativeString(partContentRange), + ) + ) + contentLength += len(partSeparator) + rangeInfo.append((partSeparator, partOffset, partSize)) + if not matchingRangeFound: + request.setResponseCode(http.REQUESTED_RANGE_NOT_SATISFIABLE) + request.setHeader(b"content-length", b"0") + request.setHeader( + b"content-range", networkString("bytes */%d" % (self.getFileSize(),)) + ) + return [], b"" + finalBoundary = b"\r\n--" + boundary + b"--\r\n" + rangeInfo.append((finalBoundary, 0, 0)) + request.setResponseCode(http.PARTIAL_CONTENT) + request.setHeader( + b"content-type", + networkString(f'multipart/byteranges; boundary="{nativeString(boundary)}"'), + ) + request.setHeader( + b"content-length", b"%d" % (contentLength + len(finalBoundary),) + ) + return rangeInfo + + def _setContentHeaders(self, request, size=None): + """ + Set the Content-length and Content-type headers for this request. + + This method is not appropriate for requests for multiple byte ranges; + L{_doMultipleRangeRequest} will set these headers in that case. + + @param request: The L{twisted.web.http.Request} object. + @param size: The size of the response. If not specified, default to + C{self.getFileSize()}. + """ + if size is None: + size = self.getFileSize() + request.setHeader(b"content-length", b"%d" % (size,)) + if self.type: + request.setHeader(b"content-type", networkString(self.type)) + if self.encoding: + request.setHeader(b"content-encoding", networkString(self.encoding)) + + def makeProducer(self, request, fileForReading): + """ + Make a L{StaticProducer} that will produce the body of this response. + + This method will also set the response code and Content-* headers. + + @param request: The L{twisted.web.http.Request} object. + @param fileForReading: The file object containing the resource. + @return: A L{StaticProducer}. Calling C{.start()} on this will begin + producing the response. 
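As an illustrative sketch of exercising this from the client side (not part of the diff; it assumes a File-backed server is already listening on localhost:8080 and that big.bin is at least 100 bytes long):

    from twisted.internet import reactor
    from twisted.web.client import Agent, readBody
    from twisted.web.http_headers import Headers

    agent = Agent(reactor)
    d = agent.request(
        b"GET",
        b"http://localhost:8080/big.bin",
        Headers({b"Range": [b"bytes=0-99"]}),
    )

    def show(response):
        # expect 206 Partial Content and a 100-byte body
        return readBody(response).addCallback(lambda body: print(response.code, len(body)))

    d.addCallback(show)
    d.addBoth(lambda _: reactor.stop())
    reactor.run()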
+ """ + byteRange = request.getHeader(b"range") + if byteRange is None: + self._setContentHeaders(request) + request.setResponseCode(http.OK) + return NoRangeStaticProducer(request, fileForReading) + try: + parsedRanges = self._parseRangeHeader(byteRange) + except ValueError: + log.msg(f"Ignoring malformed Range header {byteRange.decode()!r}") + self._setContentHeaders(request) + request.setResponseCode(http.OK) + return NoRangeStaticProducer(request, fileForReading) + + if len(parsedRanges) == 1: + offset, size = self._doSingleRangeRequest(request, parsedRanges[0]) + self._setContentHeaders(request, size) + return SingleRangeStaticProducer(request, fileForReading, offset, size) + else: + rangeInfo = self._doMultipleRangeRequest(request, parsedRanges) + return MultipleRangeStaticProducer(request, fileForReading, rangeInfo) + + def render_GET(self, request): + """ + Begin sending the contents of this L{File} (or a subset of the + contents, based on the 'range' header) to the given request. + """ + self.restat(False) + + if self.type is None: + self.type, self.encoding = getTypeAndEncoding( + self.basename(), + self.contentTypes, + self.contentEncodings, + self.defaultType, + ) + + if not self.exists(): + return self.childNotFound.render(request) + + if self.isdir(): + return self.redirect(request) + + request.setHeader(b"accept-ranges", b"bytes") + + try: + fileForReading = self.openForReading() + except OSError as e: + if e.errno == errno.EACCES: + return self.forbidden.render(request) + else: + raise + + if request.setLastModified(self.getModificationTime()) is http.CACHED: + # `setLastModified` also sets the response code for us, so if the + # request is cached, we close the file now that we've made sure that + # the request would otherwise succeed and return an empty body. + fileForReading.close() + return b"" + + if request.method == b"HEAD": + # Set the content headers here, rather than making a producer. + self._setContentHeaders(request) + # We've opened the file to make sure it's accessible, so close it + # now that we don't need it. + fileForReading.close() + return b"" + + producer = self.makeProducer(request, fileForReading) + producer.start() + + # and make sure the connection doesn't get closed + return server.NOT_DONE_YET + + render_HEAD = render_GET + + def redirect(self, request): + return redirectTo(_addSlash(request), request) + + def listNames(self): + if not self.isdir(): + return [] + directory = self.listdir() + directory.sort() + return directory + + def listEntities(self): + return list( + map( + lambda fileName, self=self: self.createSimilarFile( + os.path.join(self.path, fileName) + ), + self.listNames(), + ) + ) + + def createSimilarFile(self, path): + f = self.__class__(path, self.defaultType, self.ignoredExts, self.registry) + # refactoring by steps, here - constructor should almost certainly take these + f.processors = self.processors + f.indexNames = self.indexNames[:] + f.childNotFound = self.childNotFound + return f + + +@implementer(interfaces.IPullProducer) +class StaticProducer: + """ + Superclass for classes that implement the business of producing. + + @ivar request: The L{IRequest} to write the contents of the file to. + @ivar fileObject: The file the contents of which to write to the request. + """ + + bufferSize = abstract.FileDescriptor.bufferSize + + def __init__(self, request, fileObject): + """ + Initialize the instance. 
+ """ + self.request = request + self.fileObject = fileObject + + def start(self): + raise NotImplementedError(self.start) + + def resumeProducing(self): + raise NotImplementedError(self.resumeProducing) + + def stopProducing(self): + """ + Stop producing data. + + L{twisted.internet.interfaces.IProducer.stopProducing} + is called when our consumer has died, and subclasses also call this + method when they are done producing data. + """ + self.fileObject.close() + self.request = None + + +class NoRangeStaticProducer(StaticProducer): + """ + A L{StaticProducer} that writes the entire file to the request. + """ + + def start(self): + self.request.registerProducer(self, False) + + def resumeProducing(self): + if not self.request: + return + data = self.fileObject.read(self.bufferSize) + if data: + # this .write will spin the reactor, calling .doWrite and then + # .resumeProducing again, so be prepared for a re-entrant call + self.request.write(data) + else: + self.request.unregisterProducer() + self.request.finish() + self.stopProducing() + + +class SingleRangeStaticProducer(StaticProducer): + """ + A L{StaticProducer} that writes a single chunk of a file to the request. + """ + + def __init__(self, request, fileObject, offset, size): + """ + Initialize the instance. + + @param request: See L{StaticProducer}. + @param fileObject: See L{StaticProducer}. + @param offset: The offset into the file of the chunk to be written. + @param size: The size of the chunk to write. + """ + StaticProducer.__init__(self, request, fileObject) + self.offset = offset + self.size = size + + def start(self): + self.fileObject.seek(self.offset) + self.bytesWritten = 0 + self.request.registerProducer(self, 0) + + def resumeProducing(self): + if not self.request: + return + data = self.fileObject.read(min(self.bufferSize, self.size - self.bytesWritten)) + if data: + self.bytesWritten += len(data) + # this .write will spin the reactor, calling .doWrite and then + # .resumeProducing again, so be prepared for a re-entrant call + self.request.write(data) + if self.request and self.bytesWritten == self.size: + self.request.unregisterProducer() + self.request.finish() + self.stopProducing() + + +class MultipleRangeStaticProducer(StaticProducer): + """ + A L{StaticProducer} that writes several chunks of a file to the request. + """ + + def __init__(self, request, fileObject, rangeInfo): + """ + Initialize the instance. + + @param request: See L{StaticProducer}. + @param fileObject: See L{StaticProducer}. + @param rangeInfo: A list of tuples C{[(boundary, offset, size)]} + where: + - C{boundary} will be written to the request first. + - C{offset} the offset into the file of chunk to write. + - C{size} the size of the chunk to write. 
+ """ + StaticProducer.__init__(self, request, fileObject) + self.rangeInfo = rangeInfo + + def start(self): + self.rangeIter = iter(self.rangeInfo) + self._nextRange() + self.request.registerProducer(self, 0) + + def _nextRange(self): + self.partBoundary, partOffset, self._partSize = next(self.rangeIter) + self._partBytesWritten = 0 + self.fileObject.seek(partOffset) + + def resumeProducing(self): + if not self.request: + return + data = [] + dataLength = 0 + done = False + while dataLength < self.bufferSize: + if self.partBoundary: + dataLength += len(self.partBoundary) + data.append(self.partBoundary) + self.partBoundary = None + p = self.fileObject.read( + min( + self.bufferSize - dataLength, + self._partSize - self._partBytesWritten, + ) + ) + self._partBytesWritten += len(p) + dataLength += len(p) + data.append(p) + if self.request and self._partBytesWritten == self._partSize: + try: + self._nextRange() + except StopIteration: + done = True + break + self.request.write(b"".join(data)) + if done: + self.request.unregisterProducer() + self.request.finish() + self.stopProducing() + + +class ASISProcessor(resource.Resource): + """ + Serve files exactly as responses without generating a status-line or any + headers. Inspired by Apache's mod_asis. + """ + + def __init__(self, path, registry=None): + resource.Resource.__init__(self) + self.path = path + self.registry = registry or Registry() + + def render(self, request): + request.startedWriting = 1 + res = File(self.path, registry=self.registry) + return res.render(request) + + +def formatFileSize(size): + """ + Format the given file size in bytes to human readable format. + """ + if size < 1024: + return "%iB" % size + elif size < (1024**2): + return "%iK" % (size / 1024) + elif size < (1024**3): + return "%iM" % (size / (1024**2)) + else: + return "%iG" % (size / (1024**3)) + + +class DirectoryLister(resource.Resource): + """ + Print the content of a directory. + + @ivar template: page template used to render the content of the directory. + It must contain the format keys B{header} and B{tableContent}. + @type template: C{str} + + @ivar linePattern: template used to render one line in the listing table. + It must contain the format keys B{class}, B{href}, B{text}, B{size}, + B{type} and B{encoding}. + @type linePattern: C{str} + + @ivar contentTypes: a mapping of extensions to MIME types used to populate + the information of a member of this directory. + It is initialized with the value L{File.contentTypes}. + @type contentTypes: C{dict} + + @ivar contentEncodings: a mapping of extensions to encoding types. + It is initialized with the value L{File.contentEncodings}. + @type contentEncodings: C{dict} + + @ivar defaultType: default type used when no mimetype is detected. + @type defaultType: C{str} + + @ivar dirs: filtered content of C{path}, if the whole content should not be + displayed (default to L{None}, which means the actual content of + C{path} is printed). + @type dirs: L{None} or C{list} + + @ivar path: directory which content should be listed. 
+ @type path: C{str} + """ + + template = """<html> +<head> +<title>%(header)s</title> +<style> +.even-dir { background-color: #efe0ef } +.even { background-color: #eee } +.odd-dir {background-color: #f0d0ef } +.odd { background-color: #dedede } +.icon { text-align: center } +.listing { + margin-left: auto; + margin-right: auto; + width: 50%%; + padding: 0.1em; + } + +body { border: 0; padding: 0; margin: 0; background-color: #efefef; } +h1 {padding: 0.1em; background-color: #777; color: white; border-bottom: thin white dashed;} + +</style> +</head> + +<body> +<h1>%(header)s</h1> + +<table> + <thead> + <tr> + <th>Filename</th> + <th>Size</th> + <th>Content type</th> + <th>Content encoding</th> + </tr> + </thead> + <tbody> +%(tableContent)s + </tbody> +</table> + +</body> +</html> +""" + + linePattern = """<tr class="%(class)s"> + <td><a href="%(href)s">%(text)s</a></td> + <td>%(size)s</td> + <td>%(type)s</td> + <td>%(encoding)s</td> +</tr> +""" + + def __init__( + self, + pathname, + dirs=None, + contentTypes=File.contentTypes, + contentEncodings=File.contentEncodings, + defaultType="text/html", + ): + resource.Resource.__init__(self) + self.contentTypes = contentTypes + self.contentEncodings = contentEncodings + self.defaultType = defaultType + # dirs allows usage of the File to specify what gets listed + self.dirs = dirs + self.path = pathname + + def _getFilesAndDirectories(self, directory): + """ + Helper returning files and directories in given directory listing, with + attributes to be used to build a table content with + C{self.linePattern}. + + @return: tuple of (directories, files) + @rtype: C{tuple} of C{list} + """ + files = [] + dirs = [] + + for path in directory: + if isinstance(path, bytes): + path = path.decode("utf8") + + url = quote(path, "/") + escapedPath = escape(path) + childPath = filepath.FilePath(self.path).child(path) + + if childPath.isdir(): + dirs.append( + { + "text": escapedPath + "/", + "href": url + "/", + "size": "", + "type": "[Directory]", + "encoding": "", + } + ) + else: + mimetype, encoding = getTypeAndEncoding( + path, self.contentTypes, self.contentEncodings, self.defaultType + ) + try: + size = childPath.getsize() + except OSError: + continue + files.append( + { + "text": escapedPath, + "href": url, + "type": "[%s]" % mimetype, + "encoding": (encoding and "[%s]" % encoding or ""), + "size": formatFileSize(size), + } + ) + return dirs, files + + def _buildTableContent(self, elements): + """ + Build a table content using C{self.linePattern} and giving elements odd + and even classes. + """ + tableContent = [] + rowClasses = itertools.cycle(["odd", "even"]) + for element, rowClass in zip(elements, rowClasses): + element["class"] = rowClass + tableContent.append(self.linePattern % element) + return tableContent + + def render(self, request): + """ + Render a listing of the content of C{self.path}. 
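The sizes shown in the rendered table come from formatFileSize above; for reference (editorial examples, not part of the diff), its thresholds are powers of 1024 and fractional values are truncated:

    formatFileSize(512)           # '512B'
    formatFileSize(1536)          # '1K'   (1536 / 1024 == 1.5, truncated by %i)
    formatFileSize(3 * 1024**2)   # '3M'
    formatFileSize(5 * 1024**3)   # '5G'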
+ """ + request.setHeader(b"content-type", b"text/html; charset=utf-8") + if self.dirs is None: + directory = os.listdir(self.path) + directory.sort() + else: + directory = self.dirs + + dirs, files = self._getFilesAndDirectories(directory) + + tableContent = "".join(self._buildTableContent(dirs + files)) + + header = "Directory listing for {}".format( + escape(unquote(nativeString(request.uri))), + ) + + done = self.template % {"header": header, "tableContent": tableContent} + done = done.encode("utf8") + + return done + + def __repr__(self) -> str: + return "<DirectoryLister of %r>" % self.path + + __str__ = __repr__ diff --git a/contrib/python/Twisted/py3/twisted/web/sux.py b/contrib/python/Twisted/py3/twisted/web/sux.py new file mode 100644 index 0000000000..69ad4dff95 --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/sux.py @@ -0,0 +1,644 @@ +# -*- test-case-name: twisted.web.test.test_xml -*- +# +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + + +""" +*S*mall, *U*ncomplicated *X*ML. + +This is a very simple implementation of XML/HTML as a network +protocol. It is not at all clever. Its main features are that it +does not: + + - support namespaces + - mung mnemonic entity references + - validate + - perform *any* external actions (such as fetching URLs or writing files) + under *any* circumstances + - has lots and lots of horrible hacks for supporting broken HTML (as an + option, they're not on by default). +""" + + +from twisted.internet.protocol import Protocol +from twisted.python.reflect import prefixedMethodNames + +# Elements of the three-tuples in the state table. +BEGIN_HANDLER = 0 +DO_HANDLER = 1 +END_HANDLER = 2 + +identChars = ".-_:" +lenientIdentChars = identChars + ";+#/%~" + + +def nop(*args, **kw): + "Do nothing." + + +def unionlist(*args): + l = [] + for x in args: + l.extend(x) + d = {x: 1 for x in l} + return d.keys() + + +def zipfndict(*args, **kw): + default = kw.get("default", nop) + d = {} + for key in unionlist(*(fndict.keys() for fndict in args)): + d[key] = tuple(x.get(key, default) for x in args) + return d + + +def prefixedMethodClassDict(clazz, prefix): + return { + name: getattr(clazz, prefix + name) + for name in prefixedMethodNames(clazz, prefix) + } + + +def prefixedMethodObjDict(obj, prefix): + return { + name: getattr(obj, prefix + name) + for name in prefixedMethodNames(obj.__class__, prefix) + } + + +class ParseError(Exception): + def __init__(self, filename, line, col, message): + self.filename = filename + self.line = line + self.col = col + self.message = message + + def __str__(self) -> str: + return f"{self.filename}:{self.line}:{self.col}: {self.message}" + + +class XMLParser(Protocol): + state = None + encodings = None + filename = "<xml />" + beExtremelyLenient = 0 + _prepend = None + + # _leadingBodyData will sometimes be set before switching to the + # 'bodydata' state, when we "accidentally" read a byte of bodydata + # in a different state. 
+ _leadingBodyData = None + + def connectionMade(self): + self.lineno = 1 + self.colno = 0 + self.encodings = [] + + def saveMark(self): + """Get the line number and column of the last character parsed""" + # This gets replaced during dataReceived, restored afterwards + return (self.lineno, self.colno) + + def _parseError(self, message): + raise ParseError(*((self.filename,) + self.saveMark() + (message,))) + + def _buildStateTable(self): + """Return a dictionary of begin, do, end state function tuples""" + # _buildStateTable leaves something to be desired but it does what it + # does.. probably slowly, so I'm doing some evil caching so it doesn't + # get called more than once per class. + stateTable = getattr(self.__class__, "__stateTable", None) + if stateTable is None: + stateTable = self.__class__.__stateTable = zipfndict( + *( + prefixedMethodObjDict(self, prefix) + for prefix in ("begin_", "do_", "end_") + ) + ) + return stateTable + + def _decode(self, data): + if "UTF-16" in self.encodings or "UCS-2" in self.encodings: + assert not len(data) & 1, "UTF-16 must come in pairs for now" + if self._prepend: + data = self._prepend + data + for encoding in self.encodings: + data = str(data, encoding) + return data + + def maybeBodyData(self): + if self.endtag: + return "bodydata" + + # Get ready for fun! We're going to allow + # <script>if (foo < bar)</script> to work! + # We do this by making everything between <script> and + # </script> a Text + # BUT <script src="foo"> will be special-cased to do regular, + # lenient behavior, because those may not have </script> + # -radix + + if self.tagName == "script" and "src" not in self.tagAttributes: + # we do this ourselves rather than having begin_waitforendscript + # because that can get called multiple times and we don't want + # bodydata to get reset other than the first time. + self.begin_bodydata(None) + return "waitforendscript" + return "bodydata" + + def dataReceived(self, data): + stateTable = self._buildStateTable() + if not self.state: + # all UTF-16 starts with this string + if data.startswith((b"\xff\xfe", b"\xfe\xff")): + self._prepend = data[0:2] + self.encodings.append("UTF-16") + data = data[2:] + self.state = "begin" + if self.encodings: + data = self._decode(data) + else: + data = data.decode("utf-8") + # bring state, lineno, colno into local scope + lineno, colno = self.lineno, self.colno + curState = self.state + # replace saveMark with a nested scope function + _saveMark = self.saveMark + + def saveMark(): + return (lineno, colno) + + self.saveMark = saveMark + # fetch functions from the stateTable + beginFn, doFn, endFn = stateTable[curState] + try: + for byte in data: + # do newline stuff + if byte == "\n": + lineno += 1 + colno = 0 + else: + colno += 1 + newState = doFn(byte) + if newState is not None and newState != curState: + # this is the endFn from the previous state + endFn() + curState = newState + beginFn, doFn, endFn = stateTable[curState] + beginFn(byte) + finally: + self.saveMark = _saveMark + self.lineno, self.colno = lineno, colno + # state doesn't make sense if there's an exception.. + self.state = curState + + def connectionLost(self, reason): + """ + End the last state we were in. 
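As an illustrative sketch of driving the parser by hand (not part of the diff; the XML snippet is invented, and the output shown comes from the default print-based callbacks defined at the bottom of this class):

    from twisted.web.sux import XMLParser

    p = XMLParser()
    p.connectionMade()   # set up line/column counters before feeding data
    p.dataReceived(b"<greeting lang='en'>hello</greeting>")
    # prints:
    #   begin greeting {'lang': 'en'}
    #   text: 'hello'
    #   end greeting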
+ """ + stateTable = self._buildStateTable() + stateTable[self.state][END_HANDLER]() + + # state methods + + def do_begin(self, byte): + if byte.isspace(): + return + if byte != "<": + if self.beExtremelyLenient: + self._leadingBodyData = byte + return "bodydata" + self._parseError(f"First char of document [{byte!r}] wasn't <") + return "tagstart" + + def begin_comment(self, byte): + self.commentbuf = "" + + def do_comment(self, byte): + self.commentbuf += byte + if self.commentbuf.endswith("-->"): + self.gotComment(self.commentbuf[:-3]) + return "bodydata" + + def begin_tagstart(self, byte): + self.tagName = "" # name of the tag + self.tagAttributes = {} # attributes of the tag + self.termtag = 0 # is the tag self-terminating + self.endtag = 0 + + def do_tagstart(self, byte): + if byte.isalnum() or byte in identChars: + self.tagName += byte + if self.tagName == "!--": + return "comment" + elif byte.isspace(): + if self.tagName: + if self.endtag: + # properly strict thing to do here is probably to only + # accept whitespace + return "waitforgt" + return "attrs" + else: + self._parseError("Whitespace before tag-name") + elif byte == ">": + if self.endtag: + self.gotTagEnd(self.tagName) + return "bodydata" + else: + self.gotTagStart(self.tagName, {}) + return ( + (not self.beExtremelyLenient) and "bodydata" or self.maybeBodyData() + ) + elif byte == "/": + if self.tagName: + return "afterslash" + else: + self.endtag = 1 + elif byte in "!?": + if self.tagName: + if not self.beExtremelyLenient: + self._parseError("Invalid character in tag-name") + else: + self.tagName += byte + self.termtag = 1 + elif byte == "[": + if self.tagName == "!": + return "expectcdata" + else: + self._parseError("Invalid '[' in tag-name") + else: + if self.beExtremelyLenient: + self.bodydata = "<" + return "unentity" + self._parseError("Invalid tag character: %r" % byte) + + def begin_unentity(self, byte): + self.bodydata += byte + + def do_unentity(self, byte): + self.bodydata += byte + return "bodydata" + + def end_unentity(self): + self.gotText(self.bodydata) + + def begin_expectcdata(self, byte): + self.cdatabuf = byte + + def do_expectcdata(self, byte): + self.cdatabuf += byte + cdb = self.cdatabuf + cd = "[CDATA[" + if len(cd) > len(cdb): + if cd.startswith(cdb): + return + elif self.beExtremelyLenient: + ## WHAT THE CRAP!? MSWord9 generates HTML that includes these + ## bizarre <![if !foo]> <![endif]> chunks, so I've gotta ignore + ## 'em as best I can. this should really be a separate parse + ## state but I don't even have any idea what these _are_. + return "waitforgt" + else: + self._parseError("Mal-formed CDATA header") + if cd == cdb: + self.cdatabuf = "" + return "cdata" + self._parseError("Mal-formed CDATA header") + + def do_cdata(self, byte): + self.cdatabuf += byte + if self.cdatabuf.endswith("]]>"): + self.cdatabuf = self.cdatabuf[:-3] + return "bodydata" + + def end_cdata(self): + self.gotCData(self.cdatabuf) + self.cdatabuf = "" + + def do_attrs(self, byte): + if byte.isalnum() or byte in identChars: + # XXX FIXME really handle !DOCTYPE at some point + if self.tagName == "!DOCTYPE": + return "doctype" + if self.tagName[0] in "!?": + return "waitforgt" + return "attrname" + elif byte.isspace(): + return + elif byte == ">": + self.gotTagStart(self.tagName, self.tagAttributes) + return (not self.beExtremelyLenient) and "bodydata" or self.maybeBodyData() + elif byte == "/": + return "afterslash" + elif self.beExtremelyLenient: + # discard and move on? 
Only case I've seen of this so far was: + # <foo bar="baz""> + return + self._parseError("Unexpected character: %r" % byte) + + def begin_doctype(self, byte): + self.doctype = byte + + def do_doctype(self, byte): + if byte == ">": + return "bodydata" + self.doctype += byte + + def end_doctype(self): + self.gotDoctype(self.doctype) + self.doctype = None + + def do_waitforgt(self, byte): + if byte == ">": + if self.endtag or not self.beExtremelyLenient: + return "bodydata" + return self.maybeBodyData() + + def begin_attrname(self, byte): + self.attrname = byte + self._attrname_termtag = 0 + + def do_attrname(self, byte): + if byte.isalnum() or byte in identChars: + self.attrname += byte + return + elif byte == "=": + return "beforeattrval" + elif byte.isspace(): + return "beforeeq" + elif self.beExtremelyLenient: + if byte in "\"'": + return "attrval" + if byte in lenientIdentChars or byte.isalnum(): + self.attrname += byte + return + if byte == "/": + self._attrname_termtag = 1 + return + if byte == ">": + self.attrval = "True" + self.tagAttributes[self.attrname] = self.attrval + self.gotTagStart(self.tagName, self.tagAttributes) + if self._attrname_termtag: + self.gotTagEnd(self.tagName) + return "bodydata" + return self.maybeBodyData() + # something is really broken. let's leave this attribute where it + # is and move on to the next thing + return + self._parseError(f"Invalid attribute name: {self.attrname!r} {byte!r}") + + def do_beforeattrval(self, byte): + if byte in "\"'": + return "attrval" + elif byte.isspace(): + return + elif self.beExtremelyLenient: + if byte in lenientIdentChars or byte.isalnum(): + return "messyattr" + if byte == ">": + self.attrval = "True" + self.tagAttributes[self.attrname] = self.attrval + self.gotTagStart(self.tagName, self.tagAttributes) + return self.maybeBodyData() + if byte == "\\": + # I saw this in actual HTML once: + # <font size=\"3\"><sup>SM</sup></font> + return + self._parseError( + "Invalid initial attribute value: %r; Attribute values must be quoted." 
+ % byte + ) + + attrname = "" + attrval = "" + + def begin_beforeeq(self, byte): + self._beforeeq_termtag = 0 + + def do_beforeeq(self, byte): + if byte == "=": + return "beforeattrval" + elif byte.isspace(): + return + elif self.beExtremelyLenient: + if byte.isalnum() or byte in identChars: + self.attrval = "True" + self.tagAttributes[self.attrname] = self.attrval + return "attrname" + elif byte == ">": + self.attrval = "True" + self.tagAttributes[self.attrname] = self.attrval + self.gotTagStart(self.tagName, self.tagAttributes) + if self._beforeeq_termtag: + self.gotTagEnd(self.tagName) + return "bodydata" + return self.maybeBodyData() + elif byte == "/": + self._beforeeq_termtag = 1 + return + self._parseError("Invalid attribute") + + def begin_attrval(self, byte): + self.quotetype = byte + self.attrval = "" + + def do_attrval(self, byte): + if byte == self.quotetype: + return "attrs" + self.attrval += byte + + def end_attrval(self): + self.tagAttributes[self.attrname] = self.attrval + self.attrname = self.attrval = "" + + def begin_messyattr(self, byte): + self.attrval = byte + + def do_messyattr(self, byte): + if byte.isspace(): + return "attrs" + elif byte == ">": + endTag = 0 + if self.attrval.endswith("/"): + endTag = 1 + self.attrval = self.attrval[:-1] + self.tagAttributes[self.attrname] = self.attrval + self.gotTagStart(self.tagName, self.tagAttributes) + if endTag: + self.gotTagEnd(self.tagName) + return "bodydata" + return self.maybeBodyData() + else: + self.attrval += byte + + def end_messyattr(self): + if self.attrval: + self.tagAttributes[self.attrname] = self.attrval + + def begin_afterslash(self, byte): + self._after_slash_closed = 0 + + def do_afterslash(self, byte): + # this state is only after a self-terminating slash, e.g. <foo/> + if self._after_slash_closed: + self._parseError("Mal-formed") # XXX When does this happen?? + if byte != ">": + if self.beExtremelyLenient: + return + else: + self._parseError("No data allowed after '/'") + self._after_slash_closed = 1 + self.gotTagStart(self.tagName, self.tagAttributes) + self.gotTagEnd(self.tagName) + # don't need maybeBodyData here because there better not be + # any javascript code after a <script/>... we'll see :( + return "bodydata" + + def begin_bodydata(self, byte): + if self._leadingBodyData: + self.bodydata = self._leadingBodyData + del self._leadingBodyData + else: + self.bodydata = "" + + def do_bodydata(self, byte): + if byte == "<": + return "tagstart" + if byte == "&": + return "entityref" + self.bodydata += byte + + def end_bodydata(self): + self.gotText(self.bodydata) + self.bodydata = "" + + def do_waitforendscript(self, byte): + if byte == "<": + return "waitscriptendtag" + self.bodydata += byte + + def begin_waitscriptendtag(self, byte): + self.temptagdata = "" + self.tagName = "" + self.endtag = 0 + + def do_waitscriptendtag(self, byte): + # 1 enforce / as first byte read + # 2 enforce following bytes to be subset of "script" until + # tagName == "script" + # 2a when that happens, gotText(self.bodydata) and gotTagEnd(self.tagName) + # 3 spaces can happen anywhere, they're ignored + # e.g. 
< / script > + # 4 anything else causes all data I've read to be moved to the + # bodydata, and switch back to waitforendscript state + + # If it turns out this _isn't_ a </script>, we need to + # remember all the data we've been through so we can append it + # to bodydata + self.temptagdata += byte + + # 1 + if byte == "/": + self.endtag = True + elif not self.endtag: + self.bodydata += "<" + self.temptagdata + return "waitforendscript" + # 2 + elif byte.isalnum() or byte in identChars: + self.tagName += byte + if not "script".startswith(self.tagName): + self.bodydata += "<" + self.temptagdata + return "waitforendscript" + elif self.tagName == "script": + self.gotText(self.bodydata) + self.gotTagEnd(self.tagName) + return "waitforgt" + # 3 + elif byte.isspace(): + return "waitscriptendtag" + # 4 + else: + self.bodydata += "<" + self.temptagdata + return "waitforendscript" + + def begin_entityref(self, byte): + self.erefbuf = "" + self.erefextra = "" # extra bit for lenient mode + + def do_entityref(self, byte): + if byte.isspace() or byte == "<": + if self.beExtremelyLenient: + # '&foo' probably was '&foo' + if self.erefbuf and self.erefbuf != "amp": + self.erefextra = self.erefbuf + self.erefbuf = "amp" + if byte == "<": + return "tagstart" + else: + self.erefextra += byte + return "spacebodydata" + self._parseError("Bad entity reference") + elif byte != ";": + self.erefbuf += byte + else: + return "bodydata" + + def end_entityref(self): + self.gotEntityReference(self.erefbuf) + + # hacky support for space after & in entityref in beExtremelyLenient + # state should only happen in that case + def begin_spacebodydata(self, byte): + self.bodydata = self.erefextra + self.erefextra = None + + do_spacebodydata = do_bodydata + end_spacebodydata = end_bodydata + + # Sorta SAX-ish API + + def gotTagStart(self, name, attributes): + """Encountered an opening tag. + + Default behaviour is to print.""" + print("begin", name, attributes) + + def gotText(self, data): + """Encountered text + + Default behaviour is to print.""" + print("text:", repr(data)) + + def gotEntityReference(self, entityRef): + """Encountered mnemonic entity reference + + Default behaviour is to print.""" + print("entityRef: &%s;" % entityRef) + + def gotComment(self, comment): + """Encountered comment. + + Default behaviour is to ignore.""" + pass + + def gotCData(self, cdata): + """Encountered CDATA + + Default behaviour is to call the gotText method""" + self.gotText(cdata) + + def gotDoctype(self, doctype): + """Encountered DOCTYPE + + This is really grotty: it basically just gives you everything between + '<!DOCTYPE' and '>' as an argument. + """ + print("!DOCTYPE", repr(doctype)) + + def gotTagEnd(self, name): + """Encountered closing tag + + Default behaviour is to print.""" + print("end", name) diff --git a/contrib/python/Twisted/py3/twisted/web/tap.py b/contrib/python/Twisted/py3/twisted/web/tap.py new file mode 100644 index 0000000000..2ed783848a --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/tap.py @@ -0,0 +1,322 @@ +# -*- test-case-name: twisted.web.test.test_tap -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +Support for creating a service which runs a web server. 
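+
+For example, a directory of static files might be served from the command
+line with something like the following (hypothetical paths and application
+name; C{tcp:8080} is the default port)::
+
+    twistd web --listen tcp:8080 --path /srv/www
+
+and a WSGI application object can be published with
+C{--wsgi some.module.app}.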
+""" + + +import os +import warnings + +import incremental + +from twisted.application import service, strports +from twisted.internet import interfaces, reactor +from twisted.python import deprecate, reflect, threadpool, usage +from twisted.spread import pb +from twisted.web import demo, distrib, resource, script, server, static, twcgi, wsgi + + +class Options(usage.Options): + """ + Define the options accepted by the I{twistd web} plugin. + """ + + synopsis = "[web options]" + + optParameters = [ + ["logfile", "l", None, "Path to web CLF (Combined Log Format) log file."], + [ + "certificate", + "c", + "server.pem", + "(DEPRECATED: use --listen) " "SSL certificate to use for HTTPS. ", + ], + [ + "privkey", + "k", + "server.pem", + "(DEPRECATED: use --listen) " "SSL certificate to use for HTTPS.", + ], + ] + + optFlags = [ + [ + "notracebacks", + "n", + ( + "(DEPRECATED: Tracebacks are disabled by default. " + "See --enable-tracebacks to turn them on." + ), + ], + [ + "display-tracebacks", + "", + ( + "Show uncaught exceptions during rendering tracebacks to " + "the client. WARNING: This may be a security risk and " + "expose private data!" + ), + ], + ] + + optFlags.append( + [ + "personal", + "", + "Instead of generating a webserver, generate a " + "ResourcePublisher which listens on the port given by " + "--listen, or ~/%s " % (distrib.UserDirectory.userSocketName,) + + "if --listen is not specified.", + ] + ) + + compData = usage.Completions( + optActions={ + "logfile": usage.CompleteFiles("*.log"), + "certificate": usage.CompleteFiles("*.pem"), + "privkey": usage.CompleteFiles("*.pem"), + } + ) + + longdesc = """\ +This starts a webserver. If you specify no arguments, it will be a +demo webserver that has the Test class from twisted.web.demo in it.""" + + def __init__(self): + usage.Options.__init__(self) + self["indexes"] = [] + self["root"] = None + self["extraHeaders"] = [] + self["ports"] = [] + self["port"] = self["https"] = None + + def opt_port(self, port): + """ + (DEPRECATED: use --listen) + Strports description of port to start the server on + """ + msg = deprecate.getDeprecationWarningString( + self.opt_port, incremental.Version("Twisted", 18, 4, 0) + ) + warnings.warn(msg, category=DeprecationWarning, stacklevel=2) + self["port"] = port + + opt_p = opt_port + + def opt_https(self, port): + """ + (DEPRECATED: use --listen) + Port to listen on for Secure HTTP. + """ + msg = deprecate.getDeprecationWarningString( + self.opt_https, incremental.Version("Twisted", 18, 4, 0) + ) + warnings.warn(msg, category=DeprecationWarning, stacklevel=2) + self["https"] = port + + def opt_listen(self, port): + """ + Add an strports description of port to start the server on. + [default: tcp:8080] + """ + self["ports"].append(port) + + def opt_index(self, indexName): + """ + Add the name of a file used to check for directory indexes. + [default: index, index.html] + """ + self["indexes"].append(indexName) + + opt_i = opt_index + + def opt_user(self): + """ + Makes a server with ~/public_html and ~/.twistd-web-pb support for + users. + """ + self["root"] = distrib.UserDirectory() + + opt_u = opt_user + + def opt_path(self, path): + """ + <path> is either a specific file or a directory to be set as the root + of the web server. Use this if you have a directory full of HTML, cgi, + epy, or rpy files or any other files that you want to be served up raw. 
+ """ + self["root"] = static.File(os.path.abspath(path)) + self["root"].processors = { + ".epy": script.PythonScript, + ".rpy": script.ResourceScript, + } + self["root"].processors[".cgi"] = twcgi.CGIScript + + def opt_processor(self, proc): + """ + `ext=class' where `class' is added as a Processor for files ending + with `ext'. + """ + if not isinstance(self["root"], static.File): + raise usage.UsageError("You can only use --processor after --path.") + ext, klass = proc.split("=", 1) + self["root"].processors[ext] = reflect.namedClass(klass) + + def opt_class(self, className): + """ + Create a Resource subclass with a zero-argument constructor. + """ + classObj = reflect.namedClass(className) + self["root"] = classObj() + + def opt_resource_script(self, name): + """ + An .rpy file to be used as the root resource of the webserver. + """ + self["root"] = script.ResourceScriptWrapper(name) + + def opt_wsgi(self, name): + """ + The FQPN of a WSGI application object to serve as the root resource of + the webserver. + """ + try: + application = reflect.namedAny(name) + except (AttributeError, ValueError): + raise usage.UsageError(f"No such WSGI application: {name!r}") + pool = threadpool.ThreadPool() + reactor.callWhenRunning(pool.start) + reactor.addSystemEventTrigger("after", "shutdown", pool.stop) + self["root"] = wsgi.WSGIResource(reactor, pool, application) + + def opt_mime_type(self, defaultType): + """ + Specify the default mime-type for static files. + """ + if not isinstance(self["root"], static.File): + raise usage.UsageError("You can only use --mime_type after --path.") + self["root"].defaultType = defaultType + + opt_m = opt_mime_type + + def opt_allow_ignore_ext(self): + """ + Specify whether or not a request for 'foo' should return 'foo.ext' + """ + if not isinstance(self["root"], static.File): + raise usage.UsageError( + "You can only use --allow_ignore_ext " "after --path." + ) + self["root"].ignoreExt("*") + + def opt_ignore_ext(self, ext): + """ + Specify an extension to ignore. These will be processed in order. + """ + if not isinstance(self["root"], static.File): + raise usage.UsageError("You can only use --ignore_ext " "after --path.") + self["root"].ignoreExt(ext) + + def opt_add_header(self, header): + """ + Specify an additional header to be included in all responses. Specified + as "HeaderName: HeaderValue". + """ + name, value = header.split(":", 1) + self["extraHeaders"].append((name.strip(), value.strip())) + + def postOptions(self): + """ + Set up conditional defaults and check for dependencies. + + If SSL is not available but an HTTPS server was configured, raise a + L{UsageError} indicating that this is not possible. + + If no server port was supplied, select a default appropriate for the + other options supplied. + """ + if self["port"] is not None: + self["ports"].append(self["port"]) + if self["https"] is not None: + try: + reflect.namedModule("OpenSSL.SSL") + except ImportError: + raise usage.UsageError("SSL support not installed") + sslStrport = "ssl:port={}:privateKey={}:certKey={}".format( + self["https"], + self["privkey"], + self["certificate"], + ) + self["ports"].append(sslStrport) + if len(self["ports"]) == 0: + if self["personal"]: + path = os.path.expanduser( + os.path.join("~", distrib.UserDirectory.userSocketName) + ) + self["ports"].append("unix:" + path) + else: + self["ports"].append("tcp:8080") + + +def makePersonalServerFactory(site): + """ + Create and return a factory which will respond to I{distrib} requests + against the given site. 
+ + @type site: L{twisted.web.server.Site} + @rtype: L{twisted.internet.protocol.Factory} + """ + return pb.PBServerFactory(distrib.ResourcePublisher(site)) + + +class _AddHeadersResource(resource.Resource): + def __init__(self, originalResource, headers): + self._originalResource = originalResource + self._headers = headers + + def getChildWithDefault(self, name, request): + for k, v in self._headers: + request.responseHeaders.addRawHeader(k, v) + return self._originalResource.getChildWithDefault(name, request) + + +def makeService(config): + s = service.MultiService() + if config["root"]: + root = config["root"] + if config["indexes"]: + config["root"].indexNames = config["indexes"] + else: + # This really ought to be web.Admin or something + root = demo.Test() + + if isinstance(root, static.File): + root.registry.setComponent(interfaces.IServiceCollection, s) + + if config["extraHeaders"]: + root = _AddHeadersResource(root, config["extraHeaders"]) + + if config["logfile"]: + site = server.Site(root, logPath=config["logfile"]) + else: + site = server.Site(root) + + if config["display-tracebacks"]: + site.displayTracebacks = True + + # Deprecate --notracebacks/-n + if config["notracebacks"]: + msg = deprecate._getDeprecationWarningString( + "--notracebacks", incremental.Version("Twisted", 19, 7, 0) + ) + warnings.warn(msg, category=DeprecationWarning, stacklevel=2) + + if config["personal"]: + site = makePersonalServerFactory(site) + for port in config["ports"]: + svc = strports.service(port, site) + svc.setServiceParent(s) + return s diff --git a/contrib/python/Twisted/py3/twisted/web/template.py b/contrib/python/Twisted/py3/twisted/web/template.py new file mode 100644 index 0000000000..162dc7d206 --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/template.py @@ -0,0 +1,60 @@ +# -*- test-case-name: twisted.web.test.test_template -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +HTML rendering for twisted.web. + +@var VALID_HTML_TAG_NAMES: A list of recognized HTML tag names, used by the + L{tag} object. + +@var TEMPLATE_NAMESPACE: The XML namespace used to identify attributes and + elements used by the templating system, which should be removed from the + final output document. + +@var tags: A convenience object which can produce L{Tag} objects on demand via + attribute access. For example: C{tags.div} is equivalent to C{Tag("div")}. + Tags not specified in L{VALID_HTML_TAG_NAMES} will result in an + L{AttributeError}. +""" + + +__all__ = [ + "TEMPLATE_NAMESPACE", + "VALID_HTML_TAG_NAMES", + "Element", + "Flattenable", + "TagLoader", + "XMLString", + "XMLFile", + "renderer", + "flatten", + "flattenString", + "tags", + "Comment", + "CDATA", + "Tag", + "slot", + "CharRef", + "renderElement", +] + +from ._stan import CharRef +from ._template_util import ( + CDATA, + TEMPLATE_NAMESPACE, + VALID_HTML_TAG_NAMES, + Comment, + Element, + Flattenable, + Tag, + TagLoader, + XMLFile, + XMLString, + flatten, + flattenString, + renderElement, + renderer, + slot, + tags, +) diff --git a/contrib/python/Twisted/py3/twisted/web/test/requesthelper.py b/contrib/python/Twisted/py3/twisted/web/test/requesthelper.py new file mode 100644 index 0000000000..a3b0904427 --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/test/requesthelper.py @@ -0,0 +1,512 @@ +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +Helpers related to HTTP requests, used by tests. 
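+
+For example, a resource can be exercised without a real connection by
+rendering it against a dummy request (a minimal sketch; C{someResource} is
+assumed to be a L{twisted.web.resource.Resource} provided by the test)::
+
+    request = DummyRequest([b"child"])
+    request.render(someResource)
+    body = b"".join(request.written)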
+""" + +from __future__ import annotations + +__all__ = ["DummyChannel", "DummyRequest"] + +from io import BytesIO +from typing import Dict, List, Optional + +from zope.interface import implementer, verify + +from incremental import Version + +from twisted.internet.address import IPv4Address, IPv6Address +from twisted.internet.defer import Deferred +from twisted.internet.interfaces import IAddress, ISSLTransport +from twisted.internet.task import Clock +from twisted.python.deprecate import deprecated +from twisted.trial import unittest +from twisted.web._responses import FOUND +from twisted.web.http_headers import Headers +from twisted.web.resource import Resource +from twisted.web.server import NOT_DONE_YET, Session, Site + +textLinearWhitespaceComponents = [f"Foo{lw}bar" for lw in ["\r", "\n", "\r\n"]] + +sanitizedText = "Foo bar" +bytesLinearWhitespaceComponents = [ + component.encode("ascii") for component in textLinearWhitespaceComponents +] +sanitizedBytes = sanitizedText.encode("ascii") + + +@implementer(IAddress) +class NullAddress: + """ + A null implementation of L{IAddress}. + """ + + +class DummyChannel: + class TCP: + port = 80 + disconnected = False + + def __init__(self, peer=None): + if peer is None: + peer = IPv4Address("TCP", "192.168.1.1", 12344) + self._peer = peer + self.written = BytesIO() + self.producers = [] + + def getPeer(self): + return self._peer + + def write(self, data): + if not isinstance(data, bytes): + raise TypeError(f"Can only write bytes to a transport, not {data!r}") + self.written.write(data) + + def writeSequence(self, iovec): + for data in iovec: + self.write(data) + + def getHost(self): + return IPv4Address("TCP", "10.0.0.1", self.port) + + def registerProducer(self, producer, streaming): + self.producers.append((producer, streaming)) + + def unregisterProducer(self): + pass + + def loseConnection(self): + self.disconnected = True + + @implementer(ISSLTransport) + class SSL(TCP): + def abortConnection(self): + # ITCPTransport.abortConnection + pass + + def getTcpKeepAlive(self): + # ITCPTransport.getTcpKeepAlive + pass + + def getTcpNoDelay(self): + # ITCPTransport.getTcpNoDelay + pass + + def loseWriteConnection(self): + # ITCPTransport.loseWriteConnection + pass + + def setTcpKeepAlive(self, enabled): + # ITCPTransport.setTcpKeepAlive + pass + + def setTcpNoDelay(self, enabled): + # ITCPTransport.setTcpNoDelay + pass + + def getPeerCertificate(self): + # ISSLTransport.getPeerCertificate + pass + + site = Site(Resource()) + + def __init__(self, peer=None): + self.transport = self.TCP(peer) + + def requestDone(self, request): + pass + + def writeHeaders(self, version, code, reason, headers): + response_line = version + b" " + code + b" " + reason + b"\r\n" + headerSequence = [response_line] + headerSequence.extend(name + b": " + value + b"\r\n" for name, value in headers) + headerSequence.append(b"\r\n") + self.transport.writeSequence(headerSequence) + + def getPeer(self): + return self.transport.getPeer() + + def getHost(self): + return self.transport.getHost() + + def registerProducer(self, producer, streaming): + self.transport.registerProducer(producer, streaming) + + def unregisterProducer(self): + self.transport.unregisterProducer() + + def write(self, data): + self.transport.write(data) + + def writeSequence(self, iovec): + self.transport.writeSequence(iovec) + + def loseConnection(self): + self.transport.loseConnection() + + def endRequest(self): + pass + + def isSecure(self): + return isinstance(self.transport, self.SSL) + + def 
abortConnection(self): + # ITCPTransport.abortConnection + pass + + def getTcpKeepAlive(self): + # ITCPTransport.getTcpKeepAlive + pass + + def getTcpNoDelay(self): + # ITCPTransport.getTcpNoDelay + pass + + def loseWriteConnection(self): + # ITCPTransport.loseWriteConnection + pass + + def setTcpKeepAlive(self): + # ITCPTransport.setTcpKeepAlive + pass + + def setTcpNoDelay(self): + # ITCPTransport.setTcpNoDelay + pass + + def getPeerCertificate(self): + # ISSLTransport.getPeerCertificate + pass + + +class DummyRequest: + """ + Represents a dummy or fake request. See L{twisted.web.server.Request}. + + @ivar _finishedDeferreds: L{None} or a C{list} of L{Deferreds} which will + be called back with L{None} when C{finish} is called or which will be + errbacked if C{processingFailed} is called. + + @type requestheaders: C{Headers} + @ivar requestheaders: A Headers instance that stores values for all request + headers. + + @type responseHeaders: C{Headers} + @ivar responseHeaders: A Headers instance that stores values for all + response headers. + + @type responseCode: C{int} + @ivar responseCode: The response code which was passed to + C{setResponseCode}. + + @type written: C{list} of C{bytes} + @ivar written: The bytes which have been written to the request. + """ + + uri = b"http://dummy/" + method = b"GET" + client: Optional[IAddress] = None + sitepath: List[bytes] + written: List[bytes] + prepath: List[bytes] + args: Dict[bytes, List[bytes]] + _finishedDeferreds: List[Deferred[None]] + + def registerProducer(self, prod, s): + """ + Call an L{IPullProducer}'s C{resumeProducing} method in a + loop until it unregisters itself. + + @param prod: The producer. + @type prod: L{IPullProducer} + + @param s: Whether or not the producer is streaming. + """ + # XXX: Handle IPushProducers + self.go = 1 + while self.go: + prod.resumeProducing() + + def unregisterProducer(self): + self.go = 0 + + def __init__( + self, + postpath: list[bytes], + session: Optional[Session] = None, + client: Optional[IAddress] = None, + ) -> None: + self.sitepath = [] + self.written = [] + self.finished = 0 + self.postpath = postpath + self.prepath = [] + self.session = None + self.protoSession = session or Session(site=None, uid=b"0", reactor=Clock()) + self.args = {} + self.requestHeaders = Headers() + self.responseHeaders = Headers() + self.responseCode = None + self._finishedDeferreds = [] + self._serverName = b"dummy" + self.clientproto = b"HTTP/1.0" + + def getAllHeaders(self): + """ + Return dictionary mapping the names of all received headers to the last + value received for each. + + Since this method does not return all header information, + C{self.requestHeaders.getAllRawHeaders()} may be preferred. + + NOTE: This function is a direct copy of + C{twisted.web.http.Request.getAllRawHeaders}. + """ + headers = {} + for k, v in self.requestHeaders.getAllRawHeaders(): + headers[k.lower()] = v[-1] + return headers + + def getHeader(self, name): + """ + Retrieve the value of a request header. + + @type name: C{bytes} + @param name: The name of the request header for which to retrieve the + value. Header names are compared case-insensitively. + + @rtype: C{bytes} or L{None} + @return: The value of the specified request header. 
+ """ + return self.requestHeaders.getRawHeaders(name.lower(), [None])[0] + + def setHeader(self, name, value): + """TODO: make this assert on write() if the header is content-length""" + self.responseHeaders.addRawHeader(name, value) + + def getSession(self, sessionInterface=None): + if self.session: + return self.session + assert ( + not self.written + ), "Session cannot be requested after data has been written." + self.session = self.protoSession + return self.session + + def render(self, resource): + """ + Render the given resource as a response to this request. + + This implementation only handles a few of the most common behaviors of + resources. It can handle a render method that returns a string or + C{NOT_DONE_YET}. It doesn't know anything about the semantics of + request methods (eg HEAD) nor how to set any particular headers. + Basically, it's largely broken, but sufficient for some tests at least. + It should B{not} be expanded to do all the same stuff L{Request} does. + Instead, L{DummyRequest} should be phased out and L{Request} (or some + other real code factored in a different way) used. + """ + result = resource.render(self) + if result is NOT_DONE_YET: + return + self.write(result) + self.finish() + + def write(self, data): + if not isinstance(data, bytes): + raise TypeError("write() only accepts bytes") + self.written.append(data) + + def notifyFinish(self) -> Deferred[None]: + """ + Return a L{Deferred} which is called back with L{None} when the request + is finished. This will probably only work if you haven't called + C{finish} yet. + """ + finished: Deferred[None] = Deferred() + self._finishedDeferreds.append(finished) + return finished + + def finish(self): + """ + Record that the request is finished and callback and L{Deferred}s + waiting for notification of this. + """ + self.finished = self.finished + 1 + if self._finishedDeferreds is not None: + observers = self._finishedDeferreds + self._finishedDeferreds = None + for obs in observers: + obs.callback(None) + + def processingFailed(self, reason): + """ + Errback and L{Deferreds} waiting for finish notification. + """ + if self._finishedDeferreds is not None: + observers = self._finishedDeferreds + self._finishedDeferreds = None + for obs in observers: + obs.errback(reason) + + def addArg(self, name, value): + self.args[name] = [value] + + def setResponseCode(self, code, message=None): + """ + Set the HTTP status response code, but takes care that this is called + before any data is written. + """ + assert ( + not self.written + ), "Response code cannot be set after data has" "been written: {}.".format( + "@@@@".join(self.written) + ) + self.responseCode = code + self.responseMessage = message + + def setLastModified(self, when): + assert ( + not self.written + ), "Last-Modified cannot be set after data has " "been written: {}.".format( + "@@@@".join(self.written) + ) + + def setETag(self, tag): + assert ( + not self.written + ), "ETag cannot be set after data has been " "written: {}.".format( + "@@@@".join(self.written) + ) + + @deprecated(Version("Twisted", 18, 4, 0), replacement="getClientAddress") + def getClientIP(self): + """ + Return the IPv4 address of the client which made this request, if there + is one, otherwise L{None}. + """ + if isinstance(self.client, (IPv4Address, IPv6Address)): + return self.client.host + return None + + def getClientAddress(self): + """ + Return the L{IAddress} of the client that made this request. + + @return: an address. + @rtype: an L{IAddress} provider. 
+ """ + if self.client is None: + return NullAddress() + return self.client + + def getRequestHostname(self): + """ + Get a dummy hostname associated to the HTTP request. + + @rtype: C{bytes} + @returns: a dummy hostname + """ + return self._serverName + + def getHost(self): + """ + Get a dummy transport's host. + + @rtype: C{IPv4Address} + @returns: a dummy transport's host + """ + return IPv4Address("TCP", "127.0.0.1", 80) + + def setHost(self, host, port, ssl=0): + """ + Change the host and port the request thinks it's using. + + @type host: C{bytes} + @param host: The value to which to change the host header. + + @type ssl: C{bool} + @param ssl: A flag which, if C{True}, indicates that the request is + considered secure (if C{True}, L{isSecure} will return C{True}). + """ + self._forceSSL = ssl # set first so isSecure will work + if self.isSecure(): + default = 443 + else: + default = 80 + if port == default: + hostHeader = host + else: + hostHeader = b"%b:%d" % (host, port) + self.requestHeaders.addRawHeader(b"host", hostHeader) + + def redirect(self, url): + """ + Utility function that does a redirect. + + The request should have finish() called after this. + """ + self.setResponseCode(FOUND) + self.setHeader(b"location", url) + + +class DummyRequestTests(unittest.SynchronousTestCase): + """ + Tests for L{DummyRequest}. + """ + + def test_getClientIPDeprecated(self): + """ + L{DummyRequest.getClientIP} is deprecated in favor of + L{DummyRequest.getClientAddress} + """ + + request = DummyRequest([]) + request.getClientIP() + + warnings = self.flushWarnings( + offendingFunctions=[self.test_getClientIPDeprecated] + ) + + self.assertEqual(1, len(warnings)) + [warning] = warnings + self.assertEqual(warning.get("category"), DeprecationWarning) + self.assertEqual( + warning.get("message"), + ( + "twisted.web.test.requesthelper.DummyRequest.getClientIP " + "was deprecated in Twisted 18.4.0; " + "please use getClientAddress instead" + ), + ) + + def test_getClientIPSupportsIPv6(self): + """ + L{DummyRequest.getClientIP} supports IPv6 addresses, just like + L{twisted.web.http.Request.getClientIP}. + """ + request = DummyRequest([]) + client = IPv6Address("TCP", "::1", 12345) + request.client = client + + self.assertEqual("::1", request.getClientIP()) + + def test_getClientAddressWithoutClient(self): + """ + L{DummyRequest.getClientAddress} returns an L{IAddress} + provider no C{client} has been set. + """ + request = DummyRequest([]) + null = request.getClientAddress() + verify.verifyObject(IAddress, null) + + def test_getClientAddress(self): + """ + L{DummyRequest.getClientAddress} returns the C{client}. + """ + request = DummyRequest([]) + client = IPv4Address("TCP", "127.0.0.1", 12345) + request.client = client + address = request.getClientAddress() + self.assertIs(address, client) diff --git a/contrib/python/Twisted/py3/twisted/web/twcgi.py b/contrib/python/Twisted/py3/twisted/web/twcgi.py new file mode 100644 index 0000000000..fcf831108b --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/twcgi.py @@ -0,0 +1,343 @@ +# -*- test-case-name: twisted.web.test.test_cgi -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + + +""" +I hold resource classes and helper classes that deal with CGI scripts. 
+""" + +# System Imports +import os +import urllib +from typing import AnyStr + +# Twisted Imports +from twisted.internet import protocol +from twisted.logger import Logger +from twisted.python import filepath +from twisted.spread import pb +from twisted.web import http, resource, server, static + + +class CGIDirectory(resource.Resource, filepath.FilePath[AnyStr]): + def __init__(self, pathname): + resource.Resource.__init__(self) + filepath.FilePath.__init__(self, pathname) + + def getChild(self, path, request): + fnp = self.child(path) + if not fnp.exists(): + return static.File.childNotFound + elif fnp.isdir(): + return CGIDirectory(fnp.path) + else: + return CGIScript(fnp.path) + + def render(self, request): + notFound = resource.NoResource( + "CGI directories do not support directory listing." + ) + return notFound.render(request) + + +class CGIScript(resource.Resource): + """ + L{CGIScript} is a resource which runs child processes according to the CGI + specification. + + The implementation is complex due to the fact that it requires asynchronous + IPC with an external process with an unpleasant protocol. + """ + + isLeaf = 1 + + def __init__(self, filename, registry=None, reactor=None): + """ + Initialize, with the name of a CGI script file. + """ + self.filename = filename + if reactor is None: + # This installs a default reactor, if None was installed before. + # We do a late import here, so that importing the current module + # won't directly trigger installing a default reactor. + from twisted.internet import reactor + self._reactor = reactor + + def render(self, request): + """ + Do various things to conform to the CGI specification. + + I will set up the usual slew of environment variables, then spin off a + process. + + @type request: L{twisted.web.http.Request} + @param request: An HTTP request. + """ + scriptName = b"/" + b"/".join(request.prepath) + serverName = request.getRequestHostname().split(b":")[0] + env = { + "SERVER_SOFTWARE": server.version, + "SERVER_NAME": serverName, + "GATEWAY_INTERFACE": "CGI/1.1", + "SERVER_PROTOCOL": request.clientproto, + "SERVER_PORT": str(request.getHost().port), + "REQUEST_METHOD": request.method, + "SCRIPT_NAME": scriptName, + "SCRIPT_FILENAME": self.filename, + "REQUEST_URI": request.uri, + } + + ip = request.getClientAddress().host + if ip is not None: + env["REMOTE_ADDR"] = ip + pp = request.postpath + if pp: + env["PATH_INFO"] = "/" + "/".join(pp) + + if hasattr(request, "content"): + # 'request.content' is either a StringIO or a TemporaryFile, and + # the file pointer is sitting at the beginning (seek(0,0)) + request.content.seek(0, 2) + length = request.content.tell() + request.content.seek(0, 0) + env["CONTENT_LENGTH"] = str(length) + + try: + qindex = request.uri.index(b"?") + except ValueError: + env["QUERY_STRING"] = "" + qargs = [] + else: + qs = env["QUERY_STRING"] = request.uri[qindex + 1 :] + if b"=" in qs: + qargs = [] + else: + qargs = [urllib.parse.unquote(x.decode()) for x in qs.split(b"+")] + + # Propagate HTTP headers + for title, header in request.getAllHeaders().items(): + envname = title.replace(b"-", b"_").upper() + if title not in (b"content-type", b"content-length", b"proxy"): + envname = b"HTTP_" + envname + env[envname] = header + # Propagate our environment + for key, value in os.environ.items(): + if key not in env: + env[key] = value + # And they're off! + self.runProcess(env, request, qargs) + return server.NOT_DONE_YET + + def runProcess(self, env, request, qargs=[]): + """ + Run the cgi script. 
+ + @type env: A L{dict} of L{str}, or L{None} + @param env: The environment variables to pass to the process that will + get spawned. See + L{twisted.internet.interfaces.IReactorProcess.spawnProcess} for + more information about environments and process creation. + + @type request: L{twisted.web.http.Request} + @param request: An HTTP request. + + @type qargs: A L{list} of L{str} + @param qargs: The command line arguments to pass to the process that + will get spawned. + """ + p = CGIProcessProtocol(request) + self._reactor.spawnProcess( + p, + self.filename, + [self.filename] + qargs, + env, + os.path.dirname(self.filename), + ) + + +class FilteredScript(CGIScript): + """ + I am a special version of a CGI script, that uses a specific executable. + + This is useful for interfacing with other scripting languages that adhere + to the CGI standard. My C{filter} attribute specifies what executable to + run, and my C{filename} init parameter describes which script to pass to + the first argument of that script. + + To customize me for a particular location of a CGI interpreter, override + C{filter}. + + @type filter: L{str} + @ivar filter: The absolute path to the executable. + """ + + filter = "/usr/bin/cat" + + def runProcess(self, env, request, qargs=[]): + """ + Run a script through the C{filter} executable. + + @type env: A L{dict} of L{str}, or L{None} + @param env: The environment variables to pass to the process that will + get spawned. See + L{twisted.internet.interfaces.IReactorProcess.spawnProcess} + for more information about environments and process creation. + + @type request: L{twisted.web.http.Request} + @param request: An HTTP request. + + @type qargs: A L{list} of L{str} + @param qargs: The command line arguments to pass to the process that + will get spawned. + """ + p = CGIProcessProtocol(request) + self._reactor.spawnProcess( + p, + self.filter, + [self.filter, self.filename] + qargs, + env, + os.path.dirname(self.filename), + ) + + +class CGIProcessProtocol(protocol.ProcessProtocol, pb.Viewable): + handling_headers = 1 + headers_written = 0 + headertext = b"" + errortext = b"" + _log = Logger() + _requestFinished = False + + # Remotely relay producer interface. + + def view_resumeProducing(self, issuer): + self.resumeProducing() + + def view_pauseProducing(self, issuer): + self.pauseProducing() + + def view_stopProducing(self, issuer): + self.stopProducing() + + def resumeProducing(self): + self.transport.resumeProducing() + + def pauseProducing(self): + self.transport.pauseProducing() + + def stopProducing(self): + self.transport.loseConnection() + + def __init__(self, request): + self.request = request + self.request.notifyFinish().addBoth(self._finished) + + def connectionMade(self): + self.request.registerProducer(self, 1) + self.request.content.seek(0, 0) + content = self.request.content.read() + if content: + self.transport.write(content) + self.transport.closeStdin() + + def errReceived(self, error): + self.errortext = self.errortext + error + + def outReceived(self, output): + """ + Handle a chunk of input + """ + # First, make sure that the headers from the script are sorted + # out (we'll want to do some parsing on these later.) 
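+        # The script may end its header block with any of several delimiter
+        # styles (scanned for below): a blank line written with LF, CRLF,
+        # CR, or the mixed LF-CR-LF form.  The earliest delimiter found
+        # wins; everything before it is parsed as headers and everything
+        # after it is treated as body data.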
+ if self.handling_headers: + text = self.headertext + output + headerEnds = [] + for delimiter in b"\n\n", b"\r\n\r\n", b"\r\r", b"\n\r\n": + headerend = text.find(delimiter) + if headerend != -1: + headerEnds.append((headerend, delimiter)) + if headerEnds: + # The script is entirely in control of response headers; + # disable the default Content-Type value normally provided by + # twisted.web.server.Request. + self.request.defaultContentType = None + + headerEnds.sort() + headerend, delimiter = headerEnds[0] + self.headertext = text[:headerend] + # This is a final version of the header text. + linebreak = delimiter[: len(delimiter) // 2] + headers = self.headertext.split(linebreak) + for header in headers: + br = header.find(b": ") + if br == -1: + self._log.error( + "ignoring malformed CGI header: {header!r}", header=header + ) + else: + headerName = header[:br].lower() + headerText = header[br + 2 :] + if headerName == b"location": + self.request.setResponseCode(http.FOUND) + if headerName == b"status": + try: + # "XXX <description>" sometimes happens. + statusNum = int(headerText[:3]) + except BaseException: + self._log.error("malformed status header") + else: + self.request.setResponseCode(statusNum) + else: + # Don't allow the application to control + # these required headers. + if headerName.lower() not in (b"server", b"date"): + self.request.responseHeaders.addRawHeader( + headerName, headerText + ) + output = text[headerend + len(delimiter) :] + self.handling_headers = 0 + if self.handling_headers: + self.headertext = text + if not self.handling_headers: + self.request.write(output) + + def processEnded(self, reason): + if reason.value.exitCode != 0: + self._log.error( + "CGI {uri} exited with exit code {exitCode}", + uri=self.request.uri, + exitCode=reason.value.exitCode, + ) + if self.errortext: + self._log.error( + "Errors from CGI {uri}: {errorText}", + uri=self.request.uri, + errorText=self.errortext, + ) + + if self.handling_headers: + self._log.error( + "Premature end of headers in {uri}: {headerText}", + uri=self.request.uri, + headerText=self.headertext, + ) + if not self._requestFinished: + self.request.write( + resource.ErrorPage( + http.INTERNAL_SERVER_ERROR, + "CGI Script Error", + "Premature end of script headers.", + ).render(self.request) + ) + + if not self._requestFinished: + self.request.unregisterProducer() + self.request.finish() + + def _finished(self, ignored): + """ + Record the end of the response generation for the request being + serviced. + """ + self._requestFinished = True diff --git a/contrib/python/Twisted/py3/twisted/web/util.py b/contrib/python/Twisted/py3/twisted/web/util.py new file mode 100644 index 0000000000..3135f05cd9 --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/util.py @@ -0,0 +1,38 @@ +# -*- test-case-name: twisted.web.test.test_util -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +An assortment of web server-related utilities. 
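+
+For example, a resource's C{render_GET} method might issue a redirect with
+something like the following (a minimal sketch; the target URL is made up)::
+
+    def render_GET(self, request):
+        return redirectTo(b"/new-location", request)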
+""" + +__all__ = [ + "redirectTo", + "Redirect", + "ChildRedirector", + "ParentRedirect", + "DeferredResource", + "FailureElement", + "formatFailure", + # publicized by unit tests: + "_FrameElement", + "_SourceFragmentElement", + "_SourceLineElement", + "_StackElement", + "_PRE", +] + +from ._template_util import ( + _PRE, + ChildRedirector, + DeferredResource, + FailureElement, + ParentRedirect, + Redirect, + _FrameElement, + _SourceFragmentElement, + _SourceLineElement, + _StackElement, + formatFailure, + redirectTo, +) diff --git a/contrib/python/Twisted/py3/twisted/web/vhost.py b/contrib/python/Twisted/py3/twisted/web/vhost.py new file mode 100644 index 0000000000..9576252b0f --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/vhost.py @@ -0,0 +1,137 @@ +# -*- test-case-name: twisted.web. +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +I am a virtual hosts implementation. +""" + + +# Twisted Imports +from twisted.python import roots +from twisted.web import pages, resource + + +class VirtualHostCollection(roots.Homogenous): + """Wrapper for virtual hosts collection. + + This exists for configuration purposes. + """ + + entityType = resource.Resource + + def __init__(self, nvh): + self.nvh = nvh + + def listStaticEntities(self): + return self.nvh.hosts.items() + + def getStaticEntity(self, name): + return self.nvh.hosts.get(self) + + def reallyPutEntity(self, name, entity): + self.nvh.addHost(name, entity) + + def delEntity(self, name): + self.nvh.removeHost(name) + + +class NameVirtualHost(resource.Resource): + """I am a resource which represents named virtual hosts.""" + + default = None + + def __init__(self): + """Initialize.""" + resource.Resource.__init__(self) + self.hosts = {} + + def listStaticEntities(self): + return resource.Resource.listStaticEntities(self) + [ + ("Virtual Hosts", VirtualHostCollection(self)) + ] + + def getStaticEntity(self, name): + if name == "Virtual Hosts": + return VirtualHostCollection(self) + else: + return resource.Resource.getStaticEntity(self, name) + + def addHost(self, name, resrc): + """Add a host to this virtual host. + + This will take a host named `name', and map it to a resource + `resrc'. 
For example, a setup for our virtual hosts would be:: + + nvh.addHost('divunal.com', divunalDirectory) + nvh.addHost('www.divunal.com', divunalDirectory) + nvh.addHost('twistedmatrix.com', twistedMatrixDirectory) + nvh.addHost('www.twistedmatrix.com', twistedMatrixDirectory) + """ + self.hosts[name] = resrc + + def removeHost(self, name): + """Remove a host.""" + del self.hosts[name] + + def _getResourceForRequest(self, request): + """(Internal) Get the appropriate resource for the given host.""" + hostHeader = request.getHeader(b"host") + if hostHeader is None: + return self.default or pages.notFound() + else: + host = hostHeader.lower().split(b":", 1)[0] + return self.hosts.get(host, self.default) or pages.notFound( + "Not Found", + f"host {host.decode('ascii', 'replace')!r} not in vhost map", + ) + + def render(self, request): + """Implementation of resource.Resource's render method.""" + resrc = self._getResourceForRequest(request) + return resrc.render(request) + + def getChild(self, path, request): + """Implementation of resource.Resource's getChild method.""" + resrc = self._getResourceForRequest(request) + if resrc.isLeaf: + request.postpath.insert(0, request.prepath.pop(-1)) + return resrc + else: + return resrc.getChildWithDefault(path, request) + + +class _HostResource(resource.Resource): + def getChild(self, path, request): + if b":" in path: + host, port = path.split(b":", 1) + port = int(port) + else: + host, port = path, 80 + request.setHost(host, port) + prefixLen = 3 + request.isSecure() + 4 + len(path) + len(request.prepath[-3]) + request.path = b"/" + b"/".join(request.postpath) + request.uri = request.uri[prefixLen:] + del request.prepath[:3] + return request.site.getResourceFor(request) + + +class VHostMonsterResource(resource.Resource): + + """ + Use this to be able to record the hostname and method (http vs. https) + in the URL without disturbing your web site. If you put this resource + in a URL http://foo.com/bar then requests to + http://foo.com/bar/http/baz.com/something will be equivalent to + http://foo.com/something, except that the hostname the request will + appear to be accessing will be "baz.com". So if "baz.com" is redirecting + all requests for to foo.com, while foo.com is inaccessible from the outside, + then redirect and url generation will work correctly + """ + + def getChild(self, path, request): + if path == b"http": + request.isSecure = lambda: 0 + elif path == b"https": + request.isSecure = lambda: 1 + return _HostResource() diff --git a/contrib/python/Twisted/py3/twisted/web/wsgi.py b/contrib/python/Twisted/py3/twisted/web/wsgi.py new file mode 100644 index 0000000000..43227f40e3 --- /dev/null +++ b/contrib/python/Twisted/py3/twisted/web/wsgi.py @@ -0,0 +1,589 @@ +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +An implementation of +U{Python Web Server Gateway Interface v1.0.1<http://www.python.org/dev/peps/pep-3333/>}. 
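+
+For example, a WSGI application object can be wrapped up and served with
+something like the following (a sketch mirroring the C{twistd web --wsgi}
+wiring; C{application} is assumed to be a WSGI callable)::
+
+    from twisted.internet import reactor
+    from twisted.python.threadpool import ThreadPool
+    from twisted.web.server import Site
+    from twisted.web.wsgi import WSGIResource
+
+    pool = ThreadPool()
+    reactor.callWhenRunning(pool.start)
+    reactor.addSystemEventTrigger("after", "shutdown", pool.stop)
+    site = Site(WSGIResource(reactor, pool, application))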
+""" + +from collections.abc import Sequence +from sys import exc_info +from warnings import warn + +from zope.interface import implementer + +from twisted.internet.threads import blockingCallFromThread +from twisted.logger import Logger +from twisted.python.failure import Failure +from twisted.web.http import INTERNAL_SERVER_ERROR +from twisted.web.resource import IResource +from twisted.web.server import NOT_DONE_YET + +# PEP-3333 -- which has superseded PEP-333 -- states that, in both Python 2 +# and Python 3, text strings MUST be represented using the platform's native +# string type, limited to characters defined in ISO-8859-1. Byte strings are +# used only for values read from wsgi.input, passed to write() or yielded by +# the application. +# +# Put another way: +# +# - In Python 2, all text strings and binary data are of type str/bytes and +# NEVER of type unicode. Whether the strings contain binary data or +# ISO-8859-1 text depends on context. +# +# - In Python 3, all text strings are of type str, and all binary data are of +# type bytes. Text MUST always be limited to that which can be encoded as +# ISO-8859-1, U+0000 to U+00FF inclusive. +# +# The following pair of functions -- _wsgiString() and _wsgiStringToBytes() -- +# are used to make Twisted's WSGI support compliant with the standard. +if str is bytes: + + def _wsgiString(string): # Python 2. + """ + Convert C{string} to an ISO-8859-1 byte string, if it is not already. + + @type string: C{str}/C{bytes} or C{unicode} + @rtype: C{str}/C{bytes} + + @raise UnicodeEncodeError: If C{string} contains non-ISO-8859-1 chars. + """ + if isinstance(string, str): + return string + else: + return string.encode("iso-8859-1") + + def _wsgiStringToBytes(string): # Python 2. + """ + Return C{string} as is; a WSGI string is a byte string in Python 2. + + @type string: C{str}/C{bytes} + @rtype: C{str}/C{bytes} + """ + return string + +else: + + def _wsgiString(string): # Python 3. + """ + Convert C{string} to a WSGI "bytes-as-unicode" string. + + If it's a byte string, decode as ISO-8859-1. If it's a Unicode string, + round-trip it to bytes and back using ISO-8859-1 as the encoding. + + @type string: C{str} or C{bytes} + @rtype: C{str} + + @raise UnicodeEncodeError: If C{string} contains non-ISO-8859-1 chars. + """ + if isinstance(string, str): + return string.encode("iso-8859-1").decode("iso-8859-1") + else: + return string.decode("iso-8859-1") + + def _wsgiStringToBytes(string): # Python 3. + """ + Convert C{string} from a WSGI "bytes-as-unicode" string to an + ISO-8859-1 byte string. + + @type string: C{str} + @rtype: C{bytes} + + @raise UnicodeEncodeError: If C{string} contains non-ISO-8859-1 chars. + """ + return string.encode("iso-8859-1") + + +class _ErrorStream: + """ + File-like object instances of which are used as the value for the + C{'wsgi.errors'} key in the C{environ} dictionary passed to the application + object. + + This simply passes writes on to L{logging<twisted.logger>} system as + error events from the C{'wsgi'} system. In the future, it may be desirable + to expose more information in the events it logs, such as the application + object which generated the message. + """ + + _log = Logger() + + def write(self, data): + """ + Generate an event for the logging system with the given bytes as the + message. + + This is called in a WSGI application thread, not the I/O thread. + + @type data: str + + @raise TypeError: On Python 3, if C{data} is not a native string. On + Python 2 a warning will be issued. 
+ """ + if not isinstance(data, str): + if str is bytes: + warn( + "write() argument should be str, not %r (%s)" + % (data, type(data).__name__), + category=UnicodeWarning, + ) + else: + raise TypeError( + "write() argument must be str, not %r (%s)" + % (data, type(data).__name__) + ) + + # Note that in old style, message was a tuple. logger._legacy + # will overwrite this value if it is not properly formatted here. + self._log.error(data, system="wsgi", isError=True, message=(data,)) + + def writelines(self, iovec): + """ + Join the given lines and pass them to C{write} to be handled in the + usual way. + + This is called in a WSGI application thread, not the I/O thread. + + @param iovec: A C{list} of C{'\\n'}-terminated C{str} which will be + logged. + + @raise TypeError: On Python 3, if C{iovec} contains any non-native + strings. On Python 2 a warning will be issued. + """ + self.write("".join(iovec)) + + def flush(self): + """ + Nothing is buffered, so flushing does nothing. This method is required + to exist by PEP 333, though. + + This is called in a WSGI application thread, not the I/O thread. + """ + + +class _InputStream: + """ + File-like object instances of which are used as the value for the + C{'wsgi.input'} key in the C{environ} dictionary passed to the application + object. + + This only exists to make the handling of C{readline(-1)} consistent across + different possible underlying file-like object implementations. The other + supported methods pass through directly to the wrapped object. + """ + + def __init__(self, input): + """ + Initialize the instance. + + This is called in the I/O thread, not a WSGI application thread. + """ + self._wrapped = input + + def read(self, size=None): + """ + Pass through to the underlying C{read}. + + This is called in a WSGI application thread, not the I/O thread. + """ + # Avoid passing None because cStringIO and file don't like it. + if size is None: + return self._wrapped.read() + return self._wrapped.read(size) + + def readline(self, size=None): + """ + Pass through to the underlying C{readline}, with a size of C{-1} replaced + with a size of L{None}. + + This is called in a WSGI application thread, not the I/O thread. + """ + # Check for -1 because StringIO doesn't handle it correctly. Check for + # None because files and tempfiles don't accept that. + if size == -1 or size is None: + return self._wrapped.readline() + return self._wrapped.readline(size) + + def readlines(self, size=None): + """ + Pass through to the underlying C{readlines}. + + This is called in a WSGI application thread, not the I/O thread. + """ + # Avoid passing None because cStringIO and file don't like it. + if size is None: + return self._wrapped.readlines() + return self._wrapped.readlines(size) + + def __iter__(self): + """ + Pass through to the underlying C{__iter__}. + + This is called in a WSGI application thread, not the I/O thread. + """ + return iter(self._wrapped) + + +class _WSGIResponse: + """ + Helper for L{WSGIResource} which drives the WSGI application using a + threadpool and hooks it up to the L{http.Request}. + + @ivar started: A L{bool} indicating whether or not the response status and + headers have been written to the request yet. This may only be read or + written in the WSGI application thread. + + @ivar reactor: An L{IReactorThreads} provider which is used to call methods + on the request in the I/O thread. + + @ivar threadpool: A L{ThreadPool} which is used to call the WSGI + application object in a non-I/O thread. 
+ + @ivar application: The WSGI application object. + + @ivar request: The L{http.Request} upon which the WSGI environment is + based and to which the application's output will be sent. + + @ivar environ: The WSGI environment L{dict}. + + @ivar status: The HTTP response status L{str} supplied to the WSGI + I{start_response} callable by the application. + + @ivar headers: A list of HTTP response headers supplied to the WSGI + I{start_response} callable by the application. + + @ivar _requestFinished: A flag which indicates whether it is possible to + generate more response data or not. This is L{False} until + L{http.Request.notifyFinish} tells us the request is done, + then L{True}. + """ + + _requestFinished = False + _log = Logger() + + def __init__(self, reactor, threadpool, application, request): + self.started = False + self.reactor = reactor + self.threadpool = threadpool + self.application = application + self.request = request + self.request.notifyFinish().addBoth(self._finished) + + if request.prepath: + scriptName = b"/" + b"/".join(request.prepath) + else: + scriptName = b"" + + if request.postpath: + pathInfo = b"/" + b"/".join(request.postpath) + else: + pathInfo = b"" + + parts = request.uri.split(b"?", 1) + if len(parts) == 1: + queryString = b"" + else: + queryString = parts[1] + + # All keys and values need to be native strings, i.e. of type str in + # *both* Python 2 and Python 3, so says PEP-3333. + self.environ = { + "REQUEST_METHOD": _wsgiString(request.method), + "REMOTE_ADDR": _wsgiString(request.getClientAddress().host), + "SCRIPT_NAME": _wsgiString(scriptName), + "PATH_INFO": _wsgiString(pathInfo), + "QUERY_STRING": _wsgiString(queryString), + "CONTENT_TYPE": _wsgiString(request.getHeader(b"content-type") or ""), + "CONTENT_LENGTH": _wsgiString(request.getHeader(b"content-length") or ""), + "SERVER_NAME": _wsgiString(request.getRequestHostname()), + "SERVER_PORT": _wsgiString(str(request.getHost().port)), + "SERVER_PROTOCOL": _wsgiString(request.clientproto), + } + + # The application object is entirely in control of response headers; + # disable the default Content-Type value normally provided by + # twisted.web.server.Request. + self.request.defaultContentType = None + + for name, values in request.requestHeaders.getAllRawHeaders(): + name = "HTTP_" + _wsgiString(name).upper().replace("-", "_") + # It might be preferable for http.HTTPChannel to clear out + # newlines. + self.environ[name] = ",".join(_wsgiString(v) for v in values).replace( + "\n", " " + ) + + self.environ.update( + { + "wsgi.version": (1, 0), + "wsgi.url_scheme": request.isSecure() and "https" or "http", + "wsgi.run_once": False, + "wsgi.multithread": True, + "wsgi.multiprocess": False, + "wsgi.errors": _ErrorStream(), + # Attend: request.content was owned by the I/O thread up until + # this point. By wrapping it and putting the result into the + # environment dictionary, it is effectively being given to + # another thread. This means that whatever it is, it has to be + # safe to access it from two different threads. The access + # *should* all be serialized (first the I/O thread writes to + # it, then the WSGI thread reads from it, then the I/O thread + # closes it). However, since the request is made available to + # arbitrary application code during resource traversal, it's + # possible that some other code might decide to use it in the + # I/O thread concurrently with its use in the WSGI thread. + # More likely than not, this will break. 
This seems like an + # unlikely possibility to me, but if it is to be allowed, + # something here needs to change. -exarkun + "wsgi.input": _InputStream(request.content), + } + ) + + def _finished(self, ignored): + """ + Record the end of the response generation for the request being + serviced. + """ + self._requestFinished = True + + def startResponse(self, status, headers, excInfo=None): + """ + The WSGI I{start_response} callable. The given values are saved until + they are needed to generate the response. + + This will be called in a non-I/O thread. + """ + if self.started and excInfo is not None: + raise excInfo[1].with_traceback(excInfo[2]) + + # PEP-3333 mandates that status should be a native string. In practice + # this is mandated by Twisted's HTTP implementation too, so we enforce + # on both Python 2 and Python 3. + if not isinstance(status, str): + raise TypeError( + "status must be str, not {!r} ({})".format( + status, type(status).__name__ + ) + ) + + # PEP-3333 mandates that headers should be a plain list, but in + # practice we work with any sequence type and only warn when it's not + # a plain list. + if isinstance(headers, list): + pass # This is okay. + elif isinstance(headers, Sequence): + warn( + "headers should be a list, not %r (%s)" + % (headers, type(headers).__name__), + category=RuntimeWarning, + ) + else: + raise TypeError( + "headers must be a list, not %r (%s)" + % (headers, type(headers).__name__) + ) + + # PEP-3333 mandates that each header should be a (str, str) tuple, but + # in practice we work with any sequence type and only warn when it's + # not a plain list. + for header in headers: + if isinstance(header, tuple): + pass # This is okay. + elif isinstance(header, Sequence): + warn( + "header should be a (str, str) tuple, not %r (%s)" + % (header, type(header).__name__), + category=RuntimeWarning, + ) + else: + raise TypeError( + "header must be a (str, str) tuple, not %r (%s)" + % (header, type(header).__name__) + ) + + # However, the sequence MUST contain only 2 elements. + if len(header) != 2: + raise TypeError(f"header must be a (str, str) tuple, not {header!r}") + + # Both elements MUST be native strings. Non-native strings will be + # rejected by the underlying HTTP machinery in any case, but we + # reject them here in order to provide a more informative error. + for elem in header: + if not isinstance(elem, str): + raise TypeError(f"header must be (str, str) tuple, not {header!r}") + + self.status = status + self.headers = headers + return self.write + + def write(self, data): + """ + The WSGI I{write} callable returned by the I{start_response} callable. + The given bytes will be written to the response body, possibly flushing + the status and headers first. + + This will be called in a non-I/O thread. + """ + # PEP-3333 states: + # + # The server or gateway must transmit the yielded bytestrings to the + # client in an unbuffered fashion, completing the transmission of + # each bytestring before requesting another one. + # + # This write() method is used for the imperative and (indirectly) for + # the more familiar iterable-of-bytestrings WSGI mechanism. It uses + # C{blockingCallFromThread} to schedule writes. This allows exceptions + # to propagate up from the underlying HTTP implementation. However, + # that underlying implementation does not, as yet, provide any way to + # know if the written data has been transmitted, so this method + # violates the above part of PEP-3333. 
+ # + # PEP-3333 also says that a server may: + # + # Use a different thread to ensure that the block continues to be + # transmitted while the application produces the next block. + # + # Which suggests that this is actually compliant with PEP-3333, + # because writes are done in the reactor thread. + # + # However, providing some back-pressure may nevertheless be a Good + # Thing at some point in the future. + + def wsgiWrite(started): + if not started: + self._sendResponseHeaders() + self.request.write(data) + + try: + return blockingCallFromThread(self.reactor, wsgiWrite, self.started) + finally: + self.started = True + + def _sendResponseHeaders(self): + """ + Set the response code and response headers on the request object, but + do not flush them. The caller is responsible for doing a write in + order for anything to actually be written out in response to the + request. + + This must be called in the I/O thread. + """ + code, message = self.status.split(None, 1) + code = int(code) + self.request.setResponseCode(code, _wsgiStringToBytes(message)) + + for name, value in self.headers: + # Don't allow the application to control these required headers. + if name.lower() not in ("server", "date"): + self.request.responseHeaders.addRawHeader( + _wsgiStringToBytes(name), _wsgiStringToBytes(value) + ) + + def start(self): + """ + Start the WSGI application in the threadpool. + + This must be called in the I/O thread. + """ + self.threadpool.callInThread(self.run) + + def run(self): + """ + Call the WSGI application object, iterate it, and handle its output. + + This must be called in a non-I/O thread (ie, a WSGI application + thread). + """ + try: + appIterator = self.application(self.environ, self.startResponse) + for elem in appIterator: + if elem: + self.write(elem) + if self._requestFinished: + break + close = getattr(appIterator, "close", None) + if close is not None: + close() + except BaseException: + + def wsgiError(started, type, value, traceback): + self._log.failure( + "WSGI application error", failure=Failure(value, type, traceback) + ) + if started: + self.request.loseConnection() + else: + self.request.setResponseCode(INTERNAL_SERVER_ERROR) + self.request.finish() + + self.reactor.callFromThread(wsgiError, self.started, *exc_info()) + else: + + def wsgiFinish(started): + if not self._requestFinished: + if not started: + self._sendResponseHeaders() + self.request.finish() + + self.reactor.callFromThread(wsgiFinish, self.started) + self.started = True + + +@implementer(IResource) +class WSGIResource: + """ + An L{IResource} implementation which delegates responsibility for all + resources hierarchically inferior to it to a WSGI application. + + @ivar _reactor: An L{IReactorThreads} provider which will be passed on to + L{_WSGIResponse} to schedule calls in the I/O thread. + + @ivar _threadpool: A L{ThreadPool} which will be passed on to + L{_WSGIResponse} to run the WSGI application object. + + @ivar _application: The WSGI application object. + """ + + # Further resource segments are left up to the WSGI application object to + # handle. + isLeaf = True + + def __init__(self, reactor, threadpool, application): + self._reactor = reactor + self._threadpool = threadpool + self._application = application + + def render(self, request): + """ + Turn the request into the appropriate C{environ} C{dict} suitable to be + passed to the WSGI application object and then pass it on. + + The WSGI application object is given almost complete control of the + rendering process. 
C{NOT_DONE_YET} will always be returned, and response completion will be
+ dictated by the application object, as will the status, headers, and the
+ response body.
+ """
+ response = _WSGIResponse(
+ self._reactor, self._threadpool, self._application, request
+ )
+ response.start()
+ return NOT_DONE_YET
+
+ def getChildWithDefault(self, name, request):
+ """
+ Reject attempts to retrieve a child resource. All path segments beyond
+ the one which refers to this resource are handled by the WSGI
+ application object.
+ """
+ raise RuntimeError("Cannot get IResource children from WSGIResource")
+
+ def putChild(self, path, child):
+ """
+ Reject attempts to add a child resource to this resource. The WSGI
+ application object handles all path segments beneath this resource, so
+ L{IResource} children can never be found.
+ """
+ raise RuntimeError("Cannot put IResource children under WSGIResource")
+
+
+__all__ = ["WSGIResource"]
diff --git a/contrib/python/Twisted/py3/twisted/web/xmlrpc.py b/contrib/python/Twisted/py3/twisted/web/xmlrpc.py
new file mode 100644
index 0000000000..25797efc2f
--- /dev/null
+++ b/contrib/python/Twisted/py3/twisted/web/xmlrpc.py
@@ -0,0 +1,633 @@
+# -*- test-case-name: twisted.web.test.test_xmlrpc -*-
+# Copyright (c) Twisted Matrix Laboratories.
+# See LICENSE for details.
+
+"""
+A generic resource for publishing objects via XML-RPC.
+
+Maintainer: Itamar Shtull-Trauring
+
+@var Fault: See L{xmlrpclib.Fault}
+@type Fault: L{xmlrpclib.Fault}
+"""
+
+
+# System Imports
+import base64
+import xmlrpc.client as xmlrpclib
+from urllib.parse import urlparse
+from xmlrpc.client import Binary, Boolean, DateTime, Fault
+
+from twisted.internet import defer, error, protocol
+from twisted.logger import Logger
+from twisted.python import failure, reflect
+from twisted.python.compat import nativeString
+
+# Sibling Imports
+from twisted.web import http, resource, server
+
+# These are deprecated; use the class level definitions
+NOT_FOUND = 8001
+FAILURE = 8002
+
+
+def withRequest(f):
+ """
+ Decorator to cause the request to be passed as the first argument
+ to the method.
+
+ If an I{xmlrpc_} method is wrapped with C{withRequest}, the
+ request object is passed as the first argument to that method.
+ For example::
+
+ @withRequest
+ def xmlrpc_echo(self, request, s):
+ return s
+
+ @since: 10.2
+ """
+ f.withRequest = True
+ return f
+
+
+class NoSuchFunction(Fault):
+ """
+ There is no function by the given name.
+ """
+
+
+class Handler:
+ """
+ Handle an XML-RPC request and store the state for a request in progress.
+
+ Override the run() method and return the result using self.result,
+ a Deferred.
+
+ We require this class since we're not using threads, so we can't
+ encapsulate state in a running function if we're going to have
+ to wait for results.
+
+ For example, let's say we want to authenticate against twisted.cred,
+ run an LDAP query and then pass its result to a database query, all
+ as a result of a single XML-RPC command. We'd use a Handler instance
+ to store the state of the running command.
+ """
+
+ def __init__(self, resource, *args):
+ self.resource = resource # the XML-RPC resource we are connected to
+ self.result = defer.Deferred()
+ self.run(*args)
+
+ def run(self, *args):
+ # event driven equivalent of 'raise UnimplementedError'
+ self.result.errback(NotImplementedError("Implement run() in subclasses"))
+
+
+class XMLRPC(resource.Resource):
+ """
+ A resource that implements XML-RPC.
+
+ You probably want to connect this to '/RPC2'.
+ + Methods published can return XML-RPC serializable results, Faults, + Binary, Boolean, DateTime, Deferreds, or Handler instances. + + By default methods beginning with 'xmlrpc_' are published. + + Sub-handlers for prefixed methods (e.g., system.listMethods) + can be added with putSubHandler. By default, prefixes are + separated with a '.'. Override self.separator to change this. + + @ivar allowNone: Permit XML translating of Python constant None. + @type allowNone: C{bool} + + @ivar useDateTime: Present C{datetime} values as C{datetime.datetime} + objects? + @type useDateTime: C{bool} + """ + + # Error codes for Twisted, if they conflict with yours then + # modify them at runtime. + NOT_FOUND = 8001 + FAILURE = 8002 + + isLeaf = 1 + separator = "." + allowedMethods = (b"POST",) + _log = Logger() + + def __init__(self, allowNone=False, useDateTime=False): + resource.Resource.__init__(self) + self.subHandlers = {} + self.allowNone = allowNone + self.useDateTime = useDateTime + + def __setattr__(self, name, value): + self.__dict__[name] = value + + def putSubHandler(self, prefix, handler): + self.subHandlers[prefix] = handler + + def getSubHandler(self, prefix): + return self.subHandlers.get(prefix, None) + + def getSubHandlerPrefixes(self): + return list(self.subHandlers.keys()) + + def render_POST(self, request): + request.content.seek(0, 0) + request.setHeader(b"content-type", b"text/xml; charset=utf-8") + try: + args, functionPath = xmlrpclib.loads( + request.content.read(), use_datetime=self.useDateTime + ) + except Exception as e: + f = Fault(self.FAILURE, f"Can't deserialize input: {e}") + self._cbRender(f, request) + else: + try: + function = self.lookupProcedure(functionPath) + except Fault as f: + self._cbRender(f, request) + else: + # Use this list to track whether the response has failed or not. + # This will be used later on to decide if the result of the + # Deferred should be written out and Request.finish called. + responseFailed = [] + request.notifyFinish().addErrback(responseFailed.append) + if getattr(function, "withRequest", False): + d = defer.maybeDeferred(function, request, *args) + else: + d = defer.maybeDeferred(function, *args) + d.addErrback(self._ebRender) + d.addCallback(self._cbRender, request, responseFailed) + return server.NOT_DONE_YET + + def _cbRender(self, result, request, responseFailed=None): + if responseFailed: + return + + if isinstance(result, Handler): + result = result.result + if not isinstance(result, Fault): + result = (result,) + try: + try: + content = xmlrpclib.dumps( + result, methodresponse=True, allow_none=self.allowNone + ) + except Exception as e: + f = Fault(self.FAILURE, f"Can't serialize output: {e}") + content = xmlrpclib.dumps( + f, methodresponse=True, allow_none=self.allowNone + ) + + if isinstance(content, str): + content = content.encode("utf8") + request.setHeader(b"content-length", b"%d" % (len(content),)) + request.write(content) + except Exception: + self._log.failure("") + request.finish() + + def _ebRender(self, failure): + if isinstance(failure.value, Fault): + return failure.value + self._log.failure("", failure) + return Fault(self.FAILURE, "error") + + def lookupProcedure(self, procedurePath): + """ + Given a string naming a procedure, return a callable object for that + procedure or raise NoSuchFunction. + + The returned object will be called, and should return the result of the + procedure, a Deferred, or a Fault instance. + + Override in subclasses if you want your own policy. 
The base
+ implementation is that, given C{'foo'}, C{self.xmlrpc_foo} will be returned.
+ If C{procedurePath} contains C{self.separator}, the sub-handler for the
+ initial prefix is used to search for the remaining path.
+
+ If you override C{lookupProcedure}, you may also want to override
+ C{listProcedures} to accurately report the procedures supported by your
+ resource, so that clients using the I{system.listMethods} procedure
+ receive accurate results.
+
+ @since: 11.1
+ """
+ if procedurePath.find(self.separator) != -1:
+ prefix, procedurePath = procedurePath.split(self.separator, 1)
+ handler = self.getSubHandler(prefix)
+ if handler is None:
+ raise NoSuchFunction(self.NOT_FOUND, "no such subHandler %s" % prefix)
+ return handler.lookupProcedure(procedurePath)
+
+ f = getattr(self, "xmlrpc_%s" % procedurePath, None)
+ if not f:
+ raise NoSuchFunction(
+ self.NOT_FOUND, "procedure %s not found" % procedurePath
+ )
+ elif not callable(f):
+ raise NoSuchFunction(
+ self.NOT_FOUND, "procedure %s not callable" % procedurePath
+ )
+ else:
+ return f
+
+ def listProcedures(self):
+ """
+ Return a list of the names of all xmlrpc procedures.
+
+ @since: 11.1
+ """
+ return reflect.prefixedMethodNames(self.__class__, "xmlrpc_")
+
+
+class XMLRPCIntrospection(XMLRPC):
+ """
+ Implement the XML-RPC Introspection API.
+
+ By default, the methodHelp method returns the 'help' method attribute,
+ if it exists, otherwise the __doc__ method attribute, if it exists,
+ otherwise the empty string.
+
+ To enable the methodSignature method, add a 'signature' method attribute
+ containing a list of lists. See methodSignature's documentation for the
+ format. Note the type strings should be XML-RPC types, not Python types.
+ """
+
+ def __init__(self, parent):
+ """
+ Implement Introspection support for an XMLRPC server.
+
+ @param parent: the XMLRPC server to add Introspection support to.
+ @type parent: L{XMLRPC}
+ """
+ XMLRPC.__init__(self)
+ self._xmlrpc_parent = parent
+
+ def xmlrpc_listMethods(self):
+ """
+ Return a list of the method names implemented by this server.
+ """
+ functions = []
+ todo = [(self._xmlrpc_parent, "")]
+ while todo:
+ obj, prefix = todo.pop(0)
+ functions.extend([prefix + name for name in obj.listProcedures()])
+ todo.extend(
+ [
+ (obj.getSubHandler(name), prefix + name + obj.separator)
+ for name in obj.getSubHandlerPrefixes()
+ ]
+ )
+ return functions
+
+ xmlrpc_listMethods.signature = [["array"]] # type: ignore[attr-defined]
+
+ def xmlrpc_methodHelp(self, method):
+ """
+ Return a documentation string describing the use of the given method.
+ """
+ method = self._xmlrpc_parent.lookupProcedure(method)
+ return getattr(method, "help", None) or getattr(method, "__doc__", None) or ""
+
+ xmlrpc_methodHelp.signature = [["string", "string"]] # type: ignore[attr-defined]
+
+ def xmlrpc_methodSignature(self, method):
+ """
+ Return a list of type signatures.
+
+ Each type signature is a list of the form [rtype, type1, type2, ...]
+ where rtype is the return type and typeN is the type of the Nth
+ argument. If no signature information is available, the empty
+ string is returned.
+ """
+ method = self._xmlrpc_parent.lookupProcedure(method)
+ return getattr(method, "signature", None) or ""
+
+ xmlrpc_methodSignature.signature = [ # type: ignore[attr-defined]
+ ["array", "string"],
+ ["string", "string"],
+ ]
+
+
+def addIntrospection(xmlrpc):
+ """
+ Add Introspection support to an XMLRPC server.
+
+ @param xmlrpc: the XMLRPC server to add Introspection support to.
+ @type xmlrpc: L{XMLRPC}
+ """
+ xmlrpc.putSubHandler("system", XMLRPCIntrospection(xmlrpc))
+
+
+class QueryProtocol(http.HTTPClient):
+ def connectionMade(self):
+ self._response = None
+ self.sendCommand(b"POST", self.factory.path)
+ self.sendHeader(b"User-Agent", b"Twisted/XMLRPClib")
+ self.sendHeader(b"Host", self.factory.host)
+ self.sendHeader(b"Content-type", b"text/xml; charset=utf-8")
+ payload = self.factory.payload
+ self.sendHeader(b"Content-length", b"%d" % (len(payload),))
+
+ if self.factory.user:
+ auth = b":".join([self.factory.user, self.factory.password])
+ authHeader = b"".join([b"Basic ", base64.b64encode(auth)])
+ self.sendHeader(b"Authorization", authHeader)
+ self.endHeaders()
+ self.transport.write(payload)
+
+ def handleStatus(self, version, status, message):
+ if status != b"200":
+ self.factory.badStatus(status, message)
+
+ def handleResponse(self, contents):
+ """
+ Handle the XML-RPC response received from the server.
+
+ Specifically, disconnect from the server and store the XML-RPC
+ response so that it can be properly handled when the disconnect is
+ finished.
+ """
+ self.transport.loseConnection()
+ self._response = contents
+
+ def connectionLost(self, reason):
+ """
+ The connection to the server has been lost.
+
+ If we have a full response from the server, then parse it and fire a
+ Deferred with the return value or C{Fault} that the server gave us.
+ """
+ if not reason.check(error.ConnectionDone, error.ConnectionLost):
+ # for example, ssl.SSL.Error
+ self.factory.clientConnectionLost(None, reason)
+ http.HTTPClient.connectionLost(self, reason)
+ if self._response is not None:
+ response, self._response = self._response, None
+ self.factory.parseResponse(response)
+
+
+payloadTemplate = """<?xml version="1.0"?>
+<methodCall>
+<methodName>%s</methodName>
+%s
+</methodCall>
+"""
+
+
+class QueryFactory(protocol.ClientFactory):
+ """
+ XML-RPC Client Factory
+
+ @ivar path: The path portion of the URL to which to post method calls.
+ @type path: L{bytes}
+
+ @ivar host: The value to use for the Host HTTP header.
+ @type host: L{bytes}
+
+ @ivar user: The username with which to authenticate with the server
+ when making calls.
+ @type user: L{bytes} or L{None}
+
+ @ivar password: The password with which to authenticate with the server
+ when making calls.
+ @type password: L{bytes} or L{None}
+
+ @ivar useDateTime: Accept datetime values as datetime.datetime objects;
+ also passed to the underlying xmlrpclib implementation. Defaults to
+ C{False}.
+ @type useDateTime: C{bool}
+ """
+
+ deferred = None
+ protocol = QueryProtocol
+
+ def __init__(
+ self,
+ path,
+ host,
+ method,
+ user=None,
+ password=None,
+ allowNone=False,
+ args=(),
+ canceller=None,
+ useDateTime=False,
+ ):
+ """
+ @param method: The name of the method to call.
+ @type method: C{str}
+
+ @param allowNone: allow the use of None values in parameters. It's
+ passed to the underlying xmlrpclib implementation. Defaults to
+ C{False}.
+ @type allowNone: C{bool} or L{None}
+
+ @param args: the arguments to pass to the method.
+ @type args: C{tuple}
+
+ @param canceller: A 1-argument callable passed to the deferred as the
+ canceller callback.
+ @type canceller: callable or L{None} + """ + self.path, self.host = path, host + self.user, self.password = user, password + self.payload = payloadTemplate % ( + method, + xmlrpclib.dumps(args, allow_none=allowNone), + ) + if isinstance(self.payload, str): + self.payload = self.payload.encode("utf8") + self.deferred = defer.Deferred(canceller) + self.useDateTime = useDateTime + + def parseResponse(self, contents): + if not self.deferred: + return + try: + response = xmlrpclib.loads(contents, use_datetime=self.useDateTime)[0][0] + except BaseException: + deferred, self.deferred = self.deferred, None + deferred.errback(failure.Failure()) + else: + deferred, self.deferred = self.deferred, None + deferred.callback(response) + + def clientConnectionLost(self, _, reason): + if self.deferred is not None: + deferred, self.deferred = self.deferred, None + deferred.errback(reason) + + clientConnectionFailed = clientConnectionLost + + def badStatus(self, status, message): + deferred, self.deferred = self.deferred, None + deferred.errback(ValueError(status, message)) + + +class Proxy: + """ + A Proxy for making remote XML-RPC calls. + + Pass the URL of the remote XML-RPC server to the constructor. + + Use C{proxy.callRemote('foobar', *args)} to call remote method + 'foobar' with *args. + + @ivar user: The username with which to authenticate with the server + when making calls. If specified, overrides any username information + embedded in C{url}. If not specified, a value may be taken from + C{url} if present. + @type user: L{bytes} or L{None} + + @ivar password: The password with which to authenticate with the server + when making calls. If specified, overrides any password information + embedded in C{url}. If not specified, a value may be taken from + C{url} if present. + @type password: L{bytes} or L{None} + + @ivar allowNone: allow the use of None values in parameters. It's + passed to the underlying L{xmlrpclib} implementation. Defaults to + C{False}. + @type allowNone: C{bool} or L{None} + + @ivar useDateTime: Accept datetime values as datetime.datetime objects. + also passed to the underlying L{xmlrpclib} implementation. Defaults to + C{False}. + @type useDateTime: C{bool} + + @ivar connectTimeout: Number of seconds to wait before assuming the + connection has failed. + @type connectTimeout: C{float} + + @ivar _reactor: The reactor used to create connections. + @type _reactor: Object providing L{twisted.internet.interfaces.IReactorTCP} + + @ivar queryFactory: Object returning a factory for XML-RPC protocol. Use + this for testing, or to manipulate the XML-RPC parsing behavior. For + example, you may set this to a custom "debugging" factory object that + reimplements C{parseResponse} in order to log the raw XML-RPC contents + from the server before continuing on with parsing. Another possibility + is to implement your own XML-RPC marshaller here to handle non-standard + XML-RPC traffic. + @type queryFactory: L{twisted.web.xmlrpc.QueryFactory} + """ + + queryFactory = QueryFactory + + def __init__( + self, + url, + user=None, + password=None, + allowNone=False, + useDateTime=False, + connectTimeout=30.0, + reactor=None, + ): + """ + @param url: The URL to which to post method calls. Calls will be made + over SSL if the scheme is HTTPS. If netloc contains username or + password information, these will be used to authenticate, as long as + the C{user} and C{password} arguments are not specified. 
+ @type url: L{bytes} + + """ + if reactor is None: + from twisted.internet import reactor + + scheme, netloc, path, params, query, fragment = urlparse(url) + netlocParts = netloc.split(b"@") + if len(netlocParts) == 2: + userpass = netlocParts.pop(0).split(b":") + self.user = userpass.pop(0) + try: + self.password = userpass.pop(0) + except BaseException: + self.password = None + else: + self.user = self.password = None + hostport = netlocParts[0].split(b":") + self.host = hostport.pop(0) + try: + self.port = int(hostport.pop(0)) + except BaseException: + self.port = None + self.path = path + if self.path in [b"", None]: + self.path = b"/" + self.secure = scheme == b"https" + if user is not None: + self.user = user + if password is not None: + self.password = password + self.allowNone = allowNone + self.useDateTime = useDateTime + self.connectTimeout = connectTimeout + self._reactor = reactor + + def callRemote(self, method, *args): + """ + Call remote XML-RPC C{method} with given arguments. + + @return: a L{defer.Deferred} that will fire with the method response, + or a failure if the method failed. Generally, the failure type will + be L{Fault}, but you can also have an C{IndexError} on some buggy + servers giving empty responses. + + If the deferred is cancelled before the request completes, the + connection is closed and the deferred will fire with a + L{defer.CancelledError}. + """ + + def cancel(d): + factory.deferred = None + connector.disconnect() + + factory = self.queryFactory( + self.path, + self.host, + method, + self.user, + self.password, + self.allowNone, + args, + cancel, + self.useDateTime, + ) + + if self.secure: + from twisted.internet import ssl + + contextFactory = ssl.optionsForClientTLS(hostname=nativeString(self.host)) + connector = self._reactor.connectSSL( + nativeString(self.host), + self.port or 443, + factory, + contextFactory, + timeout=self.connectTimeout, + ) + else: + connector = self._reactor.connectTCP( + nativeString(self.host), + self.port or 80, + factory, + timeout=self.connectTimeout, + ) + return factory.deferred + + +__all__ = [ + "XMLRPC", + "Handler", + "NoSuchFunction", + "Proxy", + "Fault", + "Binary", + "Boolean", + "DateTime", +] |
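Editor's note on usage: the WSGI glue earlier in this diff is easiest to follow end to end. Below is a minimal sketch of mounting a PEP-3333 application with WSGIResource on the reactor's default thread pool; the application callable, port 8080, and the response body are illustrative assumptions, not anything defined in these files. Note that the status is a native str and the headers are a plain list of (str, str) tuples, which is exactly what startResponse enforces above.

    from twisted.internet import reactor
    from twisted.web.server import Site
    from twisted.web.wsgi import WSGIResource

    def application(environ, start_response):
        # A deliberately tiny PEP-3333 application (hypothetical example):
        # str status, a plain list of (str, str) header tuples, and an
        # iterable of bytes for the body.
        start_response("200 OK", [("Content-Type", "text/plain")])
        return [b"hello from WSGI under Twisted\n"]

    # WSGIResource takes the reactor (for I/O-thread callbacks), a thread
    # pool (where the application callable runs), and the application.
    resource = WSGIResource(reactor, reactor.getThreadPool(), application)
    reactor.listenTCP(8080, Site(resource))
    reactor.run()

The application runs in a worker thread while writes are marshalled back to the reactor thread, as described in the write() comments above.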
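For the XML-RPC server side, a short sketch ties together the pieces documented in the docstrings above: xmlrpc_-prefixed methods, Deferred results, the withRequest decorator, a prefixed sub-handler registered with putSubHandler, and addIntrospection. Class names, the port 7080, and the example behaviours are illustrative only.

    from twisted.internet import defer, reactor
    from twisted.web import server, xmlrpc

    class Clock(xmlrpc.XMLRPC):
        # Reachable as "time.sleepyEcho" once registered via putSubHandler.
        def xmlrpc_sleepyEcho(self, s):
            d = defer.Deferred()
            reactor.callLater(1, d.callback, s)  # Deferred results are allowed
            return d

    class Example(xmlrpc.XMLRPC):
        """An XML-RPC resource, typically published at /RPC2."""

        def xmlrpc_echo(self, s):
            return s

        @xmlrpc.withRequest
        def xmlrpc_userAgent(self, request):
            # withRequest passes the twisted.web request as the first argument.
            return request.getHeader("user-agent") or ""

    root = Example(allowNone=True)
    root.putSubHandler("time", Clock())  # called as "time.sleepyEcho"
    xmlrpc.addIntrospection(root)        # publishes system.listMethods etc.
    reactor.listenTCP(7080, server.Site(root))
    reactor.run()

lookupProcedure resolves "time.sleepyEcho" by splitting on the separator and delegating to the sub-handler, which is the policy described in its docstring.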
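On the client side, Proxy.callRemote returns a Deferred that fires with the decoded response or errbacks, usually with a Fault. A hedged sketch, assuming the hypothetical server above is listening on localhost:7080; note the URL is passed as bytes, which is what this Proxy implementation parses.

    from twisted.internet import reactor
    from twisted.web.xmlrpc import Fault, Proxy

    proxy = Proxy(b"http://localhost:7080/RPC2", allowNone=True)

    def printResult(value):
        print("result:", value)

    def printError(reason):
        # Remote procedure errors usually arrive as Fault failures.
        reason.trap(Fault)
        print("fault:", reason.value.faultCode, reason.value.faultString)

    d = proxy.callRemote("echo", "hello")
    d.addCallbacks(printResult, printError)
    d.addBoth(lambda ignored: reactor.stop())
    reactor.run()

Cancelling the Deferred before the call completes closes the connection and fires CancelledError, as documented in callRemote above.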