author | shmel1k <shmel1k@ydb.tech> | 2023-11-26 18:16:14 +0300
committer | shmel1k <shmel1k@ydb.tech> | 2023-11-26 18:43:30 +0300
commit | b8cf9e88f4c5c64d9406af533d8948deb050d695 (patch)
tree | 218eb61fb3c3b96ec08b4d8cdfef383104a87d63 /contrib/python/Twisted/py2/twisted/web
parent | 523f645a83a0ec97a0332dbc3863bb354c92a328 (diff)
download | ydb-b8cf9e88f4c5c64d9406af533d8948deb050d695.tar.gz
add kikimr_configure
Diffstat (limited to 'contrib/python/Twisted/py2/twisted/web')
38 files changed, 20727 insertions, 0 deletions
diff --git a/contrib/python/Twisted/py2/twisted/web/__init__.py b/contrib/python/Twisted/py2/twisted/web/__init__.py new file mode 100644 index 0000000000..806dc4a2a4 --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/__init__.py @@ -0,0 +1,12 @@ +# -*- test-case-name: twisted.web.test -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +Twisted Web: HTTP clients and servers, plus tools for implementing them. + +Contains a L{web server<twisted.web.server>} (including an +L{HTTP implementation<twisted.web.http>}, a +L{resource model<twisted.web.resource>}), and +a L{web client<twisted.web.client>}. +""" diff --git a/contrib/python/Twisted/py2/twisted/web/_auth/__init__.py b/contrib/python/Twisted/py2/twisted/web/_auth/__init__.py new file mode 100644 index 0000000000..6a58870091 --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/_auth/__init__.py @@ -0,0 +1,7 @@ +# -*- test-case-name: twisted.web.test.test_httpauth -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +HTTP header-based authentication migrated from web2 +""" diff --git a/contrib/python/Twisted/py2/twisted/web/_auth/basic.py b/contrib/python/Twisted/py2/twisted/web/_auth/basic.py new file mode 100644 index 0000000000..d539457190 --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/_auth/basic.py @@ -0,0 +1,61 @@ +# -*- test-case-name: twisted.web.test.test_httpauth -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +HTTP BASIC authentication. + +@see: U{http://tools.ietf.org/html/rfc1945} +@see: U{http://tools.ietf.org/html/rfc2616} +@see: U{http://tools.ietf.org/html/rfc2617} +""" + +from __future__ import division, absolute_import + +import binascii + +from zope.interface import implementer + +from twisted.cred import credentials, error +from twisted.web.iweb import ICredentialFactory + + +@implementer(ICredentialFactory) +class BasicCredentialFactory(object): + """ + Credential Factory for HTTP Basic Authentication + + @type authenticationRealm: L{bytes} + @ivar authenticationRealm: The HTTP authentication realm which will be issued in + challenges. + """ + + scheme = b'basic' + + def __init__(self, authenticationRealm): + self.authenticationRealm = authenticationRealm + + + def getChallenge(self, request): + """ + Return a challenge including the HTTP authentication realm with which + this factory was created. + """ + return {'realm': self.authenticationRealm} + + + def decode(self, response, request): + """ + Parse the base64-encoded, colon-separated username and password into a + L{credentials.UsernamePassword} instance. + """ + try: + creds = binascii.a2b_base64(response + b'===') + except binascii.Error: + raise error.LoginFailed('Invalid credentials') + + creds = creds.split(b':', 1) + if len(creds) == 2: + return credentials.UsernamePassword(*creds) + else: + raise error.LoginFailed('Invalid credentials') diff --git a/contrib/python/Twisted/py2/twisted/web/_auth/digest.py b/contrib/python/Twisted/py2/twisted/web/_auth/digest.py new file mode 100644 index 0000000000..5346801f6b --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/_auth/digest.py @@ -0,0 +1,56 @@ +# -*- test-case-name: twisted.web.test.test_httpauth -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. 
+ +""" +Implementation of RFC2617: HTTP Digest Authentication + +@see: U{http://www.faqs.org/rfcs/rfc2617.html} +""" + +from __future__ import division, absolute_import + +from zope.interface import implementer +from twisted.cred import credentials +from twisted.web.iweb import ICredentialFactory + +@implementer(ICredentialFactory) +class DigestCredentialFactory(object): + """ + Wrapper for L{digest.DigestCredentialFactory} that implements the + L{ICredentialFactory} interface. + """ + + scheme = b'digest' + + def __init__(self, algorithm, authenticationRealm): + """ + Create the digest credential factory that this object wraps. + """ + self.digest = credentials.DigestCredentialFactory(algorithm, + authenticationRealm) + + + def getChallenge(self, request): + """ + Generate the challenge for use in the WWW-Authenticate header + + @param request: The L{IRequest} to with access was denied and for the + response to which this challenge is being generated. + + @return: The L{dict} that can be used to generate a WWW-Authenticate + header. + """ + return self.digest.getChallenge(request.getClientAddress().host) + + + def decode(self, response, request): + """ + Create a L{twisted.cred.credentials.DigestedCredentials} object + from the given response and request. + + @see: L{ICredentialFactory.decode} + """ + return self.digest.decode(response, + request.method, + request.getClientAddress().host) diff --git a/contrib/python/Twisted/py2/twisted/web/_auth/wrapper.py b/contrib/python/Twisted/py2/twisted/web/_auth/wrapper.py new file mode 100644 index 0000000000..1804b7a416 --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/_auth/wrapper.py @@ -0,0 +1,236 @@ +# -*- test-case-name: twisted.web.test.test_httpauth -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +A guard implementation which supports HTTP header-based authentication +schemes. + +If no I{Authorization} header is supplied, an anonymous login will be +attempted by using a L{Anonymous} credentials object. If such a header is +supplied and does not contain allowed credentials, or if anonymous login is +denied, a 401 will be sent in the response along with I{WWW-Authenticate} +headers for each of the allowed authentication schemes. 
+""" + +from __future__ import absolute_import, division + +from twisted.cred import error +from twisted.cred.credentials import Anonymous +from twisted.python.compat import unicode +from twisted.python.components import proxyForInterface +from twisted.web import util +from twisted.web.resource import ErrorPage, IResource +from twisted.logger import Logger + +from zope.interface import implementer + + +@implementer(IResource) +class UnauthorizedResource(object): + """ + Simple IResource to escape Resource dispatch + """ + isLeaf = True + + + def __init__(self, factories): + self._credentialFactories = factories + + + def render(self, request): + """ + Send www-authenticate headers to the client + """ + def ensureBytes(s): + return s.encode('ascii') if isinstance(s, unicode) else s + + def generateWWWAuthenticate(scheme, challenge): + l = [] + for k, v in challenge.items(): + k = ensureBytes(k) + v = ensureBytes(v) + l.append(k + b"=" + quoteString(v)) + return b" ".join([scheme, b", ".join(l)]) + + def quoteString(s): + return b'"' + s.replace(b'\\', b'\\\\').replace(b'"', b'\\"') + b'"' + + request.setResponseCode(401) + for fact in self._credentialFactories: + challenge = fact.getChallenge(request) + request.responseHeaders.addRawHeader( + b'www-authenticate', + generateWWWAuthenticate(fact.scheme, challenge)) + if request.method == b'HEAD': + return b'' + return b'Unauthorized' + + + def getChildWithDefault(self, path, request): + """ + Disable resource dispatch + """ + return self + + + +@implementer(IResource) +class HTTPAuthSessionWrapper(object): + """ + Wrap a portal, enforcing supported header-based authentication schemes. + + @ivar _portal: The L{Portal} which will be used to retrieve L{IResource} + avatars. + + @ivar _credentialFactories: A list of L{ICredentialFactory} providers which + will be used to decode I{Authorization} headers into L{ICredentials} + providers. + """ + isLeaf = False + _log = Logger() + + def __init__(self, portal, credentialFactories): + """ + Initialize a session wrapper + + @type portal: C{Portal} + @param portal: The portal that will authenticate the remote client + + @type credentialFactories: C{Iterable} + @param credentialFactories: The portal that will authenticate the + remote client based on one submitted C{ICredentialFactory} + """ + self._portal = portal + self._credentialFactories = credentialFactories + + + def _authorizedResource(self, request): + """ + Get the L{IResource} which the given request is authorized to receive. + If the proper authorization headers are present, the resource will be + requested from the portal. If not, an anonymous login attempt will be + made. + """ + authheader = request.getHeader(b'authorization') + if not authheader: + return util.DeferredResource(self._login(Anonymous())) + + factory, respString = self._selectParseHeader(authheader) + if factory is None: + return UnauthorizedResource(self._credentialFactories) + try: + credentials = factory.decode(respString, request) + except error.LoginFailed: + return UnauthorizedResource(self._credentialFactories) + except: + self._log.failure("Unexpected failure from credentials factory") + return ErrorPage(500, None, None) + else: + return util.DeferredResource(self._login(credentials)) + + + def render(self, request): + """ + Find the L{IResource} avatar suitable for the given request, if + possible, and render it. Otherwise, perhaps render an error page + requiring authorization or describing an internal server failure. 
+ """ + return self._authorizedResource(request).render(request) + + + def getChildWithDefault(self, path, request): + """ + Inspect the Authorization HTTP header, and return a deferred which, + when fired after successful authentication, will return an authorized + C{Avatar}. On authentication failure, an C{UnauthorizedResource} will + be returned, essentially halting further dispatch on the wrapped + resource and all children + """ + # Don't consume any segments of the request - this class should be + # transparent! + request.postpath.insert(0, request.prepath.pop()) + return self._authorizedResource(request) + + + def _login(self, credentials): + """ + Get the L{IResource} avatar for the given credentials. + + @return: A L{Deferred} which will be called back with an L{IResource} + avatar or which will errback if authentication fails. + """ + d = self._portal.login(credentials, None, IResource) + d.addCallbacks(self._loginSucceeded, self._loginFailed) + return d + + + def _loginSucceeded(self, args): + """ + Handle login success by wrapping the resulting L{IResource} avatar + so that the C{logout} callback will be invoked when rendering is + complete. + """ + interface, avatar, logout = args + class ResourceWrapper(proxyForInterface(IResource, 'resource')): + """ + Wrap an L{IResource} so that whenever it or a child of it + completes rendering, the cred logout hook will be invoked. + + An assumption is made here that exactly one L{IResource} from + among C{avatar} and all of its children will be rendered. If + more than one is rendered, C{logout} will be invoked multiple + times and probably earlier than desired. + """ + def getChildWithDefault(self, name, request): + """ + Pass through the lookup to the wrapped resource, wrapping + the result in L{ResourceWrapper} to ensure C{logout} is + called when rendering of the child is complete. + """ + return ResourceWrapper(self.resource.getChildWithDefault(name, request)) + + def render(self, request): + """ + Hook into response generation so that when rendering has + finished completely (with or without error), C{logout} is + called. + """ + request.notifyFinish().addBoth(lambda ign: logout()) + return super(ResourceWrapper, self).render(request) + + return ResourceWrapper(avatar) + + + def _loginFailed(self, result): + """ + Handle login failure by presenting either another challenge (for + expected authentication/authorization-related failures) or a server + error page (for anything else). + """ + if result.check(error.Unauthorized, error.LoginFailed): + return UnauthorizedResource(self._credentialFactories) + else: + self._log.failure( + "HTTPAuthSessionWrapper.getChildWithDefault encountered " + "unexpected error", + failure=result, + ) + return ErrorPage(500, None, None) + + + def _selectParseHeader(self, header): + """ + Choose an C{ICredentialFactory} from C{_credentialFactories} + suitable to use to decode the given I{Authenticate} header. + + @return: A two-tuple of a factory and the remaining portion of the + header value to be decoded or a two-tuple of L{None} if no + factory can decode the header value. 
+ """ + elements = header.split(b' ') + scheme = elements[0].lower() + for fact in self._credentialFactories: + if fact.scheme == scheme: + return (fact, b' '.join(elements[1:])) + return (None, None) diff --git a/contrib/python/Twisted/py2/twisted/web/_element.py b/contrib/python/Twisted/py2/twisted/web/_element.py new file mode 100644 index 0000000000..5c4b7e99cf --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/_element.py @@ -0,0 +1,185 @@ +# -*- test-case-name: twisted.web.test.test_template -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +from __future__ import division, absolute_import + +from zope.interface import implementer + +from twisted.web.iweb import IRenderable +from twisted.web.error import MissingRenderMethod, UnexposedMethodError +from twisted.web.error import MissingTemplateLoader + + +class Expose(object): + """ + Helper for exposing methods for various uses using a simple decorator-style + callable. + + Instances of this class can be called with one or more functions as + positional arguments. The names of these functions will be added to a list + on the class object of which they are methods. + + @ivar attributeName: The attribute with which exposed methods will be + tracked. + """ + def __init__(self, doc=None): + self.doc = doc + + + def __call__(self, *funcObjs): + """ + Add one or more functions to the set of exposed functions. + + This is a way to declare something about a class definition, similar to + L{zope.interface.declarations.implementer}. Use it like this:: + + magic = Expose('perform extra magic') + class Foo(Bar): + def twiddle(self, x, y): + ... + def frob(self, a, b): + ... + magic(twiddle, frob) + + Later you can query the object:: + + aFoo = Foo() + magic.get(aFoo, 'twiddle')(x=1, y=2) + + The call to C{get} will fail if the name it is given has not been + exposed using C{magic}. + + @param funcObjs: One or more function objects which will be exposed to + the client. + + @return: The first of C{funcObjs}. + """ + if not funcObjs: + raise TypeError("expose() takes at least 1 argument (0 given)") + for fObj in funcObjs: + fObj.exposedThrough = getattr(fObj, 'exposedThrough', []) + fObj.exposedThrough.append(self) + return funcObjs[0] + + + _nodefault = object() + def get(self, instance, methodName, default=_nodefault): + """ + Retrieve an exposed method with the given name from the given instance. + + @raise UnexposedMethodError: Raised if C{default} is not specified and + there is no exposed method with the given name. + + @return: A callable object for the named method assigned to the given + instance. + """ + method = getattr(instance, methodName, None) + exposedThrough = getattr(method, 'exposedThrough', []) + if self not in exposedThrough: + if default is self._nodefault: + raise UnexposedMethodError(self, methodName) + return default + return method + + + @classmethod + def _withDocumentation(cls, thunk): + """ + Slight hack to make users of this class appear to have a docstring to + documentation generators, by defining them with a decorator. (This hack + should be removed when epydoc can be convinced to use some other method + for documenting.) + """ + return cls(thunk.__doc__) + + +# Avoid exposing the ugly, private classmethod name in the docs. Luckily this +# namespace is private already so this doesn't leak further. +exposer = Expose._withDocumentation + +@exposer +def renderer(): + """ + Decorate with L{renderer} to use methods as template render directives. 
+ + For example:: + + class Foo(Element): + @renderer + def twiddle(self, request, tag): + return tag('Hello, world.') + + <div xmlns:t="http://twistedmatrix.com/ns/twisted.web.template/0.1"> + <span t:render="twiddle" /> + </div> + + Will result in this final output:: + + <div> + <span>Hello, world.</span> + </div> + """ + + + +@implementer(IRenderable) +class Element(object): + """ + Base for classes which can render part of a page. + + An Element is a renderer that can be embedded in a stan document and can + hook its template (from the loader) up to render methods. + + An Element might be used to encapsulate the rendering of a complex piece of + data which is to be displayed in multiple different contexts. The Element + allows the rendering logic to be easily re-used in different ways. + + Element returns render methods which are registered using + L{twisted.web._element.renderer}. For example:: + + class Menu(Element): + @renderer + def items(self, request, tag): + .... + + Render methods are invoked with two arguments: first, the + L{twisted.web.http.Request} being served and second, the tag object which + "invoked" the render method. + + @type loader: L{ITemplateLoader} provider + @ivar loader: The factory which will be used to load documents to + return from C{render}. + """ + loader = None + + def __init__(self, loader=None): + if loader is not None: + self.loader = loader + + + def lookupRenderMethod(self, name): + """ + Look up and return the named render method. + """ + method = renderer.get(self, name, None) + if method is None: + raise MissingRenderMethod(self, name) + return method + + + def render(self, request): + """ + Implement L{IRenderable} to allow one L{Element} to be embedded in + another's template or rendering output. + + (This will simply load the template from the C{loader}; when used in a + template, the flattening engine will keep track of this object + separately as the object to lookup renderers on and call + L{Element.renderer} to look them up. The resulting object from this + method is not directly associated with this L{Element}.) + """ + loader = self.loader + if loader is None: + raise MissingTemplateLoader(self) + return loader.load() diff --git a/contrib/python/Twisted/py2/twisted/web/_flatten.py b/contrib/python/Twisted/py2/twisted/web/_flatten.py new file mode 100644 index 0000000000..89a657dbeb --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/_flatten.py @@ -0,0 +1,421 @@ +# -*- test-case-name: twisted.web.test.test_flatten -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +Context-free flattener/serializer for rendering Python objects, possibly +complex or arbitrarily nested, as strings. +""" + +from __future__ import division, absolute_import + +from io import BytesIO + +from sys import exc_info +from types import GeneratorType +from traceback import extract_tb + +try: + from inspect import iscoroutine +except ImportError: + def iscoroutine(*args, **kwargs): + return False + +from twisted.python.compat import unicode, nativeString, iteritems +from twisted.internet.defer import Deferred, ensureDeferred +from twisted.web._stan import Tag, slot, voidElements, Comment, CDATA, CharRef +from twisted.web.error import UnfilledSlot, UnsupportedType, FlattenerError +from twisted.web.iweb import IRenderable + + + +def escapeForContent(data): + """ + Escape some character or UTF-8 byte data for inclusion in an HTML or XML + document, by replacing metacharacters (C{&<>}) with their entity + equivalents (C{&<>}). 
+ + This is used as an input to L{_flattenElement}'s C{dataEscaper} parameter. + + @type data: C{bytes} or C{unicode} + @param data: The string to escape. + + @rtype: C{bytes} + @return: The quoted form of C{data}. If C{data} is unicode, return a utf-8 + encoded string. + """ + if isinstance(data, unicode): + data = data.encode('utf-8') + data = data.replace(b'&', b'&' + ).replace(b'<', b'<' + ).replace(b'>', b'>') + return data + + + +def attributeEscapingDoneOutside(data): + """ + Escape some character or UTF-8 byte data for inclusion in the top level of + an attribute. L{attributeEscapingDoneOutside} actually passes the data + through unchanged, because L{writeWithAttributeEscaping} handles the + quoting of the text within attributes outside the generator returned by + L{_flattenElement}; this is used as the C{dataEscaper} argument to that + L{_flattenElement} call so that that generator does not redundantly escape + its text output. + + @type data: C{bytes} or C{unicode} + @param data: The string to escape. + + @return: The string, unchanged, except for encoding. + @rtype: C{bytes} + """ + if isinstance(data, unicode): + return data.encode("utf-8") + return data + + + +def writeWithAttributeEscaping(write): + """ + Decorate a C{write} callable so that all output written is properly quoted + for inclusion within an XML attribute value. + + If a L{Tag <twisted.web.template.Tag>} C{x} is flattened within the context + of the contents of another L{Tag <twisted.web.template.Tag>} C{y}, the + metacharacters (C{<>&"}) delimiting C{x} should be passed through + unchanged, but the textual content of C{x} should still be quoted, as + usual. For example: C{<y><x>&</x></y>}. That is the default behavior + of L{_flattenElement} when L{escapeForContent} is passed as the + C{dataEscaper}. + + However, when a L{Tag <twisted.web.template.Tag>} C{x} is flattened within + the context of an I{attribute} of another L{Tag <twisted.web.template.Tag>} + C{y}, then the metacharacters delimiting C{x} should be quoted so that it + can be parsed from the attribute's value. In the DOM itself, this is not a + valid thing to do, but given that renderers and slots may be freely moved + around in a L{twisted.web.template} template, it is a condition which may + arise in a document and must be handled in a way which produces valid + output. So, for example, you should be able to get C{<y attr="<x />" + />}. This should also be true for other XML/HTML meta-constructs such as + comments and CDATA, so if you were to serialize a L{comment + <twisted.web.template.Comment>} in an attribute you should get C{<y + attr="<-- comment -->" />}. Therefore in order to capture these + meta-characters, flattening is done with C{write} callable that is wrapped + with L{writeWithAttributeEscaping}. + + The final case, and hopefully the much more common one as compared to + serializing L{Tag <twisted.web.template.Tag>} and arbitrary L{IRenderable} + objects within an attribute, is to serialize a simple string, and those + should be passed through for L{writeWithAttributeEscaping} to quote + without applying a second, redundant level of quoting. + + @param write: A callable which will be invoked with the escaped L{bytes}. + + @return: A callable that writes data with escaping. + """ + def _write(data): + write(escapeForContent(data).replace(b'"', b'"')) + return _write + + + +def escapedCDATA(data): + """ + Escape CDATA for inclusion in a document. + + @type data: L{str} or L{unicode} + @param data: The string to escape. 
+ + @rtype: L{str} + @return: The quoted form of C{data}. If C{data} is unicode, return a utf-8 + encoded string. + """ + if isinstance(data, unicode): + data = data.encode('utf-8') + return data.replace(b']]>', b']]]]><![CDATA[>') + + + +def escapedComment(data): + """ + Escape a comment for inclusion in a document. + + @type data: L{str} or L{unicode} + @param data: The string to escape. + + @rtype: C{str} + @return: The quoted form of C{data}. If C{data} is unicode, return a utf-8 + encoded string. + """ + if isinstance(data, unicode): + data = data.encode('utf-8') + data = data.replace(b'--', b'- - ').replace(b'>', b'>') + if data and data[-1:] == b'-': + data += b' ' + return data + + + +def _getSlotValue(name, slotData, default=None): + """ + Find the value of the named slot in the given stack of slot data. + """ + for slotFrame in slotData[::-1]: + if slotFrame is not None and name in slotFrame: + return slotFrame[name] + else: + if default is not None: + return default + raise UnfilledSlot(name) + + + +def _flattenElement(request, root, write, slotData, renderFactory, + dataEscaper): + """ + Make C{root} slightly more flat by yielding all its immediate contents as + strings, deferreds or generators that are recursive calls to itself. + + @param request: A request object which will be passed to + L{IRenderable.render}. + + @param root: An object to be made flatter. This may be of type C{unicode}, + L{str}, L{slot}, L{Tag <twisted.web.template.Tag>}, L{tuple}, L{list}, + L{types.GeneratorType}, L{Deferred}, or an object that implements + L{IRenderable}. + + @param write: A callable which will be invoked with each L{bytes} produced + by flattening C{root}. + + @param slotData: A L{list} of L{dict} mapping L{str} slot names to data + with which those slots will be replaced. + + @param renderFactory: If not L{None}, an object that provides + L{IRenderable}. + + @param dataEscaper: A 1-argument callable which takes L{bytes} or + L{unicode} and returns L{bytes}, quoted as appropriate for the + rendering context. This is really only one of two values: + L{attributeEscapingDoneOutside} or L{escapeForContent}, depending on + whether the rendering context is within an attribute or not. See the + explanation in L{writeWithAttributeEscaping}. + + @return: An iterator that eventually yields L{bytes} that should be written + to the output. However it may also yield other iterators or + L{Deferred}s; if it yields another iterator, the caller will iterate + it; if it yields a L{Deferred}, the result of that L{Deferred} will + either be L{bytes}, in which case it's written, or another generator, + in which case it is iterated. See L{_flattenTree} for the trampoline + that consumes said values. + @rtype: An iterator which yields L{bytes}, L{Deferred}, and more iterators + of the same type. 
+ """ + def keepGoing(newRoot, dataEscaper=dataEscaper, + renderFactory=renderFactory, write=write): + return _flattenElement(request, newRoot, write, slotData, + renderFactory, dataEscaper) + if isinstance(root, (bytes, unicode)): + write(dataEscaper(root)) + elif isinstance(root, slot): + slotValue = _getSlotValue(root.name, slotData, root.default) + yield keepGoing(slotValue) + elif isinstance(root, CDATA): + write(b'<![CDATA[') + write(escapedCDATA(root.data)) + write(b']]>') + elif isinstance(root, Comment): + write(b'<!--') + write(escapedComment(root.data)) + write(b'-->') + elif isinstance(root, Tag): + slotData.append(root.slotData) + if root.render is not None: + rendererName = root.render + rootClone = root.clone(False) + rootClone.render = None + renderMethod = renderFactory.lookupRenderMethod(rendererName) + result = renderMethod(request, rootClone) + yield keepGoing(result) + slotData.pop() + return + + if not root.tagName: + yield keepGoing(root.children) + return + + write(b'<') + if isinstance(root.tagName, unicode): + tagName = root.tagName.encode('ascii') + else: + tagName = root.tagName + write(tagName) + for k, v in iteritems(root.attributes): + if isinstance(k, unicode): + k = k.encode('ascii') + write(b' ' + k + b'="') + # Serialize the contents of the attribute, wrapping the results of + # that serialization so that _everything_ is quoted. + yield keepGoing( + v, + attributeEscapingDoneOutside, + write=writeWithAttributeEscaping(write)) + write(b'"') + if root.children or nativeString(tagName) not in voidElements: + write(b'>') + # Regardless of whether we're in an attribute or not, switch back + # to the escapeForContent dataEscaper. The contents of a tag must + # be quoted no matter what; in the top-level document, just so + # they're valid, and if they're within an attribute, they have to + # be quoted so that after applying the *un*-quoting required to re- + # parse the tag within the attribute, all the quoting is still + # correct. + yield keepGoing(root.children, escapeForContent) + write(b'</' + tagName + b'>') + else: + write(b' />') + + elif isinstance(root, (tuple, list, GeneratorType)): + for element in root: + yield keepGoing(element) + elif isinstance(root, CharRef): + escaped = '&#%d;' % (root.ordinal,) + write(escaped.encode('ascii')) + elif isinstance(root, Deferred): + yield root.addCallback(lambda result: (result, keepGoing(result))) + elif iscoroutine(root): + d = ensureDeferred(root) + yield d.addCallback(lambda result: (result, keepGoing(result))) + elif IRenderable.providedBy(root): + result = root.render(request) + yield keepGoing(result, renderFactory=root) + else: + raise UnsupportedType(root) + + + +def _flattenTree(request, root, write): + """ + Make C{root} into an iterable of L{bytes} and L{Deferred} by doing a depth + first traversal of the tree. + + @param request: A request object which will be passed to + L{IRenderable.render}. + + @param root: An object to be made flatter. This may be of type C{unicode}, + L{bytes}, L{slot}, L{Tag <twisted.web.template.Tag>}, L{tuple}, + L{list}, L{types.GeneratorType}, L{Deferred}, or something providing + L{IRenderable}. + + @param write: A callable which will be invoked with each L{bytes} produced + by flattening C{root}. + + @return: An iterator which yields objects of type L{bytes} and L{Deferred}. + A L{Deferred} is only yielded when one is encountered in the process of + flattening C{root}. The returned iterator must not be iterated again + until the L{Deferred} is called back. 
+ """ + stack = [_flattenElement(request, root, write, [], None, escapeForContent)] + while stack: + try: + frame = stack[-1].gi_frame + element = next(stack[-1]) + except StopIteration: + stack.pop() + except Exception as e: + stack.pop() + roots = [] + for generator in stack: + roots.append(generator.gi_frame.f_locals['root']) + roots.append(frame.f_locals['root']) + raise FlattenerError(e, roots, extract_tb(exc_info()[2])) + else: + if isinstance(element, Deferred): + def cbx(originalAndToFlatten): + original, toFlatten = originalAndToFlatten + stack.append(toFlatten) + return original + yield element.addCallback(cbx) + else: + stack.append(element) + + +def _writeFlattenedData(state, write, result): + """ + Take strings from an iterator and pass them to a writer function. + + @param state: An iterator of L{str} and L{Deferred}. L{str} instances will + be passed to C{write}. L{Deferred} instances will be waited on before + resuming iteration of C{state}. + + @param write: A callable which will be invoked with each L{str} + produced by iterating C{state}. + + @param result: A L{Deferred} which will be called back when C{state} has + been completely flattened into C{write} or which will be errbacked if + an exception in a generator passed to C{state} or an errback from a + L{Deferred} from state occurs. + + @return: L{None} + """ + while True: + try: + element = next(state) + except StopIteration: + result.callback(None) + except: + result.errback() + else: + def cby(original): + _writeFlattenedData(state, write, result) + return original + element.addCallbacks(cby, result.errback) + break + + + +def flatten(request, root, write): + """ + Incrementally write out a string representation of C{root} using C{write}. + + In order to create a string representation, C{root} will be decomposed into + simpler objects which will themselves be decomposed and so on until strings + or objects which can easily be converted to strings are encountered. + + @param request: A request object which will be passed to the C{render} + method of any L{IRenderable} provider which is encountered. + + @param root: An object to be made flatter. This may be of type L{unicode}, + L{bytes}, L{slot}, L{Tag <twisted.web.template.Tag>}, L{tuple}, + L{list}, L{types.GeneratorType}, L{Deferred}, or something that provides + L{IRenderable}. + + @param write: A callable which will be invoked with each L{bytes} produced + by flattening C{root}. + + @return: A L{Deferred} which will be called back when C{root} has been + completely flattened into C{write} or which will be errbacked if an + unexpected exception occurs. + """ + result = Deferred() + state = _flattenTree(request, root, write) + _writeFlattenedData(state, write, result) + return result + + + +def flattenString(request, root): + """ + Collate a string representation of C{root} into a single string. + + This is basically gluing L{flatten} to an L{io.BytesIO} and returning + the results. See L{flatten} for the exact meanings of C{request} and + C{root}. + + @return: A L{Deferred} which will be called back with a single string as + its result when C{root} has been completely flattened into C{write} or + which will be errbacked if an unexpected exception occurs. 
+ """ + io = BytesIO() + d = flatten(request, root, io.write) + d.addCallback(lambda _: io.getvalue()) + return d diff --git a/contrib/python/Twisted/py2/twisted/web/_http2.py b/contrib/python/Twisted/py2/twisted/web/_http2.py new file mode 100644 index 0000000000..fdaef00782 --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/_http2.py @@ -0,0 +1,1356 @@ +# -*- test-case-name: twisted.web.test.test_http2 -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +HTTP2 Implementation + +This is the basic server-side protocol implementation used by the Twisted +Web server for HTTP2. This functionality is intended to be combined with the +HTTP/1.1 and HTTP/1.0 functionality in twisted.web.http to provide complete +protocol support for HTTP-type protocols. + +This API is currently considered private because it's in early draft form. When +it has stabilised, it'll be made public. +""" + +from __future__ import absolute_import, division + +import io +import warnings +import sys + +from collections import deque + +from zope.interface import implementer + +import priority +import h2.config +import h2.connection +import h2.errors +import h2.events +import h2.exceptions + +from twisted.internet.defer import Deferred +from twisted.internet.error import ConnectionLost +from twisted.internet.interfaces import ( + IProtocol, ITransport, IConsumer, IPushProducer, ISSLTransport +) +from twisted.internet._producer_helpers import _PullToPush +from twisted.internet.protocol import Protocol +from twisted.logger import Logger +from twisted.protocols.policies import TimeoutMixin +from twisted.python.failure import Failure +from twisted.web.error import ExcessiveBufferingError + + +# This API is currently considered private. +__all__ = [] + + +_END_STREAM_SENTINEL = object() + + +# Python versions 2.7.3 and older don't have a memoryview object that plays +# well with the struct module, which h2 needs. On those versions, just refuse +# to import. +if sys.version_info < (2, 7, 4): + warnings.warn( + "HTTP/2 cannot be enabled because this version of Python is too " + "old, and does not fully support memoryview objects.", + UserWarning, + stacklevel=2, + ) + raise ImportError("HTTP/2 not supported on this Python version.") + + + +@implementer(IProtocol, IPushProducer) +class H2Connection(Protocol, TimeoutMixin): + """ + A class representing a single HTTP/2 connection. + + This implementation of L{IProtocol} works hand in hand with L{H2Stream}. + This is because we have the requirement to register multiple producers for + a single HTTP/2 connection, one for each stream. The standard Twisted + interfaces don't really allow for this, so instead there's a custom + interface between the two objects that allows them to work hand-in-hand here. + + @ivar conn: The HTTP/2 connection state machine. + @type conn: L{h2.connection.H2Connection} + + @ivar streams: A mapping of stream IDs to L{H2Stream} objects, used to call + specific methods on streams when events occur. + @type streams: L{dict}, mapping L{int} stream IDs to L{H2Stream} objects. + + @ivar priority: A HTTP/2 priority tree used to ensure that responses are + prioritised appropriately. + @type priority: L{priority.PriorityTree} + + @ivar _consumerBlocked: A flag tracking whether or not the L{IConsumer} + that is consuming this data has asked us to stop producing. + @type _consumerBlocked: L{bool} + + @ivar _sendingDeferred: A L{Deferred} used to restart the data-sending loop + when more response data has been produced. 
Will not be present if there + is outstanding data still to send. + @type _consumerBlocked: A L{twisted.internet.defer.Deferred}, or L{None} + + @ivar _outboundStreamQueues: A map of stream IDs to queues, used to store + data blocks that are yet to be sent on the connection. These are used + both to handle producers that do not respect L{IConsumer} but also to + allow priority to multiplex data appropriately. + @type _outboundStreamQueues: A L{dict} mapping L{int} stream IDs to + L{collections.deque} queues, which contain either L{bytes} objects or + C{_END_STREAM_SENTINEL}. + + @ivar _sender: A handle to the data-sending loop, allowing it to be + terminated if needed. + @type _sender: L{twisted.internet.task.LoopingCall} + + @ivar abortTimeout: The number of seconds to wait after we attempt to shut + the transport down cleanly to give up and forcibly terminate it. This + is only used when we time a connection out, to prevent errors causing + the FD to get leaked. If this is L{None}, we will wait forever. + @type abortTimeout: L{int} + + @ivar _abortingCall: The L{twisted.internet.base.DelayedCall} that will be + used to forcibly close the transport if it doesn't close cleanly. + @type _abortingCall: L{twisted.internet.base.DelayedCall} + """ + factory = None + site = None + abortTimeout = 15 + + _log = Logger() + _abortingCall = None + + def __init__(self, reactor=None): + config = h2.config.H2Configuration( + client_side=False, header_encoding=None + ) + self.conn = h2.connection.H2Connection(config=config) + self.streams = {} + + self.priority = priority.PriorityTree() + self._consumerBlocked = None + self._sendingDeferred = None + self._outboundStreamQueues = {} + self._streamCleanupCallbacks = {} + self._stillProducing = True + + # Limit the number of buffered control frame (e.g. PING and + # SETTINGS) bytes. + self._maxBufferedControlFrameBytes = 1024 * 17 + self._bufferedControlFrames = deque() + self._bufferedControlFrameBytes = 0 + + if reactor is None: + from twisted.internet import reactor + self._reactor = reactor + + # Start the data sending function. + self._reactor.callLater(0, self._sendPrioritisedData) + + + # Implementation of IProtocol + def connectionMade(self): + """ + Called by the reactor when a connection is received. May also be called + by the L{twisted.web.http._GenericHTTPChannelProtocol} during upgrade + to HTTP/2. + """ + self.setTimeout(self.timeOut) + self.conn.initiate_connection() + self.transport.write(self.conn.data_to_send()) + + + def dataReceived(self, data): + """ + Called whenever a chunk of data is received from the transport. + + @param data: The data received from the transport. 
+ @type data: L{bytes} + """ + try: + events = self.conn.receive_data(data) + except h2.exceptions.ProtocolError: + stillActive = self._tryToWriteControlData() + if stillActive: + self.transport.loseConnection() + self.connectionLost(Failure(), _cancelTimeouts=False) + return + + # Only reset the timeout if we've received an actual H2 + # protocol message + self.resetTimeout() + + for event in events: + if isinstance(event, h2.events.RequestReceived): + self._requestReceived(event) + elif isinstance(event, h2.events.DataReceived): + self._requestDataReceived(event) + elif isinstance(event, h2.events.StreamEnded): + self._requestEnded(event) + elif isinstance(event, h2.events.StreamReset): + self._requestAborted(event) + elif isinstance(event, h2.events.WindowUpdated): + self._handleWindowUpdate(event) + elif isinstance(event, h2.events.PriorityUpdated): + self._handlePriorityUpdate(event) + elif isinstance(event, h2.events.ConnectionTerminated): + self.transport.loseConnection() + self.connectionLost( + ConnectionLost("Remote peer sent GOAWAY"), + _cancelTimeouts=False, + ) + + self._tryToWriteControlData() + + + def timeoutConnection(self): + """ + Called when the connection has been inactive for + L{self.timeOut<twisted.protocols.policies.TimeoutMixin.timeOut>} + seconds. Cleanly tears the connection down, attempting to notify the + peer if needed. + + We override this method to add two extra bits of functionality: + + - We want to log the timeout. + - We want to send a GOAWAY frame indicating that the connection is + being terminated, and whether it was clean or not. We have to do this + before the connection is torn down. + """ + self._log.info( + "Timing out client {client}", client=self.transport.getPeer() + ) + + # Check whether there are open streams. If there are, we're going to + # want to use the error code PROTOCOL_ERROR. If there aren't, use + # NO_ERROR. + if (self.conn.open_outbound_streams > 0 or + self.conn.open_inbound_streams > 0): + error_code = h2.errors.ErrorCodes.PROTOCOL_ERROR + else: + error_code = h2.errors.ErrorCodes.NO_ERROR + + self.conn.close_connection(error_code=error_code) + self.transport.write(self.conn.data_to_send()) + + # Don't let the client hold this connection open too long. + if self.abortTimeout is not None: + # We use self.callLater because that's what TimeoutMixin does, even + # though we have a perfectly good reactor sitting around. See + # https://twistedmatrix.com/trac/ticket/8488. + self._abortingCall = self.callLater( + self.abortTimeout, self.forceAbortClient + ) + + # We're done, throw the connection away. + self.transport.loseConnection() + + + def forceAbortClient(self): + """ + Called if C{abortTimeout} seconds have passed since the timeout fired, + and the connection still hasn't gone away. This can really only happen + on extremely bad connections or when clients are maliciously attempting + to keep connections open. + """ + self._log.info( + "Forcibly timing out client: {client}", + client=self.transport.getPeer() + ) + # We want to lose track of the _abortingCall so that no-one tries to + # cancel it. + self._abortingCall = None + self.transport.abortConnection() + + + def connectionLost(self, reason, _cancelTimeouts=True): + """ + Called when the transport connection is lost. + + Informs all outstanding response handlers that the connection + has been lost, and cleans up all internal state. 
+ + @param reason: See L{IProtocol.connectionLost} + + @param _cancelTimeouts: Propagate the C{reason} to this + connection's streams but don't cancel any timers, so that + peers who never read the data we've written are eventually + timed out. + """ + self._stillProducing = False + if _cancelTimeouts: + self.setTimeout(None) + + for stream in self.streams.values(): + stream.connectionLost(reason) + + for streamID in list(self.streams.keys()): + self._requestDone(streamID) + + # If we were going to force-close the transport, we don't have to now. + if _cancelTimeouts and self._abortingCall is not None: + self._abortingCall.cancel() + self._abortingCall = None + + + # Implementation of IPushProducer + # + # Here's how we handle IPushProducer. We have multiple outstanding + # H2Streams. Each of these exposes an IConsumer interface to the response + # handler that allows it to push data into the H2Stream. The H2Stream then + # writes the data into the H2Connection object. + # + # The H2Connection needs to manage these writes to account for: + # + # - flow control + # - priority + # + # We manage each of these in different ways. + # + # For flow control, we simply use the equivalent of the IPushProducer + # interface. We simply tell the H2Stream: "Hey, you can't send any data + # right now, sorry!". When that stream becomes unblocked, we free it up + # again. This allows the H2Stream to propagate this backpressure up the + # chain. + # + # For priority, we need to keep a backlog of data frames that we can send, + # and interleave them appropriately. This backlog is most sensibly kept in + # the H2Connection object itself. We keep one queue per stream, which is + # where the writes go, and then we have a loop that manages popping these + # streams off in priority order. + # + # Logically then, we go as follows: + # + # 1. Stream calls writeDataToStream(). This causes a DataFrame to be placed + # on the queue for that stream. It also informs the priority + # implementation that this stream is unblocked. + # 2. The _sendPrioritisedData() function spins in a tight loop. Each + # iteration it asks the priority implementation which stream should send + # next, and pops a data frame off that stream's queue. If, after sending + # that frame, there is no data left on that stream's queue, the function + # informs the priority implementation that the stream is blocked. + # + # If all streams are blocked, or if there are no outstanding streams, the + # _sendPrioritisedData function waits to be awoken when more data is ready + # to send. + # + # Note that all of this only applies to *data*. Headers and other control + # frames deliberately skip this processing as they are not subject to flow + # control or priority constraints. Instead, they are stored in their own buffer + # which is used primarily to detect excessive buffering. + def stopProducing(self): + """ + Stop producing data. + + This tells the L{H2Connection} that its consumer has died, so it must + stop producing data for good. + """ + self.connectionLost(ConnectionLost("Producing stopped")) + + + def pauseProducing(self): + """ + Pause producing data. + + Tells the L{H2Connection} that it has produced too much data to process + for the time being, and to stop until resumeProducing() is called. + """ + self._consumerBlocked = Deferred() + # Ensure pending control data (if any) are sent first. + self._consumerBlocked.addCallback(self._flushBufferedControlData) + + + def resumeProducing(self): + """ + Resume producing data. 
+ + This tells the L{H2Connection} to re-add itself to the main loop and + produce more data for the consumer. + """ + if self._consumerBlocked is not None: + d = self._consumerBlocked + self._consumerBlocked = None + d.callback(None) + + + def _sendPrioritisedData(self, *args): + """ + The data sending loop. This function repeatedly calls itself, either + from L{Deferred}s or from + L{reactor.callLater<twisted.internet.interfaces.IReactorTime.callLater>} + + This function sends data on streams according to the rules of HTTP/2 + priority. It ensures that the data from each stream is interleved + according to the priority signalled by the client, making sure that the + connection is used with maximal efficiency. + + This function will execute if data is available: if all data is + exhausted, the function will place a deferred onto the L{H2Connection} + object and wait until it is called to resume executing. + """ + # If producing has stopped, we're done. Don't reschedule ourselves + if not self._stillProducing: + return + + stream = None + + while stream is None: + try: + stream = next(self.priority) + except priority.DeadlockError: + # All streams are currently blocked or not progressing. Wait + # until a new one becomes available. + assert self._sendingDeferred is None + self._sendingDeferred = Deferred() + self._sendingDeferred.addCallback(self._sendPrioritisedData) + return + + # Wait behind the transport. + if self._consumerBlocked is not None: + self._consumerBlocked.addCallback(self._sendPrioritisedData) + return + + self.resetTimeout() + + remainingWindow = self.conn.local_flow_control_window(stream) + frameData = self._outboundStreamQueues[stream].popleft() + maxFrameSize = min(self.conn.max_outbound_frame_size, remainingWindow) + + if frameData is _END_STREAM_SENTINEL: + # There's no error handling here even though this can throw + # ProtocolError because we really shouldn't encounter this problem. + # If we do, that's a nasty bug. + self.conn.end_stream(stream) + self.transport.write(self.conn.data_to_send()) + + # Clean up the stream + self._requestDone(stream) + else: + # Respect the max frame size. + if len(frameData) > maxFrameSize: + excessData = frameData[maxFrameSize:] + frameData = frameData[:maxFrameSize] + self._outboundStreamQueues[stream].appendleft(excessData) + + # There's deliberately no error handling here, because this just + # absolutely should not happen. + # If for whatever reason the max frame length is zero and so we + # have no frame data to send, don't send any. + if frameData: + self.conn.send_data(stream, frameData) + self.transport.write(self.conn.data_to_send()) + + # If there's no data left, this stream is now blocked. + if not self._outboundStreamQueues[stream]: + self.priority.block(stream) + + # Also, if the stream's flow control window is exhausted, tell it + # to stop. + if self.remainingOutboundWindow(stream) <= 0: + self.streams[stream].flowControlBlocked() + + self._reactor.callLater(0, self._sendPrioritisedData) + + + # Internal functions. + def _requestReceived(self, event): + """ + Internal handler for when a request has been received. + + @param event: The Hyper-h2 event that encodes information about the + received request. 
+ @type event: L{h2.events.RequestReceived} + """ + stream = H2Stream( + event.stream_id, + self, event.headers, + self.requestFactory, + self.site, + self.factory + ) + self.streams[event.stream_id] = stream + self._streamCleanupCallbacks[event.stream_id] = Deferred() + self._outboundStreamQueues[event.stream_id] = deque() + + # Add the stream to the priority tree but immediately block it. + try: + self.priority.insert_stream(event.stream_id) + except priority.DuplicateStreamError: + # Stream already in the tree. This can happen if we received a + # PRIORITY frame before a HEADERS frame. Just move on: we set the + # stream up properly in _handlePriorityUpdate. + pass + else: + self.priority.block(event.stream_id) + + + def _requestDataReceived(self, event): + """ + Internal handler for when a chunk of data is received for a given + request. + + @param event: The Hyper-h2 event that encodes information about the + received data. + @type event: L{h2.events.DataReceived} + """ + stream = self.streams[event.stream_id] + stream.receiveDataChunk(event.data, event.flow_controlled_length) + + + def _requestEnded(self, event): + """ + Internal handler for when a request is complete, and we expect no + further data for that request. + + @param event: The Hyper-h2 event that encodes information about the + completed stream. + @type event: L{h2.events.StreamEnded} + """ + stream = self.streams[event.stream_id] + stream.requestComplete() + + + def _requestAborted(self, event): + """ + Internal handler for when a request is aborted by a remote peer. + + @param event: The Hyper-h2 event that encodes information about the + reset stream. + @type event: L{h2.events.StreamReset} + """ + stream = self.streams[event.stream_id] + stream.connectionLost( + ConnectionLost("Stream reset with code %s" % event.error_code) + ) + self._requestDone(event.stream_id) + + + def _handlePriorityUpdate(self, event): + """ + Internal handler for when a stream priority is updated. + + @param event: The Hyper-h2 event that encodes information about the + stream reprioritization. + @type event: L{h2.events.PriorityUpdated} + """ + try: + self.priority.reprioritize( + stream_id=event.stream_id, + depends_on=event.depends_on or None, + weight=event.weight, + exclusive=event.exclusive, + ) + except priority.MissingStreamError: + # A PRIORITY frame arrived before the HEADERS frame that would + # trigger us to insert the stream into the tree. That's fine: we + # can create the stream here and mark it as blocked. + self.priority.insert_stream( + stream_id=event.stream_id, + depends_on=event.depends_on or None, + weight=event.weight, + exclusive=event.exclusive, + ) + self.priority.block(event.stream_id) + + + def writeHeaders(self, version, code, reason, headers, streamID): + """ + Called by L{twisted.web.http.Request} objects to write a complete set + of HTTP headers to a stream. + + @param version: The HTTP version in use. Unused in HTTP/2. + @type version: L{bytes} + + @param code: The HTTP status code to write. + @type code: L{bytes} + + @param reason: The HTTP reason phrase to write. Unused in HTTP/2. + @type reason: L{bytes} + + @param headers: The headers to write to the stream. + @type headers: L{twisted.web.http_headers.Headers} + + @param streamID: The ID of the stream to write the headers to. + @type streamID: L{int} + """ + headers.insert(0, (b':status', code)) + + try: + self.conn.send_headers(streamID, headers) + except h2.exceptions.StreamClosedError: + # Stream was closed by the client at some point. 
We need to not + # explode here: just swallow the error. That's what write() does + # when a connection is lost, so that's what we do too. + return + else: + self._tryToWriteControlData() + + + def writeDataToStream(self, streamID, data): + """ + May be called by L{H2Stream} objects to write response data to a given + stream. Writes a single data frame. + + @param streamID: The ID of the stream to write the data to. + @type streamID: L{int} + + @param data: The data chunk to write to the stream. + @type data: L{bytes} + """ + self._outboundStreamQueues[streamID].append(data) + + # There's obviously no point unblocking this stream and the sending + # loop if the data can't actually be sent, so confirm that there's + # some room to send data. + if self.conn.local_flow_control_window(streamID) > 0: + self.priority.unblock(streamID) + if self._sendingDeferred is not None: + d = self._sendingDeferred + self._sendingDeferred = None + d.callback(streamID) + + if self.remainingOutboundWindow(streamID) <= 0: + self.streams[streamID].flowControlBlocked() + + + def endRequest(self, streamID): + """ + Called by L{H2Stream} objects to signal completion of a response. + + @param streamID: The ID of the stream to write the data to. + @type streamID: L{int} + """ + self._outboundStreamQueues[streamID].append(_END_STREAM_SENTINEL) + self.priority.unblock(streamID) + if self._sendingDeferred is not None: + d = self._sendingDeferred + self._sendingDeferred = None + d.callback(streamID) + + + def abortRequest(self, streamID): + """ + Called by L{H2Stream} objects to request early termination of a stream. + This emits a RstStream frame and then removes all stream state. + + @param streamID: The ID of the stream to write the data to. + @type streamID: L{int} + """ + self.conn.reset_stream(streamID) + stillActive = self._tryToWriteControlData() + if stillActive: + self._requestDone(streamID) + + + def _requestDone(self, streamID): + """ + Called internally by the data sending loop to clean up state that was + being used for the stream. Called when the stream is complete. + + @param streamID: The ID of the stream to clean up state for. + @type streamID: L{int} + """ + del self._outboundStreamQueues[streamID] + self.priority.remove_stream(streamID) + del self.streams[streamID] + cleanupCallback = self._streamCleanupCallbacks.pop(streamID) + cleanupCallback.callback(streamID) + + + def remainingOutboundWindow(self, streamID): + """ + Called to determine how much room is left in the send window for a + given stream. Allows us to handle blocking and unblocking producers. + + @param streamID: The ID of the stream whose flow control window we'll + check. + @type streamID: L{int} + + @return: The amount of room remaining in the send window for the given + stream, including the data queued to be sent. + @rtype: L{int} + """ + # TODO: This involves a fair bit of looping and computation for + # something that is called a lot. Consider caching values somewhere. + windowSize = self.conn.local_flow_control_window(streamID) + sendQueue = self._outboundStreamQueues[streamID] + alreadyConsumed = sum( + len(chunk) for chunk in sendQueue + if chunk is not _END_STREAM_SENTINEL + ) + + return windowSize - alreadyConsumed + + + def _handleWindowUpdate(self, event): + """ + Manage flow control windows. + + Streams that are blocked on flow control will register themselves with + the connection. This will fire deferreds that wake those streams up and + allow them to continue processing. 
+ + @param event: The Hyper-h2 event that encodes information about the + flow control window change. + @type event: L{h2.events.WindowUpdated} + """ + streamID = event.stream_id + + if streamID: + if not self._streamIsActive(streamID): + # We may have already cleaned up our stream state, making this + # a late WINDOW_UPDATE frame. That's fine: the update is + # unnecessary but benign. We'll ignore it. + return + + # If we haven't got any data to send, don't unblock the stream. If + # we do, we'll eventually get an exception inside the + # _sendPrioritisedData loop some time later. + if self._outboundStreamQueues.get(streamID): + self.priority.unblock(streamID) + self.streams[streamID].windowUpdated() + else: + # Update strictly applies to all streams. + for stream in self.streams.values(): + stream.windowUpdated() + + # If we still have data to send for this stream, unblock it. + if self._outboundStreamQueues.get(stream.streamID): + self.priority.unblock(stream.streamID) + + + def getPeer(self): + """ + Get the remote address of this connection. + + Treat this method with caution. It is the unfortunate result of the + CGI and Jabber standards, but should not be considered reliable for + the usual host of reasons; port forwarding, proxying, firewalls, IP + masquerading, etc. + + @return: An L{IAddress} provider. + """ + return self.transport.getPeer() + + + def getHost(self): + """ + Similar to getPeer, but returns an address describing this side of the + connection. + + @return: An L{IAddress} provider. + """ + return self.transport.getHost() + + + def openStreamWindow(self, streamID, increment): + """ + Open the stream window by a given increment. + + @param streamID: The ID of the stream whose window needs to be opened. + @type streamID: L{int} + + @param increment: The amount by which the stream window must be + incremented. + @type increment: L{int} + """ + self.conn.acknowledge_received_data(increment, streamID) + self._tryToWriteControlData() + + + def _isSecure(self): + """ + Returns L{True} if this channel is using a secure transport. + + @returns: L{True} if this channel is secure. + @rtype: L{bool} + """ + # A channel is secure if its transport is ISSLTransport. + return ISSLTransport(self.transport, None) is not None + + + def _send100Continue(self, streamID): + """ + Sends a 100 Continue response, used to signal to clients that further + processing will be performed. + + @param streamID: The ID of the stream that needs the 100 Continue + response + @type streamID: L{int} + """ + headers = [(b':status', b'100')] + self.conn.send_headers(headers=headers, stream_id=streamID) + self._tryToWriteControlData() + + + def _respondToBadRequestAndDisconnect(self, streamID): + """ + This is a quick and dirty way of responding to bad requests. + + As described by HTTP standard we should be patient and accept the + whole request from the client before sending a polite bad request + response, even in the case when clients send tons of data. + + Unlike in the HTTP/1.1 case, this does not actually disconnect the + underlying transport: there's no need. This instead just sends a 400 + response and terminates the stream. 
+ + @param streamID: The ID of the stream that needs the 100 Continue + response + @type streamID: L{int} + """ + headers = [(b':status', b'400')] + self.conn.send_headers( + headers=headers, + stream_id=streamID, + end_stream=True + ) + stillActive = self._tryToWriteControlData() + if stillActive: + stream = self.streams[streamID] + stream.connectionLost(ConnectionLost("Invalid request")) + self._requestDone(streamID) + + + def _streamIsActive(self, streamID): + """ + Checks whether Twisted has still got state for a given stream and so + can process events for that stream. + + @param streamID: The ID of the stream that needs processing. + @type streamID: L{int} + + @return: Whether the stream still has state allocated. + @rtype: L{bool} + """ + return streamID in self.streams + + def _tryToWriteControlData(self): + """ + Checks whether the connection is blocked on flow control and, + if it isn't, writes any buffered control data. + + @return: L{True} if the connection is still active and + L{False} if it was aborted because too many bytes have + been written but not consumed by the other end. + """ + bufferedBytes = self.conn.data_to_send() + if not bufferedBytes: + return True + + if self._consumerBlocked is None and not self._bufferedControlFrames: + # The consumer isn't blocked, and we don't have any buffered frames: + # write this directly. + self.transport.write(bufferedBytes) + return True + else: + # Either the consumer is blocked or we have buffered frames. If the + # consumer is blocked, we'll write this when we unblock. If we have + # buffered frames, we have presumably been re-entered from + # transport.write, and so to avoid reordering issues we'll buffer anyway. + self._bufferedControlFrames.append(bufferedBytes) + self._bufferedControlFrameBytes += len(bufferedBytes) + + if self._bufferedControlFrameBytes >= self._maxBufferedControlFrameBytes: + self._log.error( + "Maximum number of control frame bytes buffered: " + "{bufferedControlFrameBytes} > = {maxBufferedControlFrameBytes}. " + "Aborting connection to client: {client} ", + bufferedControlFrameBytes=self._bufferedControlFrameBytes, + maxBufferedControlFrameBytes=self._maxBufferedControlFrameBytes, + client=self.transport.getPeer(), + ) + # We've exceeded a reasonable buffer size for max buffered control frames. + # This is a denial of service risk, so we're going to drop this connection. + self.transport.abortConnection() + self.connectionLost(ExcessiveBufferingError()) + return False + return True + + def _flushBufferedControlData(self, *args): + """ + Called when the connection is marked writable again after being marked unwritable. + Attempts to flush buffered control data if there is any. + """ + # To respect backpressure here we send each write in order, paying attention to whether + # we got blocked + while self._consumerBlocked is None and self._bufferedControlFrames: + nextWrite = self._bufferedControlFrames.popleft() + self._bufferedControlFrameBytes -= len(nextWrite) + self.transport.write(nextWrite) + + +@implementer(ITransport, IConsumer, IPushProducer) +class H2Stream(object): + """ + A class representing a single HTTP/2 stream. + + This class works hand-in-hand with L{H2Connection}. It acts to provide an + implementation of L{ITransport}, L{IConsumer}, and L{IProducer} that work + for a single HTTP/2 connection, while tightly cleaving to the interface + provided by those interfaces. 
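The control-frame handling in _tryToWriteControlData and _flushBufferedControlData above follows a bounded-buffer pattern: write directly while the consumer is unblocked and nothing is queued, otherwise queue the bytes in order, and abort once the queue grows past a limit. A rough sketch of that pattern with plain callables standing in for the transport; the class, method names and the 64 KiB limit here are illustrative, not Twisted API:

from collections import deque

class BoundedWriteBuffer(object):
    def __init__(self, write, abort, max_buffered=64 * 1024):
        self._write = write        # invoked when data may go out immediately
        self._abort = abort        # invoked when the buffer limit is exceeded
        self._blocked = False      # True while the consumer is paused
        self._frames = deque()
        self._buffered = 0
        self._max = max_buffered

    def block(self):
        self._blocked = True

    def send(self, data):
        # Write straight through only if nothing is queued and we are not
        # blocked; otherwise buffer, to preserve ordering of frames.
        if not self._blocked and not self._frames:
            self._write(data)
            return True
        self._frames.append(data)
        self._buffered += len(data)
        if self._buffered >= self._max:
            self._abort()          # unconsumed data keeps growing: give up
            return False
        return True

    def unblock(self):
        # Flush queued frames in order; stop again if re-blocked mid-flush.
        self._blocked = False
        while not self._blocked and self._frames:
            data = self._frames.popleft()
            self._buffered -= len(data)
            self._write(data)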
It does this by having a tight coupling to + L{H2Connection}, which allows associating many of the functions of + L{ITransport}, L{IConsumer}, and L{IProducer} to objects on a + stream-specific level. + + @ivar streamID: The numerical stream ID that this object corresponds to. + @type streamID: L{int} + + @ivar producing: Whether this stream is currently allowed to produce data + to its consumer. + @type producing: L{bool} + + @ivar command: The HTTP verb used on the request. + @type command: L{unicode} + + @ivar path: The HTTP path used on the request. + @type path: L{unicode} + + @ivar producer: The object producing the response, if any. + @type producer: L{IProducer} + + @ivar site: The L{twisted.web.server.Site} object this stream belongs to, + if any. + @type site: L{twisted.web.server.Site} + + @ivar factory: The L{twisted.web.http.HTTPFactory} object that constructed + this stream's parent connection. + @type factory: L{twisted.web.http.HTTPFactory} + + @ivar _producerProducing: Whether the producer stored in producer is + currently producing data. + @type _producerProducing: L{bool} + + @ivar _inboundDataBuffer: Any data that has been received from the network + but has not yet been received by the consumer. + @type _inboundDataBuffer: A L{collections.deque} containing L{bytes} + + @ivar _conn: A reference to the connection this stream belongs to. + @type _conn: L{H2Connection} + + @ivar _request: A request object that this stream corresponds to. + @type _request: L{twisted.web.iweb.IRequest} + + @ivar _buffer: A buffer containing data produced by the producer that could + not be sent on the network at this time. + @type _buffer: L{io.BytesIO} + """ + # We need a transport property for t.w.h.Request, but HTTP/2 doesn't want + # to expose it. So we just set it to None. + transport = None + + + def __init__(self, streamID, connection, headers, + requestFactory, site, factory): + """ + Initialize this HTTP/2 stream. + + @param streamID: The numerical stream ID that this object corresponds + to. + @type streamID: L{int} + + @param connection: The HTTP/2 connection this stream belongs to. + @type connection: L{H2Connection} + + @param headers: The HTTP/2 request headers. + @type headers: A L{list} of L{tuple}s of header name and header value, + both as L{bytes}. + + @param requestFactory: A function that builds appropriate request + request objects. + @type requestFactory: A callable that returns a + L{twisted.web.iweb.IRequest}. + + @param site: The L{twisted.web.server.Site} object this stream belongs + to, if any. + @type site: L{twisted.web.server.Site} + + @param factory: The L{twisted.web.http.HTTPFactory} object that + constructed this stream's parent connection. + @type factory: L{twisted.web.http.HTTPFactory} + """ + + self.streamID = streamID + self.site = site + self.factory = factory + self.producing = True + self.command = None + self.path = None + self.producer = None + self._producerProducing = False + self._hasStreamingProducer = None + self._inboundDataBuffer = deque() + self._conn = connection + self._request = requestFactory(self, queued=False) + self._buffer = io.BytesIO() + + self._convertHeaders(headers) + + + def _convertHeaders(self, headers): + """ + This method converts the HTTP/2 header set into something that looks + like HTTP/1.1. In particular, it strips the 'special' headers and adds + a Host: header. + + @param headers: The HTTP/2 header set. + @type headers: A L{list} of L{tuple}s of header name and header value, + both as L{bytes}. 
+ """ + gotLength = False + + for header in headers: + if not header[0].startswith(b':'): + gotLength = ( + _addHeaderToRequest(self._request, header) or gotLength + ) + elif header[0] == b':method': + self.command = header[1] + elif header[0] == b':path': + self.path = header[1] + elif header[0] == b':authority': + # This is essentially the Host: header from HTTP/1.1 + _addHeaderToRequest(self._request, (b'host', header[1])) + + if not gotLength: + if self.command in (b'GET', b'HEAD'): + self._request.gotLength(0) + else: + self._request.gotLength(None) + + self._request.parseCookies() + expectContinue = self._request.requestHeaders.getRawHeaders(b'expect') + if expectContinue and expectContinue[0].lower() == b'100-continue': + self._send100Continue() + + + # Methods called by the H2Connection + def receiveDataChunk(self, data, flowControlledLength): + """ + Called when the connection has received a chunk of data from the + underlying transport. If the stream has been registered with a + consumer, and is currently able to push data, immediately passes it + through. Otherwise, buffers the chunk until we can start producing. + + @param data: The chunk of data that was received. + @type data: L{bytes} + + @param flowControlledLength: The total flow controlled length of this + chunk, which is used when we want to re-open the window. May be + different to C{len(data)}. + @type flowControlledLength: L{int} + """ + if not self.producing: + # Buffer data. + self._inboundDataBuffer.append((data, flowControlledLength)) + else: + self._request.handleContentChunk(data) + self._conn.openStreamWindow(self.streamID, flowControlledLength) + + + def requestComplete(self): + """ + Called by the L{H2Connection} when the all data for a request has been + received. Currently, with the legacy L{twisted.web.http.Request} + object, just calls requestReceived unless the producer wants us to be + quiet. + """ + if self.producing: + self._request.requestReceived(self.command, self.path, b'HTTP/2') + else: + self._inboundDataBuffer.append((_END_STREAM_SENTINEL, None)) + + + def connectionLost(self, reason): + """ + Called by the L{H2Connection} when a connection is lost or a stream is + reset. + + @param reason: The reason the connection was lost. + @type reason: L{str} + """ + self._request.connectionLost(reason) + + + def windowUpdated(self): + """ + Called by the L{H2Connection} when this stream's flow control window + has been opened. + """ + # If we don't have a producer, we have no-one to tell. + if not self.producer: + return + + # If we're not blocked on flow control, we don't care. + if self._producerProducing: + return + + # We check whether the stream's flow control window is actually above + # 0, and then, if a producer is registered and we still have space in + # the window, we unblock it. + remainingWindow = self._conn.remainingOutboundWindow(self.streamID) + if not remainingWindow > 0: + return + + # We have a producer and space in the window, so that producer can + # start producing again! + self._producerProducing = True + self.producer.resumeProducing() + + + def flowControlBlocked(self): + """ + Called by the L{H2Connection} when this stream's flow control window + has been exhausted. + """ + if not self.producer: + return + + if self._producerProducing: + self.producer.pauseProducing() + self._producerProducing = False + + + # Methods called by the consumer (usually an IRequest). 
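The pseudo-header handling in _convertHeaders above reduces to a small pure function: ordinary headers pass through, :method and :path are captured, and :authority becomes the Host header an HTTP/1.1-style request object expects. A standalone sketch, assuming lower-cased (name, value) byte pairs as hyper-h2 delivers them:

def split_h2_headers(headers):
    # Return (method, path, plain_headers) from an HTTP/2 header list.
    method = path = None
    plain = []
    for name, value in headers:
        if not name.startswith(b':'):
            plain.append((name, value))
        elif name == b':method':
            method = value
        elif name == b':path':
            path = value
        elif name == b':authority':
            # The :authority pseudo-header plays the role of Host: in HTTP/1.1.
            plain.append((b'host', value))
    return method, path, plain

print(split_h2_headers([
    (b':method', b'GET'),
    (b':path', b'/index'),
    (b':authority', b'example.com'),
    (b'accept', b'text/html'),
]))
# (b'GET', b'/index', [(b'host', b'example.com'), (b'accept', b'text/html')])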
+ def writeHeaders(self, version, code, reason, headers): + """ + Called by the consumer to write headers to the stream. + + @param version: The HTTP version. + @type version: L{bytes} + + @param code: The status code. + @type code: L{int} + + @param reason: The reason phrase. Ignored in HTTP/2. + @type reason: L{bytes} + + @param headers: The HTTP response headers. + @type: Any iterable of two-tuples of L{bytes}, representing header + names and header values. + """ + self._conn.writeHeaders(version, code, reason, headers, self.streamID) + + + def requestDone(self, request): + """ + Called by a consumer to clean up whatever permanent state is in use. + + @param request: The request calling the method. + @type request: L{twisted.web.iweb.IRequest} + """ + self._conn.endRequest(self.streamID) + + + def _send100Continue(self): + """ + Sends a 100 Continue response, used to signal to clients that further + processing will be performed. + """ + self._conn._send100Continue(self.streamID) + + + def _respondToBadRequestAndDisconnect(self): + """ + This is a quick and dirty way of responding to bad requests. + + As described by HTTP standard we should be patient and accept the + whole request from the client before sending a polite bad request + response, even in the case when clients send tons of data. + + Unlike in the HTTP/1.1 case, this does not actually disconnect the + underlying transport: there's no need. This instead just sends a 400 + response and terminates the stream. + """ + self._conn._respondToBadRequestAndDisconnect(self.streamID) + + + # Implementation: ITransport + def write(self, data): + """ + Write a single chunk of data into a data frame. + + @param data: The data chunk to send. + @type data: L{bytes} + """ + self._conn.writeDataToStream(self.streamID, data) + return + + + def writeSequence(self, iovec): + """ + Write a sequence of chunks of data into data frames. + + @param iovec: A sequence of chunks to send. + @type iovec: An iterable of L{bytes} chunks. + """ + for chunk in iovec: + self.write(chunk) + + + def loseConnection(self): + """ + Close the connection after writing all pending data. + """ + self._conn.endRequest(self.streamID) + + + def abortConnection(self): + """ + Forcefully abort the connection by sending a RstStream frame. + """ + self._conn.abortRequest(self.streamID) + + + def getPeer(self): + """ + Get information about the peer. + """ + return self._conn.getPeer() + + + def getHost(self): + """ + Similar to getPeer, but for this side of the connection. + """ + return self._conn.getHost() + + + def isSecure(self): + """ + Returns L{True} if this channel is using a secure transport. + + @returns: L{True} if this channel is secure. + @rtype: L{bool} + """ + return self._conn._isSecure() + + + # Implementation: IConsumer + def registerProducer(self, producer, streaming): + """ + Register to receive data from a producer. + + This sets self to be a consumer for a producer. When this object runs + out of data (as when a send(2) call on a socket succeeds in moving the + last data from a userspace buffer into a kernelspace buffer), it will + ask the producer to resumeProducing(). + + For L{IPullProducer} providers, C{resumeProducing} will be called once + each time data is required. + + For L{IPushProducer} providers, C{pauseProducing} will be called + whenever the write buffer fills up and C{resumeProducing} will only be + called when it empties. + + @param producer: The producer to register. 
+ @type producer: L{IProducer} provider + + @param streaming: L{True} if C{producer} provides L{IPushProducer}, + L{False} if C{producer} provides L{IPullProducer}. + @type streaming: L{bool} + + @raise RuntimeError: If a producer is already registered. + + @return: L{None} + """ + if self.producer: + raise ValueError( + "registering producer %s before previous one (%s) was " + "unregistered" % (producer, self.producer)) + + if not streaming: + self.hasStreamingProducer = False + producer = _PullToPush(producer, self) + producer.startStreaming() + else: + self.hasStreamingProducer = True + + self.producer = producer + self._producerProducing = True + + + def unregisterProducer(self): + """ + @see: L{IConsumer.unregisterProducer} + """ + # When the producer is unregistered, we're done. + if self.producer is not None and not self.hasStreamingProducer: + self.producer.stopStreaming() + + self._producerProducing = False + self.producer = None + self.hasStreamingProducer = None + + + # Implementation: IPushProducer + def stopProducing(self): + """ + @see: L{IProducer.stopProducing} + """ + self.producing = False + self.abortConnection() + + + def pauseProducing(self): + """ + @see: L{IPushProducer.pauseProducing} + """ + self.producing = False + + + def resumeProducing(self): + """ + @see: L{IPushProducer.resumeProducing} + """ + self.producing = True + consumedLength = 0 + + while self.producing and self._inboundDataBuffer: + # Allow for pauseProducing to be called in response to a call to + # resumeProducing. + chunk, flowControlledLength = self._inboundDataBuffer.popleft() + + if chunk is _END_STREAM_SENTINEL: + self.requestComplete() + else: + consumedLength += flowControlledLength + self._request.handleContentChunk(chunk) + + self._conn.openStreamWindow(self.streamID, consumedLength) + + + +def _addHeaderToRequest(request, header): + """ + Add a header tuple to a request header object. + + @param request: The request to add the header tuple to. + @type request: L{twisted.web.http.Request} + + @param header: The header tuple to add to the request. + @type header: A L{tuple} with two elements, the header name and header + value, both as L{bytes}. + + @return: If the header being added was the C{Content-Length} header. + @rtype: L{bool} + """ + requestHeaders = request.requestHeaders + name, value = header + values = requestHeaders.getRawHeaders(name) + + if values is not None: + values.append(value) + else: + requestHeaders.setRawHeaders(name, [value]) + + if name == b'content-length': + request.gotLength(int(value)) + return True + + return False diff --git a/contrib/python/Twisted/py2/twisted/web/_newclient.py b/contrib/python/Twisted/py2/twisted/web/_newclient.py new file mode 100644 index 0000000000..74a8a6c2d1 --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/_newclient.py @@ -0,0 +1,1778 @@ +# -*- test-case-name: twisted.web.test.test_newclient -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +An U{HTTP 1.1<http://www.w3.org/Protocols/rfc2616/rfc2616.html>} client. 
+ +The way to use the functionality provided by this module is to: + + - Connect a L{HTTP11ClientProtocol} to an HTTP server + - Create a L{Request} with the appropriate data + - Pass the request to L{HTTP11ClientProtocol.request} + - The returned Deferred will fire with a L{Response} object + - Create a L{IProtocol} provider which can handle the response body + - Connect it to the response with L{Response.deliverBody} + - When the protocol's C{connectionLost} method is called, the response is + complete. See L{Response.deliverBody} for details. + +Various other classes in this module support this usage: + + - HTTPParser is the basic HTTP parser. It can handle the parts of HTTP which + are symmetric between requests and responses. + + - HTTPClientParser extends HTTPParser to handle response-specific parts of + HTTP. One instance is created for each request to parse the corresponding + response. +""" + +from __future__ import division, absolute_import +__metaclass__ = type + +import re + +from zope.interface import implementer + +from twisted.python.compat import networkString +from twisted.python.components import proxyForInterface +from twisted.python.reflect import fullyQualifiedName +from twisted.python.failure import Failure +from twisted.internet.interfaces import IConsumer, IPushProducer +from twisted.internet.error import ConnectionDone +from twisted.internet.defer import Deferred, succeed, fail, maybeDeferred +from twisted.internet.defer import CancelledError +from twisted.internet.protocol import Protocol +from twisted.protocols.basic import LineReceiver +from twisted.web.iweb import UNKNOWN_LENGTH, IResponse, IClientRequest +from twisted.web.http_headers import Headers +from twisted.web.http import NO_CONTENT, NOT_MODIFIED +from twisted.web.http import _DataLoss, PotentialDataLoss +from twisted.web.http import _IdentityTransferDecoder, _ChunkedTransferDecoder +from twisted.logger import Logger + +# States HTTPParser can be in +STATUS = u'STATUS' +HEADER = u'HEADER' +BODY = u'BODY' +DONE = u'DONE' +_moduleLog = Logger() + + +class BadHeaders(Exception): + """ + Headers passed to L{Request} were in some way invalid. + """ + + + +class ExcessWrite(Exception): + """ + The body L{IBodyProducer} for a request tried to write data after + indicating it had finished writing data. + """ + + +class ParseError(Exception): + """ + Some received data could not be parsed. + + @ivar data: The string which could not be parsed. + """ + def __init__(self, reason, data): + Exception.__init__(self, reason, data) + self.data = data + + + +class BadResponseVersion(ParseError): + """ + The version string in a status line was unparsable. + """ + + + +class _WrapperException(Exception): + """ + L{_WrapperException} is the base exception type for exceptions which + include one or more other exceptions as the low-level causes. + + @ivar reasons: A L{list} of one or more L{Failure} instances encountered + during an HTTP request. See subclass documentation for more details. + """ + def __init__(self, reasons): + Exception.__init__(self, reasons) + self.reasons = reasons + + + +class RequestGenerationFailed(_WrapperException): + """ + There was an error while creating the bytes which make up a request. + + @ivar reasons: A C{list} of one or more L{Failure} instances giving the + reasons the request generation was considered to have failed. + """ + + + +class RequestTransmissionFailed(_WrapperException): + """ + There was an error while sending the bytes which make up a request. 
+ + @ivar reasons: A C{list} of one or more L{Failure} instances giving the + reasons the request transmission was considered to have failed. + """ + + + +class ConnectionAborted(Exception): + """ + The connection was explicitly aborted by application code. + """ + + + +class WrongBodyLength(Exception): + """ + An L{IBodyProducer} declared the number of bytes it was going to + produce (via its C{length} attribute) and then produced a different number + of bytes. + """ + + + +class ResponseDone(Exception): + """ + L{ResponseDone} may be passed to L{IProtocol.connectionLost} on the + protocol passed to L{Response.deliverBody} and indicates that the entire + response has been delivered. + """ + + + +class ResponseFailed(_WrapperException): + """ + L{ResponseFailed} indicates that all of the response to a request was not + received for some reason. + + @ivar reasons: A C{list} of one or more L{Failure} instances giving the + reasons the response was considered to have failed. + + @ivar response: If specified, the L{Response} received from the server (and + in particular the status code and the headers). + """ + + def __init__(self, reasons, response=None): + _WrapperException.__init__(self, reasons) + self.response = response + + + +class ResponseNeverReceived(ResponseFailed): + """ + A L{ResponseFailed} that knows no response bytes at all have been received. + """ + + + +class RequestNotSent(Exception): + """ + L{RequestNotSent} indicates that an attempt was made to issue a request but + for reasons unrelated to the details of the request itself, the request + could not be sent. For example, this may indicate that an attempt was made + to send a request using a protocol which is no longer connected to a + server. + """ + + + +def _callAppFunction(function): + """ + Call C{function}. If it raises an exception, log it with a minimal + description of the source. + + @return: L{None} + """ + try: + function() + except: + _moduleLog.failure( + u"Unexpected exception from {name}", + name=fullyQualifiedName(function) + ) + + + +class HTTPParser(LineReceiver): + """ + L{HTTPParser} handles the parsing side of HTTP processing. With a suitable + subclass, it can parse either the client side or the server side of the + connection. + + @ivar headers: All of the non-connection control message headers yet + received. + + @ivar state: State indicator for the response parsing state machine. One + of C{STATUS}, C{HEADER}, C{BODY}, C{DONE}. + + @ivar _partialHeader: L{None} or a C{list} of the lines of a multiline + header while that header is being received. + """ + + # NOTE: According to HTTP spec, we're supposed to eat the + # 'Proxy-Authenticate' and 'Proxy-Authorization' headers also, but that + # doesn't sound like a good idea to me, because it makes it impossible to + # have a non-authenticating transparent proxy in front of an authenticating + # proxy. An authenticating proxy can eat them itself. -jknight + # + # Further, quoting + # http://homepages.tesco.net/J.deBoynePollard/FGA/web-proxy-connection-header.html + # regarding the 'Proxy-Connection' header: + # + # The Proxy-Connection: header is a mistake in how some web browsers + # use HTTP. Its name is the result of a false analogy. It is not a + # standard part of the protocol. There is a different standard + # protocol mechanism for doing what it does. And its existence + # imposes a requirement upon HTTP servers such that no proxy HTTP + # server can be standards-conforming in practice. 
+ # + # -exarkun + + # Some servers (like http://news.ycombinator.com/) return status lines and + # HTTP headers delimited by \n instead of \r\n. + delimiter = b'\n' + + CONNECTION_CONTROL_HEADERS = set([ + b'content-length', b'connection', b'keep-alive', b'te', + b'trailers', b'transfer-encoding', b'upgrade', + b'proxy-connection']) + + def connectionMade(self): + self.headers = Headers() + self.connHeaders = Headers() + self.state = STATUS + self._partialHeader = None + + + def switchToBodyMode(self, decoder): + """ + Switch to body parsing mode - interpret any more bytes delivered as + part of the message body and deliver them to the given decoder. + """ + if self.state == BODY: + raise RuntimeError(u"already in body mode") + + self.bodyDecoder = decoder + self.state = BODY + self.setRawMode() + + + def lineReceived(self, line): + """ + Handle one line from a response. + """ + # Handle the normal CR LF case. + if line[-1:] == b'\r': + line = line[:-1] + + if self.state == STATUS: + self.statusReceived(line) + self.state = HEADER + elif self.state == HEADER: + if not line or line[0] not in b' \t': + if self._partialHeader is not None: + header = b''.join(self._partialHeader) + name, value = header.split(b':', 1) + value = value.strip() + self.headerReceived(name, value) + if not line: + # Empty line means the header section is over. + self.allHeadersReceived() + else: + # Line not beginning with LWS is another header. + self._partialHeader = [line] + else: + # A line beginning with LWS is a continuation of a header + # begun on a previous line. + self._partialHeader.append(line) + + + def rawDataReceived(self, data): + """ + Pass data from the message body to the body decoder object. + """ + self.bodyDecoder.dataReceived(data) + + + def isConnectionControlHeader(self, name): + """ + Return C{True} if the given lower-cased name is the name of a + connection control header (rather than an entity header). + + According to RFC 2616, section 14.10, the tokens in the Connection + header are probably relevant here. However, I am not sure what the + practical consequences of either implementing or ignoring that are. + So I leave it unimplemented for the time being. + """ + return name in self.CONNECTION_CONTROL_HEADERS + + + def statusReceived(self, status): + """ + Callback invoked whenever the first line of a new message is received. + Override this. + + @param status: The first line of an HTTP request or response message + without trailing I{CR LF}. + @type status: C{bytes} + """ + + + def headerReceived(self, name, value): + """ + Store the given header in C{self.headers}. + """ + name = name.lower() + if self.isConnectionControlHeader(name): + headers = self.connHeaders + else: + headers = self.headers + headers.addRawHeader(name, value) + + + def allHeadersReceived(self): + """ + Callback invoked after the last header is passed to C{headerReceived}. + Override this to change to the C{BODY} or C{DONE} state. + """ + self.switchToBodyMode(None) + + + +class HTTPClientParser(HTTPParser): + """ + An HTTP parser which only handles HTTP responses. + + @ivar request: The request with which the expected response is associated. + @type request: L{Request} + + @ivar NO_BODY_CODES: A C{set} of response codes which B{MUST NOT} have a + body. + + @ivar finisher: A callable to invoke when this response is fully parsed. + + @ivar _responseDeferred: A L{Deferred} which will be called back with the + response when all headers in the response have been received. + Thereafter, L{None}. 
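The continuation handling in lineReceived above (a line beginning with space or tab extends the previous header, and a blank line ends the header section) can be seen in isolation with a short sketch that is not part of the parser itself:

def unfold_headers(lines):
    # Collapse folded continuation lines; return (name, value) byte pairs.
    headers, partial = [], None
    for line in lines:
        if line[:1] in (b' ', b'\t'):
            partial.append(line)          # continuation of the previous header
            continue
        if partial is not None:
            name, value = b''.join(partial).split(b':', 1)
            headers.append((name, value.strip()))
        partial = [line] if line else None
    return headers

print(unfold_headers([
    b'Content-Type: text/plain',
    b'X-Long: first part,',
    b'\t second part',
    b'',                                   # blank line ends the header section
]))
# [(b'Content-Type', b'text/plain'), (b'X-Long', b'first part,\t second part')]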
+ + @ivar _everReceivedData: C{True} if any bytes have been received. + """ + NO_BODY_CODES = set([NO_CONTENT, NOT_MODIFIED]) + + _transferDecoders = { + b'chunked': _ChunkedTransferDecoder, + } + + bodyDecoder = None + _log = Logger() + + def __init__(self, request, finisher): + self.request = request + self.finisher = finisher + self._responseDeferred = Deferred() + self._everReceivedData = False + + + def dataReceived(self, data): + """ + Override so that we know if any response has been received. + """ + self._everReceivedData = True + HTTPParser.dataReceived(self, data) + + + def parseVersion(self, strversion): + """ + Parse version strings of the form Protocol '/' Major '.' Minor. E.g. + b'HTTP/1.1'. Returns (protocol, major, minor). Will raise ValueError + on bad syntax. + """ + try: + proto, strnumber = strversion.split(b'/') + major, minor = strnumber.split(b'.') + major, minor = int(major), int(minor) + except ValueError as e: + raise BadResponseVersion(str(e), strversion) + if major < 0 or minor < 0: + raise BadResponseVersion(u"version may not be negative", + strversion) + return (proto, major, minor) + + + def statusReceived(self, status): + """ + Parse the status line into its components and create a response object + to keep track of this response's state. + """ + parts = status.split(b' ', 2) + if len(parts) == 2: + # Some broken servers omit the required `phrase` portion of + # `status-line`. One such server identified as + # "cloudflare-nginx". Others fail to identify themselves + # entirely. Fill in an empty phrase for such cases. + version, codeBytes = parts + phrase = b"" + elif len(parts) == 3: + version, codeBytes, phrase = parts + else: + raise ParseError(u"wrong number of parts", status) + + try: + statusCode = int(codeBytes) + except ValueError: + raise ParseError(u"non-integer status code", status) + + self.response = Response._construct( + self.parseVersion(version), + statusCode, + phrase, + self.headers, + self.transport, + self.request, + ) + + + def _finished(self, rest): + """ + Called to indicate that an entire response has been received. No more + bytes will be interpreted by this L{HTTPClientParser}. Extra bytes are + passed up and the state of this L{HTTPClientParser} is set to I{DONE}. + + @param rest: A C{bytes} giving any extra bytes delivered to this + L{HTTPClientParser} which are not part of the response being + parsed. + """ + self.state = DONE + self.finisher(rest) + + + def isConnectionControlHeader(self, name): + """ + Content-Length in the response to a HEAD request is an entity header, + not a connection control header. + """ + if self.request.method == b'HEAD' and name == b'content-length': + return False + return HTTPParser.isConnectionControlHeader(self, name) + + + def allHeadersReceived(self): + """ + Figure out how long the response body is going to be by examining + headers and stuff. + """ + if 100 <= self.response.code < 200: + # RFC 7231 Section 6.2 says that if we receive a 1XX status code + # and aren't expecting it, we MAY ignore it. That's what we're + # going to do. We reset the parser here, but we leave + # _everReceivedData in its True state because we have, in fact, + # received data. 
+ self._log.info( + "Ignoring unexpected {code} response", + code=self.response.code + ) + self.connectionMade() + del self.response + return + + if (self.response.code in self.NO_BODY_CODES + or self.request.method == b'HEAD'): + self.response.length = 0 + # The order of the next two lines might be of interest when adding + # support for pipelining. + self._finished(self.clearLineBuffer()) + self.response._bodyDataFinished() + else: + transferEncodingHeaders = self.connHeaders.getRawHeaders( + b'transfer-encoding') + if transferEncodingHeaders: + + # This could be a KeyError. However, that would mean we do not + # know how to decode the response body, so failing the request + # is as good a behavior as any. Perhaps someday we will want + # to normalize/document/test this specifically, but failing + # seems fine to me for now. + transferDecoder = self._transferDecoders[transferEncodingHeaders[0].lower()] + + # If anyone ever invents a transfer encoding other than + # chunked (yea right), and that transfer encoding can predict + # the length of the response body, it might be sensible to + # allow the transfer decoder to set the response object's + # length attribute. + else: + contentLengthHeaders = self.connHeaders.getRawHeaders( + b'content-length') + if contentLengthHeaders is None: + contentLength = None + elif len(contentLengthHeaders) == 1: + contentLength = int(contentLengthHeaders[0]) + self.response.length = contentLength + else: + # "HTTP Message Splitting" or "HTTP Response Smuggling" + # potentially happening. Or it's just a buggy server. + raise ValueError(u"Too many Content-Length headers; " + u"response is invalid") + + if contentLength == 0: + self._finished(self.clearLineBuffer()) + transferDecoder = None + else: + transferDecoder = lambda x, y: _IdentityTransferDecoder( + contentLength, x, y) + + if transferDecoder is None: + self.response._bodyDataFinished() + else: + # Make sure as little data as possible from the response body + # gets delivered to the response object until the response + # object actually indicates it is ready to handle bytes + # (probably because an application gave it a way to interpret + # them). + self.transport.pauseProducing() + self.switchToBodyMode(transferDecoder( + self.response._bodyDataReceived, + self._finished)) + + # This must be last. If it were first, then application code might + # change some state (for example, registering a protocol to receive the + # response body). Then the pauseProducing above would be wrong since + # the response is ready for bytes and nothing else would ever resume + # the transport. + self._responseDeferred.callback(self.response) + del self._responseDeferred + + + def connectionLost(self, reason): + if self.bodyDecoder is not None: + try: + try: + self.bodyDecoder.noMoreData() + except PotentialDataLoss: + self.response._bodyDataFinished(Failure()) + except _DataLoss: + self.response._bodyDataFinished( + Failure(ResponseFailed([reason, Failure()], + self.response))) + else: + self.response._bodyDataFinished() + except: + # Handle exceptions from both the except suites and the else + # suite. Those functions really shouldn't raise exceptions, + # but maybe there's some buggy application code somewhere + # making things difficult. 
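The branching in allHeadersReceived above boils down to one decision about how the response body is framed. A simplified sketch of that decision, ignoring the multiple-Content-Length error case and the 1XX reset handled above:

NO_BODY_CODES = (204, 304)

def response_framing(code, head, transfer_encoding, content_length):
    # Return 'none', 'chunked', an exact byte count, or 'until-close'.
    if code in NO_BODY_CODES or head:
        return 'none'
    if transfer_encoding is not None:
        return 'chunked'                  # length unknown until the last chunk
    if content_length is not None:
        return content_length             # identity framing, exact byte count
    return 'until-close'                  # body ends when the connection does

print(response_framing(200, False, None, 1234))        # 1234
print(response_framing(200, False, b'chunked', None))  # chunked
print(response_framing(304, False, None, None))        # none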
+ self._log.failure('') + elif self.state != DONE: + if self._everReceivedData: + exceptionClass = ResponseFailed + else: + exceptionClass = ResponseNeverReceived + self._responseDeferred.errback(Failure(exceptionClass([reason]))) + del self._responseDeferred + + + +_VALID_METHOD = re.compile( + br"\A[%s]+\Z" % ( + bytes().join( + ( + b"!", b"#", b"$", b"%", b"&", b"'", b"*", + b"+", b"-", b".", b"^", b"_", b"`", b"|", b"~", + b"\x30-\x39", + b"\x41-\x5a", + b"\x61-\x7A", + ), + ), + ), +) + + + +def _ensureValidMethod(method): + """ + An HTTP method is an HTTP token, which consists of any visible + ASCII character that is not a delimiter (i.e. one of + C{"(),/:;<=>?@[\\]{}}.) + + @param method: the method to check + @type method: L{bytes} + + @return: the method if it is valid + @rtype: L{bytes} + + @raise ValueError: if the method is not valid + + @see: U{https://tools.ietf.org/html/rfc7230#section-3.1.1}, + U{https://tools.ietf.org/html/rfc7230#section-3.2.6}, + U{https://tools.ietf.org/html/rfc5234#appendix-B.1} + """ + if _VALID_METHOD.match(method): + return method + raise ValueError("Invalid method {!r}".format(method)) + + + +_VALID_URI = re.compile(br'\A[\x21-\x7e]+\Z') + + + +def _ensureValidURI(uri): + """ + A valid URI cannot contain control characters (i.e., characters + between 0-32, inclusive and 127) or non-ASCII characters (i.e., + characters with values between 128-255, inclusive). + + @param uri: the URI to check + @type uri: L{bytes} + + @return: the URI if it is valid + @rtype: L{bytes} + + @raise ValueError: if the URI is not valid + + @see: U{https://tools.ietf.org/html/rfc3986#section-3.3}, + U{https://tools.ietf.org/html/rfc3986#appendix-A}, + U{https://tools.ietf.org/html/rfc5234#appendix-B.1} + """ + if _VALID_URI.match(uri): + return uri + raise ValueError("Invalid URI {!r}".format(uri)) + + + +@implementer(IClientRequest) +class Request: + """ + A L{Request} instance describes an HTTP request to be sent to an HTTP + server. + + @ivar method: See L{__init__}. + @ivar uri: See L{__init__}. + @ivar headers: See L{__init__}. + @ivar bodyProducer: See L{__init__}. + @ivar persistent: See L{__init__}. + + @ivar _parsedURI: Parsed I{URI} for the request, or L{None}. + @type _parsedURI: L{twisted.web.client.URI} or L{None} + """ + _log = Logger() + + def __init__(self, method, uri, headers, bodyProducer, persistent=False): + """ + @param method: The HTTP method for this request, ex: b'GET', b'HEAD', + b'POST', etc. + @type method: L{bytes} + + @param uri: The relative URI of the resource to request. For example, + C{b'/foo/bar?baz=quux'}. + @type uri: L{bytes} + + @param headers: Headers to be sent to the server. It is important to + note that this object does not create any implicit headers. So it + is up to the HTTP Client to add required headers such as 'Host'. + @type headers: L{twisted.web.http_headers.Headers} + + @param bodyProducer: L{None} or an L{IBodyProducer} provider which + produces the content body to send to the remote HTTP server. + + @param persistent: Set to C{True} when you use HTTP persistent + connection, defaults to C{False}. + @type persistent: L{bool} + """ + self.method = _ensureValidMethod(method) + self.uri = _ensureValidURI(uri) + self.headers = headers + self.bodyProducer = bodyProducer + self.persistent = persistent + self._parsedURI = None + + + @classmethod + def _construct(cls, method, uri, headers, bodyProducer, persistent=False, + parsedURI=None): + """ + Private constructor. + + @param method: See L{__init__}. 
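The two regular expressions above encode RFC 7230's token and visible-ASCII rules: anything containing a delimiter or control character is rejected before it can reach the wire. For illustration only, exercising the private helpers defined above:

from twisted.web._newclient import _ensureValidMethod, _ensureValidURI

_ensureValidMethod(b"GET")            # returns b"GET": all token characters
_ensureValidURI(b"/path?x=1")         # returns the URI: visible ASCII only

try:
    _ensureValidMethod(b"GE T")       # space is a delimiter, not a token char
except ValueError as e:
    print(e)

try:
    _ensureValidURI(b"/pa th")        # spaces would allow request splitting
except ValueError as e:
    print(e)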
+ @param uri: See L{__init__}. + @param headers: See L{__init__}. + @param bodyProducer: See L{__init__}. + @param persistent: See L{__init__}. + @param parsedURI: See L{Request._parsedURI}. + + @return: L{Request} instance. + """ + request = cls(method, uri, headers, bodyProducer, persistent) + request._parsedURI = parsedURI + return request + + + @property + def absoluteURI(self): + """ + The absolute URI of the request as C{bytes}, or L{None} if the + absolute URI cannot be determined. + """ + return getattr(self._parsedURI, 'toBytes', lambda: None)() + + + def _writeHeaders(self, transport, TEorCL): + hosts = self.headers.getRawHeaders(b'host', ()) + if len(hosts) != 1: + raise BadHeaders(u"Exactly one Host header required") + + # In the future, having the protocol version be a parameter to this + # method would probably be good. It would be nice if this method + # weren't limited to issuing HTTP/1.1 requests. + requestLines = [] + requestLines.append( + b' '.join( + [ + _ensureValidMethod(self.method), + _ensureValidURI(self.uri), + b'HTTP/1.1\r\n', + ] + ), + ) + if not self.persistent: + requestLines.append(b'Connection: close\r\n') + if TEorCL is not None: + requestLines.append(TEorCL) + for name, values in self.headers.getAllRawHeaders(): + requestLines.extend([name + b': ' + v + b'\r\n' for v in values]) + requestLines.append(b'\r\n') + transport.writeSequence(requestLines) + + + def _writeToBodyProducerChunked(self, transport): + """ + Write this request to the given transport using chunked + transfer-encoding to frame the body. + + @param transport: See L{writeTo}. + @return: See L{writeTo}. + """ + self._writeHeaders(transport, b'Transfer-Encoding: chunked\r\n') + encoder = ChunkedEncoder(transport) + encoder.registerProducer(self.bodyProducer, True) + d = self.bodyProducer.startProducing(encoder) + + def cbProduced(ignored): + encoder.unregisterProducer() + def ebProduced(err): + encoder._allowNoMoreWrites() + # Don't call the encoder's unregisterProducer because it will write + # a zero-length chunk. This would indicate to the server that the + # request body is complete. There was an error, though, so we + # don't want to do that. + transport.unregisterProducer() + return err + d.addCallbacks(cbProduced, ebProduced) + return d + + + def _writeToBodyProducerContentLength(self, transport): + """ + Write this request to the given transport using content-length to frame + the body. + + @param transport: See L{writeTo}. + @return: See L{writeTo}. + """ + self._writeHeaders( + transport, + networkString( + 'Content-Length: %d\r\n' % (self.bodyProducer.length,))) + + # This Deferred is used to signal an error in the data written to the + # encoder below. It can only errback and it will only do so before too + # many bytes have been written to the encoder and before the producer + # Deferred fires. + finishedConsuming = Deferred() + + # This makes sure the producer writes the correct number of bytes for + # the request body. + encoder = LengthEnforcingConsumer( + self.bodyProducer, transport, finishedConsuming) + + transport.registerProducer(self.bodyProducer, True) + + finishedProducing = self.bodyProducer.startProducing(encoder) + + def combine(consuming, producing): + # This Deferred is returned and will be fired when the first of + # consuming or producing fires. If it's cancelled, forward that + # cancellation to the producer. + def cancelConsuming(ign): + finishedProducing.cancel() + ultimate = Deferred(cancelConsuming) + + # Keep track of what has happened so far. 
This initially + # contains None, then an integer uniquely identifying what + # sequence of events happened. See the callbacks and errbacks + # defined below for the meaning of each value. + state = [None] + + def ebConsuming(err): + if state == [None]: + # The consuming Deferred failed first. This means the + # overall writeTo Deferred is going to errback now. The + # producing Deferred should not fire later (because the + # consumer should have called stopProducing on the + # producer), but if it does, a callback will be ignored + # and an errback will be logged. + state[0] = 1 + ultimate.errback(err) + else: + # The consuming Deferred errbacked after the producing + # Deferred fired. This really shouldn't ever happen. + # If it does, I goofed. Log the error anyway, just so + # there's a chance someone might notice and complain. + self._log.failure( + u"Buggy state machine in {request}/[{state}]: " + u"ebConsuming called", + failure=err, + request=repr(self), + state=state[0] + ) + + def cbProducing(result): + if state == [None]: + # The producing Deferred succeeded first. Nothing will + # ever happen to the consuming Deferred. Tell the + # encoder we're done so it can check what the producer + # wrote and make sure it was right. + state[0] = 2 + try: + encoder._noMoreWritesExpected() + except: + # Fail the overall writeTo Deferred - something the + # producer did was wrong. + ultimate.errback() + else: + # Success - succeed the overall writeTo Deferred. + ultimate.callback(None) + # Otherwise, the consuming Deferred already errbacked. The + # producing Deferred wasn't supposed to fire, but it did + # anyway. It's buggy, but there's not really anything to be + # done about it. Just ignore this result. + + def ebProducing(err): + if state == [None]: + # The producing Deferred failed first. This means the + # overall writeTo Deferred is going to errback now. + # Tell the encoder that we're done so it knows to reject + # further writes from the producer (which should not + # happen, but the producer may be buggy). + state[0] = 3 + encoder._allowNoMoreWrites() + ultimate.errback(err) + else: + # The producing Deferred failed after the consuming + # Deferred failed. It shouldn't have, so it's buggy. + # Log the exception in case anyone who can fix the code + # is watching. + self._log.failure(u"Producer is buggy", failure=err) + + consuming.addErrback(ebConsuming) + producing.addCallbacks(cbProducing, ebProducing) + + return ultimate + + d = combine(finishedConsuming, finishedProducing) + def f(passthrough): + # Regardless of what happens with the overall Deferred, once it + # fires, the producer registered way up above the definition of + # combine should be unregistered. + transport.unregisterProducer() + return passthrough + d.addBoth(f) + return d + + + def _writeToEmptyBodyContentLength(self, transport): + """ + Write this request to the given transport using content-length to frame + the (empty) body. + + @param transport: See L{writeTo}. + @return: See L{writeTo}. + """ + self._writeHeaders(transport, b"Content-Length: 0\r\n") + return succeed(None) + + + def writeTo(self, transport): + """ + Format this L{Request} as an HTTP/1.1 request and write it to the given + transport. If bodyProducer is not None, it will be associated with an + L{IConsumer}. + + @param transport: The transport to which to write. 
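The combine() helper above is, at its core, a "first outcome wins" arrangement between the consuming and producing Deferreds, with cancellation forwarded to the producing side. A stripped-down sketch of just that shape, without the per-state logging and encoder bookkeeping the real code performs; the names here are illustrative:

from twisted.internet.defer import Deferred
from twisted.python.failure import Failure

def first_of(consuming, producing):
    # `consuming` can only fail; `producing` may succeed or fail. Whichever
    # fires first decides the result; a late result from the other side is
    # ignored. Cancelling the returned Deferred cancels the producing side.
    result = Deferred(lambda _: producing.cancel())
    fired = [False]

    def fromConsuming(err):
        if not fired[0]:
            fired[0] = True
            result.errback(err)

    def fromProducing(outcome):
        if not fired[0]:
            fired[0] = True
            if isinstance(outcome, Failure):
                result.errback(outcome)
            else:
                result.callback(outcome)

    consuming.addErrback(fromConsuming)
    producing.addBoth(fromProducing)
    return result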
+ @type transport: L{twisted.internet.interfaces.ITransport} provider + + @return: A L{Deferred} which fires with L{None} when the request has + been completely written to the transport or with a L{Failure} if + there is any problem generating the request bytes. + """ + if self.bodyProducer is None: + # If the method semantics anticipate a body, include a + # Content-Length even if it is 0. + # https://tools.ietf.org/html/rfc7230#section-3.3.2 + if self.method in (b"PUT", b"POST"): + self._writeToEmptyBodyContentLength(transport) + else: + self._writeHeaders(transport, None) + elif self.bodyProducer.length is UNKNOWN_LENGTH: + return self._writeToBodyProducerChunked(transport) + else: + return self._writeToBodyProducerContentLength(transport) + + + def stopWriting(self): + """ + Stop writing this request to the transport. This can only be called + after C{writeTo} and before the L{Deferred} returned by C{writeTo} + fires. It should cancel any asynchronous task started by C{writeTo}. + The L{Deferred} returned by C{writeTo} need not be fired if this method + is called. + """ + # If bodyProducer is None, then the Deferred returned by writeTo has + # fired already and this method cannot be called. + _callAppFunction(self.bodyProducer.stopProducing) + + + +class LengthEnforcingConsumer: + """ + An L{IConsumer} proxy which enforces an exact length requirement on the + total data written to it. + + @ivar _length: The number of bytes remaining to be written. + + @ivar _producer: The L{IBodyProducer} which is writing to this + consumer. + + @ivar _consumer: The consumer to which at most C{_length} bytes will be + forwarded. + + @ivar _finished: A L{Deferred} which will be fired with a L{Failure} if too + many bytes are written to this consumer. + """ + def __init__(self, producer, consumer, finished): + self._length = producer.length + self._producer = producer + self._consumer = consumer + self._finished = finished + + + def _allowNoMoreWrites(self): + """ + Indicate that no additional writes are allowed. Attempts to write + after calling this method will be met with an exception. + """ + self._finished = None + + + def write(self, bytes): + """ + Write C{bytes} to the underlying consumer unless + C{_noMoreWritesExpected} has been called or there are/have been too + many bytes. + """ + if self._finished is None: + # No writes are supposed to happen any more. Try to convince the + # calling code to stop calling this method by calling its + # stopProducing method and then throwing an exception at it. This + # exception isn't documented as part of the API because you're + # never supposed to expect it: only buggy code will ever receive + # it. + self._producer.stopProducing() + raise ExcessWrite() + + if len(bytes) <= self._length: + self._length -= len(bytes) + self._consumer.write(bytes) + else: + # No synchronous exception is raised in *this* error path because + # we still have _finished which we can use to report the error to a + # better place than the direct caller of this method (some + # arbitrary application code). + _callAppFunction(self._producer.stopProducing) + self._finished.errback(WrongBodyLength(u"too many bytes written")) + self._allowNoMoreWrites() + + + def _noMoreWritesExpected(self): + """ + Called to indicate no more bytes will be written to this consumer. + Check to see that the correct number have been written. + + @raise WrongBodyLength: If not enough bytes have been written. 
+ """ + if self._finished is not None: + self._allowNoMoreWrites() + if self._length: + raise WrongBodyLength(u"too few bytes written") + + + +def makeStatefulDispatcher(name, template): + """ + Given a I{dispatch} name and a function, return a function which can be + used as a method and which, when called, will call another method defined + on the instance and return the result. The other method which is called is + determined by the value of the C{_state} attribute of the instance. + + @param name: A string which is used to construct the name of the subsidiary + method to invoke. The subsidiary method is named like C{'_%s_%s' % + (name, _state)}. + + @param template: A function object which is used to give the returned + function a docstring. + + @return: The dispatcher function. + """ + def dispatcher(self, *args, **kwargs): + func = getattr(self, '_' + name + '_' + self._state, None) + if func is None: + raise RuntimeError( + u"%r has no %s method in state %s" % (self, name, self._state)) + return func(*args, **kwargs) + dispatcher.__doc__ = template.__doc__ + return dispatcher + + + +# This proxy class is used only in the private constructor of the Response +# class below, in order to prevent users relying on any property of the +# concrete request object: they can only use what is provided by +# IClientRequest. +_ClientRequestProxy = proxyForInterface(IClientRequest) + + + +@implementer(IResponse) +class Response: + """ + A L{Response} instance describes an HTTP response received from an HTTP + server. + + L{Response} should not be subclassed or instantiated. + + @ivar _transport: See L{__init__}. + + @ivar _bodyProtocol: The L{IProtocol} provider to which the body is + delivered. L{None} before one has been registered with + C{deliverBody}. + + @ivar _bodyBuffer: A C{list} of the strings passed to C{bodyDataReceived} + before C{deliverBody} is called. L{None} afterwards. + + @ivar _state: Indicates what state this L{Response} instance is in, + particularly with respect to delivering bytes from the response body + to an application-supplied protocol object. This may be one of + C{'INITIAL'}, C{'CONNECTED'}, C{'DEFERRED_CLOSE'}, or C{'FINISHED'}, + with the following meanings: + + - INITIAL: This is the state L{Response} objects start in. No + protocol has yet been provided and the underlying transport may + still have bytes to deliver to it. + + - DEFERRED_CLOSE: If the underlying transport indicates all bytes + have been delivered but no application-provided protocol is yet + available, the L{Response} moves to this state. Data is + buffered and waiting for a protocol to be delivered to. + + - CONNECTED: If a protocol is provided when the state is INITIAL, + the L{Response} moves to this state. Any buffered data is + delivered and any data which arrives from the transport + subsequently is given directly to the protocol. + + - FINISHED: If a protocol is provided in the DEFERRED_CLOSE state, + the L{Response} moves to this state after delivering all + buffered data to the protocol. Otherwise, if the L{Response} is + in the CONNECTED state, if the transport indicates there is no + more data, the L{Response} moves to this state. Nothing else + can happen once the L{Response} is in this state. + @type _state: C{str} + """ + + length = UNKNOWN_LENGTH + + _bodyProtocol = None + _bodyFinished = False + + def __init__(self, version, code, phrase, headers, _transport): + """ + @param version: HTTP version components protocol, major, minor. E.g. + C{(b'HTTP', 1, 1)} to mean C{b'HTTP/1.1'}. 
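makeStatefulDispatcher above implements a small state-machine idiom: the public method looks up '_<name>_<state>' on the instance and delegates to it, so each state's behaviour lives in its own method. This is exactly how Response routes deliverBody, _bodyDataReceived and _bodyDataFinished between INITIAL, CONNECTED, DEFERRED_CLOSE and FINISHED. The same idiom on a toy class; this re-implementation drops the docstring-template argument the real helper takes:

def makeDispatcher(name):
    def dispatcher(self, *args, **kwargs):
        func = getattr(self, '_%s_%s' % (name, self._state), None)
        if func is None:
            raise RuntimeError("%r has no %s method in state %s"
                               % (self, name, self._state))
        return func(*args, **kwargs)
    return dispatcher

class Door(object):
    _state = 'CLOSED'
    push = makeDispatcher('push')

    def _push_CLOSED(self):
        self._state = 'OPEN'
        return 'opened'

    def _push_OPEN(self):
        return 'already open'

d = Door()
print(d.push())   # opened
print(d.push())   # already open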
+ + @param code: HTTP status code. + @type code: L{int} + + @param phrase: HTTP reason phrase, intended to give a short description + of the HTTP status code. + + @param headers: HTTP response headers. + @type headers: L{twisted.web.http_headers.Headers} + + @param _transport: The transport which is delivering this response. + """ + self.version = version + self.code = code + self.phrase = phrase + self.headers = headers + self._transport = _transport + self._bodyBuffer = [] + self._state = 'INITIAL' + self.request = None + self.previousResponse = None + + + @classmethod + def _construct(cls, version, code, phrase, headers, _transport, request): + """ + Private constructor. + + @param version: See L{__init__}. + @param code: See L{__init__}. + @param phrase: See L{__init__}. + @param headers: See L{__init__}. + @param _transport: See L{__init__}. + @param request: See L{IResponse.request}. + + @return: L{Response} instance. + """ + response = Response(version, code, phrase, headers, _transport) + response.request = _ClientRequestProxy(request) + return response + + + def setPreviousResponse(self, previousResponse): + self.previousResponse = previousResponse + + + def deliverBody(self, protocol): + """ + Dispatch the given L{IProtocol} depending of the current state of the + response. + """ + deliverBody = makeStatefulDispatcher('deliverBody', deliverBody) + + + def _deliverBody_INITIAL(self, protocol): + """ + Deliver any buffered data to C{protocol} and prepare to deliver any + future data to it. Move to the C{'CONNECTED'} state. + """ + protocol.makeConnection(self._transport) + self._bodyProtocol = protocol + for data in self._bodyBuffer: + self._bodyProtocol.dataReceived(data) + self._bodyBuffer = None + + self._state = 'CONNECTED' + + # Now that there's a protocol to consume the body, resume the + # transport. It was previously paused by HTTPClientParser to avoid + # reading too much data before it could be handled. We need to do this + # after we transition our state as it may recursively lead to more data + # being delivered, or even the body completing. + self._transport.resumeProducing() + + + def _deliverBody_CONNECTED(self, protocol): + """ + It is invalid to attempt to deliver data to a protocol when it is + already being delivered to another protocol. + """ + raise RuntimeError( + u"Response already has protocol %r, cannot deliverBody " + u"again" % (self._bodyProtocol,)) + + + def _deliverBody_DEFERRED_CLOSE(self, protocol): + """ + Deliver any buffered data to C{protocol} and then disconnect the + protocol. Move to the C{'FINISHED'} state. + """ + # Unlike _deliverBody_INITIAL, there is no need to resume the + # transport here because all of the response data has been received + # already. Some higher level code may want to resume the transport if + # that code expects further data to be received over it. + + protocol.makeConnection(self._transport) + + for data in self._bodyBuffer: + protocol.dataReceived(data) + self._bodyBuffer = None + protocol.connectionLost(self._reason) + self._state = 'FINISHED' + + + def _deliverBody_FINISHED(self, protocol): + """ + It is invalid to attempt to deliver data to a protocol after the + response body has been delivered to another protocol. + """ + raise RuntimeError( + u"Response already finished, cannot deliverBody now.") + + + def _bodyDataReceived(self, data): + """ + Called by HTTPClientParser with chunks of data from the response body. + They will be buffered or delivered to the protocol passed to + deliverBody. 
+ """ + _bodyDataReceived = makeStatefulDispatcher('bodyDataReceived', + _bodyDataReceived) + + + def _bodyDataReceived_INITIAL(self, data): + """ + Buffer any data received for later delivery to a protocol passed to + C{deliverBody}. + + Little or no data should be buffered by this method, since the + transport has been paused and will not be resumed until a protocol + is supplied. + """ + self._bodyBuffer.append(data) + + + def _bodyDataReceived_CONNECTED(self, data): + """ + Deliver any data received to the protocol to which this L{Response} + is connected. + """ + self._bodyProtocol.dataReceived(data) + + + def _bodyDataReceived_DEFERRED_CLOSE(self, data): + """ + It is invalid for data to be delivered after it has been indicated + that the response body has been completely delivered. + """ + raise RuntimeError(u"Cannot receive body data after _bodyDataFinished") + + + def _bodyDataReceived_FINISHED(self, data): + """ + It is invalid for data to be delivered after the response body has + been delivered to a protocol. + """ + raise RuntimeError(u"Cannot receive body data after " + u"protocol disconnected") + + + def _bodyDataFinished(self, reason=None): + """ + Called by HTTPClientParser when no more body data is available. If the + optional reason is supplied, this indicates a problem or potential + problem receiving all of the response body. + """ + _bodyDataFinished = makeStatefulDispatcher('bodyDataFinished', + _bodyDataFinished) + + + def _bodyDataFinished_INITIAL(self, reason=None): + """ + Move to the C{'DEFERRED_CLOSE'} state to wait for a protocol to + which to deliver the response body. + """ + self._state = 'DEFERRED_CLOSE' + if reason is None: + reason = Failure(ResponseDone(u"Response body fully received")) + self._reason = reason + + + def _bodyDataFinished_CONNECTED(self, reason=None): + """ + Disconnect the protocol and move to the C{'FINISHED'} state. + """ + if reason is None: + reason = Failure(ResponseDone(u"Response body fully received")) + self._bodyProtocol.connectionLost(reason) + self._bodyProtocol = None + self._state = 'FINISHED' + + + def _bodyDataFinished_DEFERRED_CLOSE(self): + """ + It is invalid to attempt to notify the L{Response} of the end of the + response body data more than once. + """ + raise RuntimeError(u"Cannot finish body data more than once") + + + def _bodyDataFinished_FINISHED(self): + """ + It is invalid to attempt to notify the L{Response} of the end of the + response body data more than once. + """ + raise RuntimeError(u"Cannot finish body data after " + u"protocol disconnected") + + + +@implementer(IConsumer) +class ChunkedEncoder: + """ + Helper object which exposes L{IConsumer} on top of L{HTTP11ClientProtocol} + for streaming request bodies to the server. + """ + + def __init__(self, transport): + self.transport = transport + + + def _allowNoMoreWrites(self): + """ + Indicate that no additional writes are allowed. Attempts to write + after calling this method will be met with an exception. + """ + self.transport = None + + + def registerProducer(self, producer, streaming): + """ + Register the given producer with C{self.transport}. + """ + self.transport.registerProducer(producer, streaming) + + + def write(self, data): + """ + Write the given request body bytes to the transport using chunked + encoding. 
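Each call to write above puts exactly one chunk on the wire: the chunk size in hexadecimal, CRLF, the data, CRLF; unregisterProducer then writes an empty chunk, which is the terminator. A tiny sketch of that framing (the real code routes the pieces through writeSequence and networkString):

def frame_chunk(data):
    # HTTP/1.1 chunked transfer coding: hex length, CRLF, payload, CRLF.
    return b"%x\r\n" % (len(data),) + data + b"\r\n"

print(frame_chunk(b"Twisted"))   # b'7\r\nTwisted\r\n'
print(frame_chunk(b""))          # b'0\r\n\r\n', the zero-length terminator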
+ + @type data: C{bytes} + """ + if self.transport is None: + raise ExcessWrite() + self.transport.writeSequence((networkString("%x\r\n" % len(data)), + data, b"\r\n")) + + + def unregisterProducer(self): + """ + Indicate that the request body is complete and finish the request. + """ + self.write(b'') + self.transport.unregisterProducer() + self._allowNoMoreWrites() + + + +@implementer(IPushProducer) +class TransportProxyProducer: + """ + An L{twisted.internet.interfaces.IPushProducer} implementation which + wraps another such thing and proxies calls to it until it is told to stop. + + @ivar _producer: The wrapped L{twisted.internet.interfaces.IPushProducer} + provider or L{None} after this proxy has been stopped. + """ + + # LineReceiver uses this undocumented attribute of transports to decide + # when to stop calling lineReceived or rawDataReceived (if it finds it to + # be true, it doesn't bother to deliver any more data). Set disconnecting + # to False here and never change it to true so that all data is always + # delivered to us and so that LineReceiver doesn't fail with an + # AttributeError. + disconnecting = False + + def __init__(self, producer): + self._producer = producer + + + def stopProxying(self): + """ + Stop forwarding calls of L{twisted.internet.interfaces.IPushProducer} + methods to the underlying L{twisted.internet.interfaces.IPushProducer} + provider. + """ + self._producer = None + + + def stopProducing(self): + """ + Proxy the stoppage to the underlying producer, unless this proxy has + been stopped. + """ + if self._producer is not None: + self._producer.stopProducing() + + + def resumeProducing(self): + """ + Proxy the resumption to the underlying producer, unless this proxy has + been stopped. + """ + if self._producer is not None: + self._producer.resumeProducing() + + + def pauseProducing(self): + """ + Proxy the pause to the underlying producer, unless this proxy has been + stopped. + """ + if self._producer is not None: + self._producer.pauseProducing() + + + def loseConnection(self): + """ + Proxy the request to lose the connection to the underlying producer, + unless this proxy has been stopped. + """ + if self._producer is not None: + self._producer.loseConnection() + + + +class HTTP11ClientProtocol(Protocol): + """ + L{HTTP11ClientProtocol} is an implementation of the HTTP 1.1 client + protocol. It supports as few features as possible. + + @ivar _parser: After a request is issued, the L{HTTPClientParser} to + which received data making up the response to that request is + delivered. + + @ivar _finishedRequest: After a request is issued, the L{Deferred} which + will fire when a L{Response} object corresponding to that request is + available. This allows L{HTTP11ClientProtocol} to fail the request + if there is a connection or parsing problem. + + @ivar _currentRequest: After a request is issued, the L{Request} + instance used to make that request. This allows + L{HTTP11ClientProtocol} to stop request generation if necessary (for + example, if the connection is lost). + + @ivar _transportProxy: After a request is issued, the + L{TransportProxyProducer} to which C{_parser} is connected. This + allows C{_parser} to pause and resume the transport in a way which + L{HTTP11ClientProtocol} can exert some control over. + + @ivar _responseDeferred: After a request is issued, the L{Deferred} from + C{_parser} which will fire with a L{Response} when one has been + received. 
This is eventually chained with C{_finishedRequest}, but + only in certain cases to avoid double firing that Deferred. + + @ivar _state: Indicates what state this L{HTTP11ClientProtocol} instance + is in with respect to transmission of a request and reception of a + response. This may be one of the following strings: + + - QUIESCENT: This is the state L{HTTP11ClientProtocol} instances + start in. Nothing is happening: no request is being sent and no + response is being received or expected. + + - TRANSMITTING: When a request is made (via L{request}), the + instance moves to this state. L{Request.writeTo} has been used + to start to send a request but it has not yet finished. + + - TRANSMITTING_AFTER_RECEIVING_RESPONSE: The server has returned a + complete response but the request has not yet been fully sent + yet. The instance will remain in this state until the request + is fully sent. + + - GENERATION_FAILED: There was an error while the request. The + request was not fully sent to the network. + + - WAITING: The request was fully sent to the network. The + instance is now waiting for the response to be fully received. + + - ABORTING: Application code has requested that the HTTP connection + be aborted. + + - CONNECTION_LOST: The connection has been lost. + @type _state: C{str} + + @ivar _abortDeferreds: A list of C{Deferred} instances that will fire when + the connection is lost. + """ + _state = 'QUIESCENT' + _parser = None + _finishedRequest = None + _currentRequest = None + _transportProxy = None + _responseDeferred = None + _log = Logger() + + + def __init__(self, quiescentCallback=lambda c: None): + self._quiescentCallback = quiescentCallback + self._abortDeferreds = [] + + + @property + def state(self): + return self._state + + + def request(self, request): + """ + Issue C{request} over C{self.transport} and return a L{Deferred} which + will fire with a L{Response} instance or an error. + + @param request: The object defining the parameters of the request to + issue. + @type request: L{Request} + + @rtype: L{Deferred} + @return: The deferred may errback with L{RequestGenerationFailed} if + the request was not fully written to the transport due to a local + error. It may errback with L{RequestTransmissionFailed} if it was + not fully written to the transport due to a network error. It may + errback with L{ResponseFailed} if the request was sent (not + necessarily received) but some or all of the response was lost. It + may errback with L{RequestNotSent} if it is not possible to send + any more requests using this L{HTTP11ClientProtocol}. + """ + if self._state != 'QUIESCENT': + return fail(RequestNotSent()) + + self._state = 'TRANSMITTING' + _requestDeferred = maybeDeferred(request.writeTo, self.transport) + + def cancelRequest(ign): + # Explicitly cancel the request's deferred if it's still trying to + # write when this request is cancelled. + if self._state in ( + 'TRANSMITTING', 'TRANSMITTING_AFTER_RECEIVING_RESPONSE'): + _requestDeferred.cancel() + else: + self.transport.abortConnection() + self._disconnectParser(Failure(CancelledError())) + self._finishedRequest = Deferred(cancelRequest) + + # Keep track of the Request object in case we need to call stopWriting + # on it. 
+ self._currentRequest = request + + self._transportProxy = TransportProxyProducer(self.transport) + self._parser = HTTPClientParser(request, self._finishResponse) + self._parser.makeConnection(self._transportProxy) + self._responseDeferred = self._parser._responseDeferred + + def cbRequestWritten(ignored): + if self._state == 'TRANSMITTING': + self._state = 'WAITING' + self._responseDeferred.chainDeferred(self._finishedRequest) + + def ebRequestWriting(err): + if self._state == 'TRANSMITTING': + self._state = 'GENERATION_FAILED' + self.transport.abortConnection() + self._finishedRequest.errback( + Failure(RequestGenerationFailed([err]))) + else: + self._log.failure( + u'Error writing request, but not in valid state ' + u'to finalize request: {state}', + failure=err, + state=self._state + ) + + _requestDeferred.addCallbacks(cbRequestWritten, ebRequestWriting) + + return self._finishedRequest + + + def _finishResponse(self, rest): + """ + Called by an L{HTTPClientParser} to indicate that it has parsed a + complete response. + + @param rest: A C{bytes} giving any trailing bytes which were given to + the L{HTTPClientParser} which were not part of the response it + was parsing. + """ + _finishResponse = makeStatefulDispatcher('finishResponse', _finishResponse) + + + def _finishResponse_WAITING(self, rest): + # Currently the rest parameter is ignored. Don't forget to use it if + # we ever add support for pipelining. And maybe check what trailers + # mean. + if self._state == 'WAITING': + self._state = 'QUIESCENT' + else: + # The server sent the entire response before we could send the + # whole request. That sucks. Oh well. Fire the request() + # Deferred with the response. But first, make sure that if the + # request does ever finish being written that it won't try to fire + # that Deferred. + self._state = 'TRANSMITTING_AFTER_RECEIVING_RESPONSE' + self._responseDeferred.chainDeferred(self._finishedRequest) + + # This will happen if we're being called due to connection being lost; + # if so, no need to disconnect parser again, or to call + # _quiescentCallback. + if self._parser is None: + return + + reason = ConnectionDone(u"synthetic!") + connHeaders = self._parser.connHeaders.getRawHeaders(b'connection', ()) + if ((b'close' in connHeaders) or self._state != "QUIESCENT" or + not self._currentRequest.persistent): + self._giveUp(Failure(reason)) + else: + # Just in case we had paused the transport, resume it before + # considering it quiescent again. + self.transport.resumeProducing() + + # We call the quiescent callback first, to ensure connection gets + # added back to connection pool before we finish the request. + try: + self._quiescentCallback(self) + except: + # If callback throws exception, just log it and disconnect; + # keeping persistent connections around is an optimisation: + self._log.failure('') + self.transport.loseConnection() + self._disconnectParser(reason) + + + _finishResponse_TRANSMITTING = _finishResponse_WAITING + + + def _disconnectParser(self, reason): + """ + If there is still a parser, call its C{connectionLost} method with the + given reason. If there is not, do nothing. + + @type reason: L{Failure} + """ + if self._parser is not None: + parser = self._parser + self._parser = None + self._currentRequest = None + self._finishedRequest = None + self._responseDeferred = None + + # The parser is no longer allowed to do anything to the real + # transport. 
Stop proxying from the parser's transport to the real + # transport before telling the parser it's done so that it can't do + # anything. + self._transportProxy.stopProxying() + self._transportProxy = None + parser.connectionLost(reason) + + + def _giveUp(self, reason): + """ + Lose the underlying connection and disconnect the parser with the given + L{Failure}. + + Use this method instead of calling the transport's loseConnection + method directly otherwise random things will break. + """ + self.transport.loseConnection() + self._disconnectParser(reason) + + + def dataReceived(self, bytes): + """ + Handle some stuff from some place. + """ + try: + self._parser.dataReceived(bytes) + except: + self._giveUp(Failure()) + + + def connectionLost(self, reason): + """ + The underlying transport went away. If appropriate, notify the parser + object. + """ + connectionLost = makeStatefulDispatcher('connectionLost', connectionLost) + + + def _connectionLost_QUIESCENT(self, reason): + """ + Nothing is currently happening. Move to the C{'CONNECTION_LOST'} + state but otherwise do nothing. + """ + self._state = 'CONNECTION_LOST' + + + def _connectionLost_GENERATION_FAILED(self, reason): + """ + The connection was in an inconsistent state. Move to the + C{'CONNECTION_LOST'} state but otherwise do nothing. + """ + self._state = 'CONNECTION_LOST' + + + def _connectionLost_TRANSMITTING(self, reason): + """ + Fail the L{Deferred} for the current request, notify the request + object that it does not need to continue transmitting itself, and + move to the C{'CONNECTION_LOST'} state. + """ + self._state = 'CONNECTION_LOST' + self._finishedRequest.errback( + Failure(RequestTransmissionFailed([reason]))) + del self._finishedRequest + + # Tell the request that it should stop bothering now. + self._currentRequest.stopWriting() + + + def _connectionLost_TRANSMITTING_AFTER_RECEIVING_RESPONSE(self, reason): + """ + Move to the C{'CONNECTION_LOST'} state. + """ + self._state = 'CONNECTION_LOST' + + + def _connectionLost_WAITING(self, reason): + """ + Disconnect the response parser so that it can propagate the event as + necessary (for example, to call an application protocol's + C{connectionLost} method, or to fail a request L{Deferred}) and move + to the C{'CONNECTION_LOST'} state. + """ + self._disconnectParser(reason) + self._state = 'CONNECTION_LOST' + + + def _connectionLost_ABORTING(self, reason): + """ + Disconnect the response parser with a L{ConnectionAborted} failure, and + move to the C{'CONNECTION_LOST'} state. + """ + self._disconnectParser(Failure(ConnectionAborted())) + self._state = 'CONNECTION_LOST' + for d in self._abortDeferreds: + d.callback(None) + self._abortDeferreds = [] + + + def abort(self): + """ + Close the connection and cause all outstanding L{request} L{Deferred}s + to fire with an error. + """ + if self._state == "CONNECTION_LOST": + return succeed(None) + self.transport.loseConnection() + self._state = 'ABORTING' + d = Deferred() + self._abortDeferreds.append(d) + return d diff --git a/contrib/python/Twisted/py2/twisted/web/_responses.py b/contrib/python/Twisted/py2/twisted/web/_responses.py new file mode 100644 index 0000000000..4f8c1cdea4 --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/_responses.py @@ -0,0 +1,114 @@ +# -*- test-case-name: twisted.web.test.test_http -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +HTTP response code definitions. 
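+
+For example, the phrase used when rendering a 404 status line is::
+
+    RESPONSES[NOT_FOUND]    # b"Not Found"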
+""" + +from __future__ import division, absolute_import + +_CONTINUE = 100 +SWITCHING = 101 + +OK = 200 +CREATED = 201 +ACCEPTED = 202 +NON_AUTHORITATIVE_INFORMATION = 203 +NO_CONTENT = 204 +RESET_CONTENT = 205 +PARTIAL_CONTENT = 206 +MULTI_STATUS = 207 + +MULTIPLE_CHOICE = 300 +MOVED_PERMANENTLY = 301 +FOUND = 302 +SEE_OTHER = 303 +NOT_MODIFIED = 304 +USE_PROXY = 305 +TEMPORARY_REDIRECT = 307 + +BAD_REQUEST = 400 +UNAUTHORIZED = 401 +PAYMENT_REQUIRED = 402 +FORBIDDEN = 403 +NOT_FOUND = 404 +NOT_ALLOWED = 405 +NOT_ACCEPTABLE = 406 +PROXY_AUTH_REQUIRED = 407 +REQUEST_TIMEOUT = 408 +CONFLICT = 409 +GONE = 410 +LENGTH_REQUIRED = 411 +PRECONDITION_FAILED = 412 +REQUEST_ENTITY_TOO_LARGE = 413 +REQUEST_URI_TOO_LONG = 414 +UNSUPPORTED_MEDIA_TYPE = 415 +REQUESTED_RANGE_NOT_SATISFIABLE = 416 +EXPECTATION_FAILED = 417 + +INTERNAL_SERVER_ERROR = 500 +NOT_IMPLEMENTED = 501 +BAD_GATEWAY = 502 +SERVICE_UNAVAILABLE = 503 +GATEWAY_TIMEOUT = 504 +HTTP_VERSION_NOT_SUPPORTED = 505 +INSUFFICIENT_STORAGE_SPACE = 507 +NOT_EXTENDED = 510 + +RESPONSES = { + # 100 + _CONTINUE: b"Continue", + SWITCHING: b"Switching Protocols", + + # 200 + OK: b"OK", + CREATED: b"Created", + ACCEPTED: b"Accepted", + NON_AUTHORITATIVE_INFORMATION: b"Non-Authoritative Information", + NO_CONTENT: b"No Content", + RESET_CONTENT: b"Reset Content.", + PARTIAL_CONTENT: b"Partial Content", + MULTI_STATUS: b"Multi-Status", + + # 300 + MULTIPLE_CHOICE: b"Multiple Choices", + MOVED_PERMANENTLY: b"Moved Permanently", + FOUND: b"Found", + SEE_OTHER: b"See Other", + NOT_MODIFIED: b"Not Modified", + USE_PROXY: b"Use Proxy", + # 306 not defined?? + TEMPORARY_REDIRECT: b"Temporary Redirect", + + # 400 + BAD_REQUEST: b"Bad Request", + UNAUTHORIZED: b"Unauthorized", + PAYMENT_REQUIRED: b"Payment Required", + FORBIDDEN: b"Forbidden", + NOT_FOUND: b"Not Found", + NOT_ALLOWED: b"Method Not Allowed", + NOT_ACCEPTABLE: b"Not Acceptable", + PROXY_AUTH_REQUIRED: b"Proxy Authentication Required", + REQUEST_TIMEOUT: b"Request Time-out", + CONFLICT: b"Conflict", + GONE: b"Gone", + LENGTH_REQUIRED: b"Length Required", + PRECONDITION_FAILED: b"Precondition Failed", + REQUEST_ENTITY_TOO_LARGE: b"Request Entity Too Large", + REQUEST_URI_TOO_LONG: b"Request-URI Too Long", + UNSUPPORTED_MEDIA_TYPE: b"Unsupported Media Type", + REQUESTED_RANGE_NOT_SATISFIABLE: b"Requested Range not satisfiable", + EXPECTATION_FAILED: b"Expectation Failed", + + # 500 + INTERNAL_SERVER_ERROR: b"Internal Server Error", + NOT_IMPLEMENTED: b"Not Implemented", + BAD_GATEWAY: b"Bad Gateway", + SERVICE_UNAVAILABLE: b"Service Unavailable", + GATEWAY_TIMEOUT: b"Gateway Time-out", + HTTP_VERSION_NOT_SUPPORTED: b"HTTP Version not supported", + INSUFFICIENT_STORAGE_SPACE: b"Insufficient Storage Space", + NOT_EXTENDED: b"Not Extended" + } + diff --git a/contrib/python/Twisted/py2/twisted/web/_stan.py b/contrib/python/Twisted/py2/twisted/web/_stan.py new file mode 100644 index 0000000000..033a52c652 --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/_stan.py @@ -0,0 +1,330 @@ +# -*- test-case-name: twisted.web.test.test_stan -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +An s-expression-like syntax for expressing xml in pure python. + +Stan tags allow you to build XML documents using Python. + +Stan is a DOM, or Document Object Model, implemented using basic Python types +and functions called "flatteners". A flattener is a function that knows how to +turn an object of a specific type into something that is closer to an HTML +string. 
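+
+A small tree can be built directly from L{Tag} instances (the tag names and
+attribute values below are purely illustrative)::
+
+    Tag('ul')(Tag('li')('one'), Tag('li')('two'), class_='menu')
+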
Stan differs from the W3C DOM by not being as cumbersome and heavy +weight. Since the object model is built using simple python types such as lists, +strings, and dictionaries, the API is simpler and constructing a DOM less +cumbersome. + +@var voidElements: the names of HTML 'U{void + elements<http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#void-elements>}'; + those which can't have contents and can therefore be self-closing in the + output. +""" + +from __future__ import absolute_import, division + +from twisted.python.compat import iteritems + + + +class slot(object): + """ + Marker for markup insertion in a template. + + @type name: C{str} + @ivar name: The name of this slot. The key which must be used in + L{Tag.fillSlots} to fill it. + + @type children: C{list} + @ivar children: The L{Tag} objects included in this L{slot}'s template. + + @type default: anything flattenable, or L{None} + @ivar default: The default contents of this slot, if it is left unfilled. + If this is L{None}, an L{UnfilledSlot} will be raised, rather than + L{None} actually being used. + + @type filename: C{str} or L{None} + @ivar filename: The name of the XML file from which this tag was parsed. + If it was not parsed from an XML file, L{None}. + + @type lineNumber: C{int} or L{None} + @ivar lineNumber: The line number on which this tag was encountered in the + XML file from which it was parsed. If it was not parsed from an XML + file, L{None}. + + @type columnNumber: C{int} or L{None} + @ivar columnNumber: The column number at which this tag was encountered in + the XML file from which it was parsed. If it was not parsed from an + XML file, L{None}. + """ + + def __init__(self, name, default=None, filename=None, lineNumber=None, + columnNumber=None): + self.name = name + self.children = [] + self.default = default + self.filename = filename + self.lineNumber = lineNumber + self.columnNumber = columnNumber + + + def __repr__(self): + return "slot(%r)" % (self.name,) + + + +class Tag(object): + """ + A L{Tag} represents an XML tags with a tag name, attributes, and children. + A L{Tag} can be constructed using the special L{twisted.web.template.tags} + object, or it may be constructed directly with a tag name. L{Tag}s have a + special method, C{__call__}, which makes representing trees of XML natural + using pure python syntax. + + @ivar tagName: The name of the represented element. For a tag like + C{<div></div>}, this would be C{"div"}. + @type tagName: C{str} + + @ivar attributes: The attributes of the element. + @type attributes: C{dict} mapping C{str} to renderable objects. + + @ivar children: The child L{Tag}s of this C{Tag}. + @type children: C{list} of renderable objects. + + @ivar render: The name of the render method to use for this L{Tag}. This + name will be looked up at render time by the + L{twisted.web.template.Element} doing the rendering, via + L{twisted.web.template.Element.lookupRenderMethod}, to determine which + method to call. + @type render: C{str} + + @type filename: C{str} or L{None} + @ivar filename: The name of the XML file from which this tag was parsed. + If it was not parsed from an XML file, L{None}. + + @type lineNumber: C{int} or L{None} + @ivar lineNumber: The line number on which this tag was encountered in the + XML file from which it was parsed. If it was not parsed from an XML + file, L{None}. + + @type columnNumber: C{int} or L{None} + @ivar columnNumber: The column number at which this tag was encountered in + the XML file from which it was parsed. 
If it was not parsed from an + XML file, L{None}. + + @type slotData: C{dict} or L{None} + @ivar slotData: The data which can fill slots. If present, a dictionary + mapping slot names to renderable values. The values in this dict might + be anything that can be present as the child of a L{Tag}; strings, + lists, L{Tag}s, generators, etc. + """ + + slotData = None + filename = None + lineNumber = None + columnNumber = None + + def __init__(self, tagName, attributes=None, children=None, render=None, + filename=None, lineNumber=None, columnNumber=None): + self.tagName = tagName + self.render = render + if attributes is None: + self.attributes = {} + else: + self.attributes = attributes + if children is None: + self.children = [] + else: + self.children = children + if filename is not None: + self.filename = filename + if lineNumber is not None: + self.lineNumber = lineNumber + if columnNumber is not None: + self.columnNumber = columnNumber + + + def fillSlots(self, **slots): + """ + Remember the slots provided at this position in the DOM. + + During the rendering of children of this node, slots with names in + C{slots} will be rendered as their corresponding values. + + @return: C{self}. This enables the idiom C{return tag.fillSlots(...)} in + renderers. + """ + if self.slotData is None: + self.slotData = {} + self.slotData.update(slots) + return self + + + def __call__(self, *children, **kw): + """ + Add children and change attributes on this tag. + + This is implemented using __call__ because it then allows the natural + syntax:: + + table(tr1, tr2, width="100%", height="50%", border="1") + + Children may be other tag instances, strings, functions, or any other + object which has a registered flatten. + + Attributes may be 'transparent' tag instances (so that + C{a(href=transparent(data="foo", render=myhrefrenderer))} works), + strings, functions, or any other object which has a registered + flattener. + + If the attribute is a python keyword, such as 'class', you can add an + underscore to the name, like 'class_'. + + There is one special keyword argument, 'render', which will be used as + the name of the renderer and saved as the 'render' attribute of this + instance, rather than the DOM 'render' attribute in the attributes + dictionary. + """ + self.children.extend(children) + + for k, v in iteritems(kw): + if k[-1] == '_': + k = k[:-1] + + if k == 'render': + self.render = v + else: + self.attributes[k] = v + return self + + + def _clone(self, obj, deep): + """ + Clone an arbitrary object; used by L{Tag.clone}. + + @param obj: an object with a clone method, a list or tuple, or something + which should be immutable. + + @param deep: whether to continue cloning child objects; i.e. the + contents of lists, the sub-tags within a tag. + + @return: a clone of C{obj}. + """ + if hasattr(obj, 'clone'): + return obj.clone(deep) + elif isinstance(obj, (list, tuple)): + return [self._clone(x, deep) for x in obj] + else: + return obj + + + def clone(self, deep=True): + """ + Return a clone of this tag. If deep is True, clone all of this tag's + children. Otherwise, just shallow copy the children list without copying + the children themselves. 
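+
+        For example, C{tag.clone(deep=False)} returns a new L{Tag} whose
+        C{children} is a fresh list object, but whose entries are the same
+        child objects referenced by the original.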
+ """ + if deep: + newchildren = [self._clone(x, True) for x in self.children] + else: + newchildren = self.children[:] + newattrs = self.attributes.copy() + for key in newattrs.keys(): + newattrs[key] = self._clone(newattrs[key], True) + + newslotdata = None + if self.slotData: + newslotdata = self.slotData.copy() + for key in newslotdata: + newslotdata[key] = self._clone(newslotdata[key], True) + + newtag = Tag( + self.tagName, + attributes=newattrs, + children=newchildren, + render=self.render, + filename=self.filename, + lineNumber=self.lineNumber, + columnNumber=self.columnNumber) + newtag.slotData = newslotdata + + return newtag + + + def clear(self): + """ + Clear any existing children from this tag. + """ + self.children = [] + return self + + + def __repr__(self): + rstr = '' + if self.attributes: + rstr += ', attributes=%r' % self.attributes + if self.children: + rstr += ', children=%r' % self.children + return "Tag(%r%s)" % (self.tagName, rstr) + + + +voidElements = ('img', 'br', 'hr', 'base', 'meta', 'link', 'param', 'area', + 'input', 'col', 'basefont', 'isindex', 'frame', 'command', + 'embed', 'keygen', 'source', 'track', 'wbs') + + +class CDATA(object): + """ + A C{<![CDATA[]]>} block from a template. Given a separate representation in + the DOM so that they may be round-tripped through rendering without losing + information. + + @ivar data: The data between "C{<![CDATA[}" and "C{]]>}". + @type data: C{unicode} + """ + def __init__(self, data): + self.data = data + + + def __repr__(self): + return 'CDATA(%r)' % (self.data,) + + + +class Comment(object): + """ + A C{<!-- -->} comment from a template. Given a separate representation in + the DOM so that they may be round-tripped through rendering without losing + information. + + @ivar data: The data between "C{<!--}" and "C{-->}". + @type data: C{unicode} + """ + + def __init__(self, data): + self.data = data + + + def __repr__(self): + return 'Comment(%r)' % (self.data,) + + + +class CharRef(object): + """ + A numeric character reference. Given a separate representation in the DOM + so that non-ASCII characters may be output as pure ASCII. + + @ivar ordinal: The ordinal value of the unicode character to which this is + object refers. + @type ordinal: C{int} + + @since: 12.0 + """ + def __init__(self, ordinal): + self.ordinal = ordinal + + + def __repr__(self): + return "CharRef(%d)" % (self.ordinal,) diff --git a/contrib/python/Twisted/py2/twisted/web/client.py b/contrib/python/Twisted/py2/twisted/web/client.py new file mode 100644 index 0000000000..7e4642ef30 --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/client.py @@ -0,0 +1,2336 @@ +# -*- test-case-name: twisted.web.test.test_webclient,twisted.web.test.test_agent -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +HTTP client. 
+""" + +from __future__ import division, absolute_import + +import os +import collections +import warnings + +try: + from urlparse import urlunparse, urljoin, urldefrag +except ImportError: + from urllib.parse import urljoin, urldefrag + from urllib.parse import urlunparse as _urlunparse + + def urlunparse(parts): + result = _urlunparse(tuple([p.decode("charmap") for p in parts])) + return result.encode("charmap") + +import zlib +from functools import wraps + +from zope.interface import implementer + +from twisted.python.compat import _PY3, networkString +from twisted.python.compat import nativeString, intToBytes, unicode, itervalues +from twisted.python.deprecate import deprecatedModuleAttribute, deprecated +from twisted.python.failure import Failure +from incremental import Version + +from twisted.web.iweb import IPolicyForHTTPS, IAgentEndpointFactory +from twisted.python.deprecate import getDeprecationWarningString +from twisted.web import http +from twisted.internet import defer, protocol, task, reactor +from twisted.internet.abstract import isIPv6Address +from twisted.internet.interfaces import IProtocol, IOpenSSLContextFactory +from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS +from twisted.python.util import InsensitiveDict +from twisted.python.components import proxyForInterface +from twisted.web import error +from twisted.web.iweb import UNKNOWN_LENGTH, IAgent, IBodyProducer, IResponse +from twisted.web.http_headers import Headers +from twisted.logger import Logger + +from twisted.web._newclient import _ensureValidURI, _ensureValidMethod + + + +class PartialDownloadError(error.Error): + """ + Page was only partially downloaded, we got disconnected in middle. + + @ivar response: All of the response body which was downloaded. + """ + + +class HTTPPageGetter(http.HTTPClient): + """ + Gets a resource via HTTP, then quits. + + Typically used with L{HTTPClientFactory}. Note that this class does not, by + itself, do anything with the response. If you want to download a resource + into a file, use L{HTTPPageDownloader} instead. + + @ivar _completelyDone: A boolean indicating whether any further requests are + necessary after this one completes in order to provide a result to + C{self.factory.deferred}. If it is C{False}, then a redirect is going + to be followed. Otherwise, this protocol's connection is the last one + before firing the result Deferred. This is used to make sure the result + Deferred is only fired after the connection is cleaned up. 
+ """ + + quietLoss = 0 + followRedirect = True + failed = 0 + + _completelyDone = True + + _specialHeaders = set( + (b'host', b'user-agent', b'cookie', b'content-length'), + ) + + def connectionMade(self): + method = _ensureValidMethod(getattr(self.factory, 'method', b'GET')) + self.sendCommand(method, _ensureValidURI(self.factory.path)) + if self.factory.scheme == b'http' and self.factory.port != 80: + host = self.factory.host + b':' + intToBytes(self.factory.port) + elif self.factory.scheme == b'https' and self.factory.port != 443: + host = self.factory.host + b':' + intToBytes(self.factory.port) + else: + host = self.factory.host + self.sendHeader(b'Host', self.factory.headers.get(b"host", host)) + self.sendHeader(b'User-Agent', self.factory.agent) + data = getattr(self.factory, 'postdata', None) + if data is not None: + self.sendHeader(b"Content-Length", intToBytes(len(data))) + + cookieData = [] + for (key, value) in self.factory.headers.items(): + if key.lower() not in self._specialHeaders: + # we calculated it on our own + self.sendHeader(key, value) + if key.lower() == b'cookie': + cookieData.append(value) + for cookie, cookval in self.factory.cookies.items(): + cookieData.append(cookie + b'=' + cookval) + if cookieData: + self.sendHeader(b'Cookie', b'; '.join(cookieData)) + self.endHeaders() + self.headers = {} + + if data is not None: + self.transport.write(data) + + def handleHeader(self, key, value): + """ + Called every time a header is received. Stores the header information + as key-value pairs in the C{headers} attribute. + + @type key: C{str} + @param key: An HTTP header field name. + + @type value: C{str} + @param value: An HTTP header field value. + """ + key = key.lower() + l = self.headers.setdefault(key, []) + l.append(value) + + def handleStatus(self, version, status, message): + """ + Handle the HTTP status line. + + @param version: The HTTP version. + @type version: L{bytes} + @param status: The HTTP status code, an integer represented as a + bytestring. + @type status: L{bytes} + @param message: The HTTP status message. 
+ @type message: L{bytes} + """ + self.version, self.status, self.message = version, status, message + self.factory.gotStatus(version, status, message) + + def handleEndHeaders(self): + self.factory.gotHeaders(self.headers) + m = getattr(self, 'handleStatus_' + nativeString(self.status), + self.handleStatusDefault) + m() + + def handleStatus_200(self): + pass + + handleStatus_201 = lambda self: self.handleStatus_200() + handleStatus_202 = lambda self: self.handleStatus_200() + + def handleStatusDefault(self): + self.failed = 1 + + def handleStatus_301(self): + l = self.headers.get(b'location') + if not l: + self.handleStatusDefault() + return + url = l[0] + if self.followRedirect: + self.factory._redirectCount += 1 + if self.factory._redirectCount >= self.factory.redirectLimit: + err = error.InfiniteRedirection( + self.status, + b'Infinite redirection detected', + location=url) + self.factory.noPage(Failure(err)) + self.quietLoss = True + self.transport.loseConnection() + return + + self._completelyDone = False + self.factory.setURL(url) + + if self.factory.scheme == b'https': + from twisted.internet import ssl + contextFactory = ssl.ClientContextFactory() + reactor.connectSSL(nativeString(self.factory.host), + self.factory.port, + self.factory, contextFactory) + else: + reactor.connectTCP(nativeString(self.factory.host), + self.factory.port, + self.factory) + else: + self.handleStatusDefault() + self.factory.noPage( + Failure( + error.PageRedirect( + self.status, self.message, location = url))) + self.quietLoss = True + self.transport.loseConnection() + + def handleStatus_302(self): + if self.afterFoundGet: + self.handleStatus_303() + else: + self.handleStatus_301() + + + def handleStatus_303(self): + self.factory.method = b'GET' + self.handleStatus_301() + + + def connectionLost(self, reason): + """ + When the connection used to issue the HTTP request is closed, notify the + factory if we have not already, so it can produce a result. + """ + if not self.quietLoss: + http.HTTPClient.connectionLost(self, reason) + self.factory.noPage(reason) + if self._completelyDone: + # Only if we think we're completely done do we tell the factory that + # we're "disconnected". This way when we're following redirects, + # only the last protocol used will fire the _disconnectedDeferred. + self.factory._disconnectedDeferred.callback(None) + + + def handleResponse(self, response): + if self.quietLoss: + return + if self.failed: + self.factory.noPage( + Failure( + error.Error( + self.status, self.message, response))) + if self.factory.method == b'HEAD': + # Callback with empty string, since there is never a response + # body for HEAD requests. + self.factory.page(b'') + elif self.length != None and self.length != 0: + self.factory.noPage(Failure( + PartialDownloadError(self.status, self.message, response))) + else: + self.factory.page(response) + # server might be stupid and not close connection. admittedly + # the fact we do only one request per connection is also + # stupid... + self.transport.loseConnection() + + def timeout(self): + self.quietLoss = True + self.transport.abortConnection() + self.factory.noPage(defer.TimeoutError("Getting %s took longer than %s seconds." 
% (self.factory.url, self.factory.timeout))) + + +class HTTPPageDownloader(HTTPPageGetter): + + transmittingPage = 0 + + def handleStatus_200(self, partialContent=0): + HTTPPageGetter.handleStatus_200(self) + self.transmittingPage = 1 + self.factory.pageStart(partialContent) + + def handleStatus_206(self): + self.handleStatus_200(partialContent=1) + + def handleResponsePart(self, data): + if self.transmittingPage: + self.factory.pagePart(data) + + def handleResponseEnd(self): + if self.length: + self.transmittingPage = 0 + self.factory.noPage( + Failure( + PartialDownloadError(self.status))) + if self.transmittingPage: + self.factory.pageEnd() + self.transmittingPage = 0 + if self.failed: + self.factory.noPage( + Failure( + error.Error( + self.status, self.message, None))) + self.transport.loseConnection() + + +class HTTPClientFactory(protocol.ClientFactory): + """Download a given URL. + + @type deferred: Deferred + @ivar deferred: A Deferred that will fire when the content has + been retrieved. Once this is fired, the ivars `status', `version', + and `message' will be set. + + @type status: bytes + @ivar status: The status of the response. + + @type version: bytes + @ivar version: The version of the response. + + @type message: bytes + @ivar message: The text message returned with the status. + + @type response_headers: dict + @ivar response_headers: The headers that were specified in the + response from the server. + + @type method: bytes + @ivar method: The HTTP method to use in the request. This should be one of + OPTIONS, GET, HEAD, POST, PUT, DELETE, TRACE, or CONNECT (case + matters). Other values may be specified if the server being contacted + supports them. + + @type redirectLimit: int + @ivar redirectLimit: The maximum number of HTTP redirects that can occur + before it is assumed that the redirection is endless. + + @type afterFoundGet: C{bool} + @ivar afterFoundGet: Deviate from the HTTP 1.1 RFC by handling redirects + the same way as most web browsers; if the request method is POST and a + 302 status is encountered, the redirect is followed with a GET method + + @type _redirectCount: int + @ivar _redirectCount: The current number of HTTP redirects encountered. + + @ivar _disconnectedDeferred: A L{Deferred} which only fires after the last + connection associated with the request (redirects may cause multiple + connections to be required) has closed. The result Deferred will only + fire after this Deferred, so that callers can be assured that there are + no more event sources in the reactor once they get the result. 
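+
+    A minimal sketch of direct use (the host name is illustrative, and
+    C{handleBody} stands in for an application callback; the deprecated
+    L{getPage} helper below wraps this same pattern)::
+
+        factory = HTTPClientFactory(b'http://example.com/')
+        reactor.connectTCP('example.com', 80, factory)
+        factory.deferred.addCallback(handleBody)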
+ """ + + protocol = HTTPPageGetter + + url = None + scheme = None + host = b'' + port = None + path = None + + def __init__(self, url, method=b'GET', postdata=None, headers=None, + agent=b"Twisted PageGetter", timeout=0, cookies=None, + followRedirect=True, redirectLimit=20, + afterFoundGet=False): + self.followRedirect = followRedirect + self.redirectLimit = redirectLimit + self._redirectCount = 0 + self.timeout = timeout + self.agent = agent + self.afterFoundGet = afterFoundGet + if cookies is None: + cookies = {} + self.cookies = cookies + if headers is not None: + self.headers = InsensitiveDict(headers) + else: + self.headers = InsensitiveDict() + if postdata is not None: + self.headers.setdefault(b'Content-Length', + intToBytes(len(postdata))) + # just in case a broken http/1.1 decides to keep connection alive + self.headers.setdefault(b"connection", b"close") + self.postdata = postdata + self.method = _ensureValidMethod(method) + + self.setURL(url) + + self.waiting = 1 + self._disconnectedDeferred = defer.Deferred() + self.deferred = defer.Deferred() + # Make sure the first callback on the result Deferred pauses the + # callback chain until the request connection is closed. + self.deferred.addBoth(self._waitForDisconnect) + self.response_headers = None + + + def _waitForDisconnect(self, passthrough): + """ + Chain onto the _disconnectedDeferred, preserving C{passthrough}, so that + the result is only available after the associated connection has been + closed. + """ + self._disconnectedDeferred.addCallback(lambda ignored: passthrough) + return self._disconnectedDeferred + + + def __repr__(self): + return "<%s: %s>" % (self.__class__.__name__, self.url) + + def setURL(self, url): + _ensureValidURI(url.strip()) + self.url = url + uri = URI.fromBytes(url) + if uri.scheme and uri.host: + self.scheme = uri.scheme + self.host = uri.host + self.port = uri.port + self.path = uri.originForm + + def buildProtocol(self, addr): + p = protocol.ClientFactory.buildProtocol(self, addr) + p.followRedirect = self.followRedirect + p.afterFoundGet = self.afterFoundGet + if self.timeout: + timeoutCall = reactor.callLater(self.timeout, p.timeout) + self.deferred.addBoth(self._cancelTimeout, timeoutCall) + return p + + def _cancelTimeout(self, result, timeoutCall): + if timeoutCall.active(): + timeoutCall.cancel() + return result + + def gotHeaders(self, headers): + """ + Parse the response HTTP headers. + + @param headers: The response HTTP headers. + @type headers: L{dict} + """ + self.response_headers = headers + if b'set-cookie' in headers: + for cookie in headers[b'set-cookie']: + if b'=' in cookie: + cookparts = cookie.split(b';') + cook = cookparts[0] + cook.lstrip() + k, v = cook.split(b'=', 1) + self.cookies[k.lstrip()] = v.lstrip() + + def gotStatus(self, version, status, message): + """ + Set the status of the request on us. + + @param version: The HTTP version. + @type version: L{bytes} + @param status: The HTTP status code, an integer represented as a + bytestring. + @type status: L{bytes} + @param message: The HTTP status message. + @type message: L{bytes} + """ + self.version, self.status, self.message = version, status, message + + def page(self, page): + if self.waiting: + self.waiting = 0 + self.deferred.callback(page) + + def noPage(self, reason): + if self.waiting: + self.waiting = 0 + self.deferred.errback(reason) + + def clientConnectionFailed(self, _, reason): + """ + When a connection attempt fails, the request cannot be issued. 
If no + result has yet been provided to the result Deferred, provide the + connection failure reason as an error result. + """ + if self.waiting: + self.waiting = 0 + # If the connection attempt failed, there is nothing more to + # disconnect, so just fire that Deferred now. + self._disconnectedDeferred.callback(None) + self.deferred.errback(reason) + + + +class HTTPDownloader(HTTPClientFactory): + """ + Download to a file. + """ + protocol = HTTPPageDownloader + value = None + _log = Logger() + + def __init__(self, url, fileOrName, + method=b'GET', postdata=None, headers=None, + agent=b"Twisted client", supportPartial=False, + timeout=0, cookies=None, followRedirect=True, + redirectLimit=20, afterFoundGet=False): + self.requestedPartial = 0 + if isinstance(fileOrName, (str, unicode)): + self.fileName = fileOrName + self.file = None + if supportPartial and os.path.exists(self.fileName): + fileLength = os.path.getsize(self.fileName) + if fileLength: + self.requestedPartial = fileLength + if headers == None: + headers = {} + headers[b"range"] = b"bytes=" + intToBytes(fileLength) + b"-" + else: + self.file = fileOrName + HTTPClientFactory.__init__( + self, url, method=method, postdata=postdata, headers=headers, + agent=agent, timeout=timeout, cookies=cookies, + followRedirect=followRedirect, redirectLimit=redirectLimit, + afterFoundGet=afterFoundGet) + + + def gotHeaders(self, headers): + HTTPClientFactory.gotHeaders(self, headers) + if self.requestedPartial: + contentRange = headers.get(b"content-range", None) + if not contentRange: + # server doesn't support partial requests, oh well + self.requestedPartial = 0 + return + start, end, realLength = http.parseContentRange(contentRange[0]) + if start != self.requestedPartial: + # server is acting weirdly + self.requestedPartial = 0 + + + def openFile(self, partialContent): + if partialContent: + file = open(self.fileName, 'rb+') + file.seek(0, 2) + else: + file = open(self.fileName, 'wb') + return file + + def pageStart(self, partialContent): + """Called on page download start. + + @param partialContent: tells us if the download is partial download we requested. + """ + if partialContent and not self.requestedPartial: + raise ValueError("we shouldn't get partial content response if we didn't want it!") + if self.waiting: + try: + if not self.file: + self.file = self.openFile(partialContent) + except IOError: + #raise + self.deferred.errback(Failure()) + + def pagePart(self, data): + if not self.file: + return + try: + self.file.write(data) + except IOError: + #raise + self.file = None + self.deferred.errback(Failure()) + + + def noPage(self, reason): + """ + Close the storage file and errback the waiting L{Deferred} with the + given reason. + """ + if self.waiting: + self.waiting = 0 + if self.file: + try: + self.file.close() + except: + self._log.failure("Error closing HTTPDownloader file") + self.deferred.errback(reason) + + + def pageEnd(self): + self.waiting = 0 + if not self.file: + return + try: + self.file.close() + except IOError: + self.deferred.errback(Failure()) + return + self.deferred.callback(self.value) + + + +class URI(object): + """ + A URI object. + + @see: U{https://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-21} + """ + def __init__(self, scheme, netloc, host, port, path, params, query, + fragment): + """ + @type scheme: L{bytes} + @param scheme: URI scheme specifier. + + @type netloc: L{bytes} + @param netloc: Network location component. + + @type host: L{bytes} + @param host: Host name. 
For IPv6 address literals the brackets are + stripped. + + @type port: L{int} + @param port: Port number. + + @type path: L{bytes} + @param path: Hierarchical path. + + @type params: L{bytes} + @param params: Parameters for last path segment. + + @type query: L{bytes} + @param query: Query string. + + @type fragment: L{bytes} + @param fragment: Fragment identifier. + """ + self.scheme = scheme + self.netloc = netloc + self.host = host.strip(b'[]') + self.port = port + self.path = path + self.params = params + self.query = query + self.fragment = fragment + + + @classmethod + def fromBytes(cls, uri, defaultPort=None): + """ + Parse the given URI into a L{URI}. + + @type uri: C{bytes} + @param uri: URI to parse. + + @type defaultPort: C{int} or L{None} + @param defaultPort: An alternate value to use as the port if the URI + does not include one. + + @rtype: L{URI} + @return: Parsed URI instance. + """ + uri = uri.strip() + scheme, netloc, path, params, query, fragment = http.urlparse(uri) + + if defaultPort is None: + if scheme == b'https': + defaultPort = 443 + else: + defaultPort = 80 + + if b':' in netloc: + host, port = netloc.rsplit(b':', 1) + try: + port = int(port) + except ValueError: + host, port = netloc, defaultPort + else: + host, port = netloc, defaultPort + return cls(scheme, netloc, host, port, path, params, query, fragment) + + + def toBytes(self): + """ + Assemble the individual parts of the I{URI} into a fully formed I{URI}. + + @rtype: C{bytes} + @return: A fully formed I{URI}. + """ + return urlunparse( + (self.scheme, self.netloc, self.path, self.params, self.query, + self.fragment)) + + + @property + def originForm(self): + """ + The absolute I{URI} path including I{URI} parameters, query string and + fragment identifier. + + @see: U{https://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-21#section-5.3} + + @return: The absolute path in original form. + @rtype: L{bytes} + """ + # The HTTP bis draft says the origin form should not include the + # fragment. + path = urlunparse( + (b'', b'', self.path, self.params, self.query, b'')) + if path == b'': + path = b'/' + return path + + + +def _urljoin(base, url): + """ + Construct a full ("absolute") URL by combining a "base URL" with another + URL. Informally, this uses components of the base URL, in particular the + addressing scheme, the network location and (part of) the path, to provide + missing components in the relative URL. + + Additionally, the fragment identifier is preserved according to the HTTP + 1.1 bis draft. + + @type base: C{bytes} + @param base: Base URL. + + @type url: C{bytes} + @param url: URL to combine with C{base}. + + @return: An absolute URL resulting from the combination of C{base} and + C{url}. + + @see: L{urlparse.urljoin} + + @see: U{https://tools.ietf.org/html/draft-ietf-httpbis-p2-semantics-22#section-7.1.2} + """ + base, baseFrag = urldefrag(base) + url, urlFrag = urldefrag(urljoin(base, url)) + return urljoin(url, b'#' + (urlFrag or baseFrag)) + + + +def _makeGetterFactory(url, factoryFactory, contextFactory=None, + *args, **kwargs): + """ + Create and connect an HTTP page getting factory. + + Any additional positional or keyword arguments are used when calling + C{factoryFactory}. 
+ + @param factoryFactory: Factory factory that is called with C{url}, C{args} + and C{kwargs} to produce the getter + + @param contextFactory: Context factory to use when creating a secure + connection, defaulting to L{None} + + @return: The factory created by C{factoryFactory} + """ + uri = URI.fromBytes(_ensureValidURI(url.strip())) + factory = factoryFactory(url, *args, **kwargs) + if uri.scheme == b'https': + from twisted.internet import ssl + if contextFactory is None: + contextFactory = ssl.ClientContextFactory() + reactor.connectSSL( + nativeString(uri.host), uri.port, factory, contextFactory) + else: + reactor.connectTCP(nativeString(uri.host), uri.port, factory) + return factory + + +_GETPAGE_REPLACEMENT_TEXT = "https://pypi.org/project/treq/ or twisted.web.client.Agent" + +def _deprecateGetPageClasses(): + """ + Mark the protocols and factories associated with L{getPage} and + L{downloadPage} as deprecated. + """ + for klass in [ + HTTPPageGetter, HTTPPageDownloader, + HTTPClientFactory, HTTPDownloader + ]: + deprecatedModuleAttribute( + Version("Twisted", 16, 7, 0), + getDeprecationWarningString( + klass, + Version("Twisted", 16, 7, 0), + replacement=_GETPAGE_REPLACEMENT_TEXT) + .split("; ")[1], + klass.__module__, + klass.__name__) + +_deprecateGetPageClasses() + + + +@deprecated(Version("Twisted", 16, 7, 0), + _GETPAGE_REPLACEMENT_TEXT) +def getPage(url, contextFactory=None, *args, **kwargs): + """ + Download a web page as a string. + + Download a page. Return a deferred, which will callback with a + page (as a string) or errback with a description of the error. + + See L{HTTPClientFactory} to see what extra arguments can be passed. + """ + return _makeGetterFactory( + url, + HTTPClientFactory, + contextFactory=contextFactory, + *args, **kwargs).deferred + + + +@deprecated(Version("Twisted", 16, 7, 0), + _GETPAGE_REPLACEMENT_TEXT) +def downloadPage(url, file, contextFactory=None, *args, **kwargs): + """ + Download a web page to a file. + + @param file: path to file on filesystem, or file-like object. + + See HTTPDownloader to see what extra args can be passed. + """ + factoryFactory = lambda url, *a, **kw: HTTPDownloader(url, file, *a, **kw) + return _makeGetterFactory( + url, + factoryFactory, + contextFactory=contextFactory, + *args, **kwargs).deferred + + +# The code which follows is based on the new HTTP client implementation. It +# should be significantly better than anything above, though it is not yet +# feature equivalent. + +from twisted.web.error import SchemeNotSupported +from twisted.web._newclient import ( + HTTP11ClientProtocol, + PotentialDataLoss, + Request, + RequestGenerationFailed, + RequestNotSent, + RequestTransmissionFailed, + Response, + ResponseDone, + ResponseFailed, + ResponseNeverReceived, + _WrapperException, + ) + + + +try: + from OpenSSL import SSL +except ImportError: + SSL = None +else: + from twisted.internet.ssl import (CertificateOptions, + platformTrust, + optionsForClientTLS) + + +def _requireSSL(decoratee): + """ + The decorated method requires pyOpenSSL to be present, or it raises + L{NotImplementedError}. + + @param decoratee: A function which requires pyOpenSSL. + @type decoratee: L{callable} + + @return: A function which raises L{NotImplementedError} if pyOpenSSL is not + installed; otherwise, if it is installed, simply return C{decoratee}. + @rtype: L{callable} + """ + if SSL is None: + @wraps(decoratee) + def raiseNotImplemented(*a, **kw): + """ + pyOpenSSL is not available. + + @param a: The positional arguments for C{decoratee}. 
+ + @param kw: The keyword arguments for C{decoratee}. + + @raise NotImplementedError: Always. + """ + raise NotImplementedError("SSL support unavailable") + return raiseNotImplemented + return decoratee + + + +class WebClientContextFactory(object): + """ + This class is deprecated. Please simply use L{Agent} as-is, or if you want + to customize something, use L{BrowserLikePolicyForHTTPS}. + + A L{WebClientContextFactory} is an HTTPS policy which totally ignores the + hostname and port. It performs basic certificate verification, however the + lack of validation of service identity (e.g. hostname validation) means it + is still vulnerable to man-in-the-middle attacks. Don't use it any more. + """ + + def _getCertificateOptions(self, hostname, port): + """ + Return a L{CertificateOptions}. + + @param hostname: ignored + + @param port: ignored + + @return: A new CertificateOptions instance. + @rtype: L{CertificateOptions} + """ + return CertificateOptions( + method=SSL.SSLv23_METHOD, + trustRoot=platformTrust() + ) + + + @_requireSSL + def getContext(self, hostname, port): + """ + Return an L{OpenSSL.SSL.Context}. + + @param hostname: ignored + @param port: ignored + + @return: A new SSL context. + @rtype: L{OpenSSL.SSL.Context} + """ + return self._getCertificateOptions(hostname, port).getContext() + + + +@implementer(IPolicyForHTTPS) +class BrowserLikePolicyForHTTPS(object): + """ + SSL connection creator for web clients. + """ + def __init__(self, trustRoot=None): + self._trustRoot = trustRoot + + + @_requireSSL + def creatorForNetloc(self, hostname, port): + """ + Create a L{client connection creator + <twisted.internet.interfaces.IOpenSSLClientConnectionCreator>} for a + given network location. + + @param tls: The TLS protocol to create a connection for. + @type tls: L{twisted.protocols.tls.TLSMemoryBIOProtocol} + + @param hostname: The hostname part of the URI. + @type hostname: L{bytes} + + @param port: The port part of the URI. + @type port: L{int} + + @return: a connection creator with appropriate verification + restrictions set + @rtype: L{client connection creator + <twisted.internet.interfaces.IOpenSSLClientConnectionCreator>} + """ + return optionsForClientTLS(hostname.decode("ascii"), + trustRoot=self._trustRoot) + + + +deprecatedModuleAttribute(Version("Twisted", 14, 0, 0), + getDeprecationWarningString( + WebClientContextFactory, + Version("Twisted", 14, 0, 0), + replacement=BrowserLikePolicyForHTTPS) + .split("; ")[1], + WebClientContextFactory.__module__, + WebClientContextFactory.__name__) + + + +@implementer(IPolicyForHTTPS) +class HostnameCachingHTTPSPolicy(object): + """ + IPolicyForHTTPS that wraps a L{IPolicyForHTTPS} and caches the created + L{IOpenSSLClientConnectionCreator}. + + This policy will cache up to C{cacheSize} + L{client connection creators <twisted.internet.interfaces. + IOpenSSLClientConnectionCreator>} for reuse in subsequent requests to + the same hostname. + + @ivar _policyForHTTPS: See C{policyforHTTPS} parameter of L{__init__}. + + @ivar _cache: A cache associating hostnames to their + L{client connection creators <twisted.internet.interfaces. + IOpenSSLClientConnectionCreator>}. + @type _cache: L{collections.OrderedDict} + + @ivar _cacheSize: See C{cacheSize} parameter of L{__init__}. + + @since: Twisted 19.2.0 + """ + + def __init__(self, policyforHTTPS, cacheSize=20): + """ + @param policyforHTTPS: The IPolicyForHTTPS to wrap. + @type policyforHTTPS: L{IPolicyForHTTPS} + + @param cacheSize: The maximum size of the hostname cache. 
+ @type cacheSize: L{int} + """ + self._policyForHTTPS = policyforHTTPS + self._cache = collections.OrderedDict() + self._cacheSize = cacheSize + + + def creatorForNetloc(self, hostname, port): + """ + Create a L{client connection creator + <twisted.internet.interfaces.IOpenSSLClientConnectionCreator>} for a + given network location and cache it for future use. + + @param hostname: The hostname part of the URI. + @type hostname: L{bytes} + + @param port: The port part of the URI. + @type port: L{int} + + @return: a connection creator with appropriate verification + restrictions set + @rtype: L{client connection creator + <twisted.internet.interfaces.IOpenSSLClientConnectionCreator>} + """ + host = hostname.decode("ascii") + try: + creator = self._cache.pop(host) + except KeyError: + creator = self._policyForHTTPS.creatorForNetloc(hostname, port) + + self._cache[host] = creator + if len(self._cache) > self._cacheSize: + self._cache.popitem(last=False) + + return creator + + + +@implementer(IOpenSSLContextFactory) +class _ContextFactoryWithContext(object): + """ + A L{_ContextFactoryWithContext} is like a + L{twisted.internet.ssl.ContextFactory} with a pre-created context. + + @ivar _context: A Context. + @type _context: L{OpenSSL.SSL.Context} + """ + + def __init__(self, context): + """ + Initialize a L{_ContextFactoryWithContext} with a context. + + @param context: An SSL context. + @type context: L{OpenSSL.SSL.Context} + """ + self._context = context + + + def getContext(self): + """ + Return the context created by + L{_DeprecatedToCurrentPolicyForHTTPS._webContextFactory}. + + @return: A context. + @rtype context: L{OpenSSL.SSL.Context} + """ + return self._context + + + +@implementer(IPolicyForHTTPS) +class _DeprecatedToCurrentPolicyForHTTPS(object): + """ + Adapt a web context factory to a normal context factory. + + @ivar _webContextFactory: An object providing a getContext method with + C{hostname} and C{port} arguments. + @type _webContextFactory: L{WebClientContextFactory} (or object with a + similar C{getContext} method). + """ + def __init__(self, webContextFactory): + """ + Wrap a web context factory in an L{IPolicyForHTTPS}. + + @param webContextFactory: An object providing a getContext method with + C{hostname} and C{port} arguments. + @type webContextFactory: L{WebClientContextFactory} (or object with a + similar C{getContext} method). + """ + self._webContextFactory = webContextFactory + + + def creatorForNetloc(self, hostname, port): + """ + Called the wrapped web context factory's C{getContext} method with a + hostname and port number and return the resulting context object. + + @param hostname: The hostname part of the URI. + @type hostname: L{bytes} + + @param port: The port part of the URI. + @type port: L{int} + + @return: A context factory. + @rtype: L{IOpenSSLContextFactory} + """ + context = self._webContextFactory.getContext(hostname, port) + return _ContextFactoryWithContext(context) + + + +@implementer(IBodyProducer) +class FileBodyProducer(object): + """ + L{FileBodyProducer} produces bytes from an input file object incrementally + and writes them to a consumer. + + Since file-like objects cannot be read from in an event-driven manner, + L{FileBodyProducer} uses a L{Cooperator} instance to schedule reads from + the file. This process is also paused and resumed based on notifications + from the L{IConsumer} provider being written to. + + The file is closed after it has been read, or if the producer is stopped + early. 
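+
+    A minimal sketch of use (the file name is illustrative only)::
+
+        producer = FileBodyProducer(open('upload.bin', 'rb'))
+        # producer.length is the file's size, or UNKNOWN_LENGTH if the
+        # file object does not support seek/tell; the producer can then
+        # be supplied wherever an IBodyProducer is accepted, such as a
+        # request body.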
+ + @ivar _inputFile: Any file-like object, bytes read from which will be + written to a consumer. + + @ivar _cooperate: A method like L{Cooperator.cooperate} which is used to + schedule all reads. + + @ivar _readSize: The number of bytes to read from C{_inputFile} at a time. + """ + + def __init__(self, inputFile, cooperator=task, readSize=2 ** 16): + self._inputFile = inputFile + self._cooperate = cooperator.cooperate + self._readSize = readSize + self.length = self._determineLength(inputFile) + + + def _determineLength(self, fObj): + """ + Determine how many bytes can be read out of C{fObj} (assuming it is not + modified from this point on). If the determination cannot be made, + return C{UNKNOWN_LENGTH}. + """ + try: + seek = fObj.seek + tell = fObj.tell + except AttributeError: + return UNKNOWN_LENGTH + originalPosition = tell() + seek(0, os.SEEK_END) + end = tell() + seek(originalPosition, os.SEEK_SET) + return end - originalPosition + + + def stopProducing(self): + """ + Permanently stop writing bytes from the file to the consumer by + stopping the underlying L{CooperativeTask}. + """ + self._inputFile.close() + self._task.stop() + + + def startProducing(self, consumer): + """ + Start a cooperative task which will read bytes from the input file and + write them to C{consumer}. Return a L{Deferred} which fires after all + bytes have been written. If this L{Deferred} is cancelled before it is + fired, stop reading and writing bytes. + + @param consumer: Any L{IConsumer} provider + """ + self._task = self._cooperate(self._writeloop(consumer)) + d = self._task.whenDone() + def maybeStopped(reason): + if reason.check(defer.CancelledError): + self.stopProducing() + elif reason.check(task.TaskStopped): + pass + else: + return reason + # IBodyProducer.startProducing's Deferred isn't supposed to fire if + # stopProducing is called. + return defer.Deferred() + d.addCallbacks(lambda ignored: None, maybeStopped) + return d + + + def _writeloop(self, consumer): + """ + Return an iterator which reads one chunk of bytes from the input file + and writes them to the consumer for each time it is iterated. + """ + while True: + bytes = self._inputFile.read(self._readSize) + if not bytes: + self._inputFile.close() + break + consumer.write(bytes) + yield None + + + def pauseProducing(self): + """ + Temporarily suspend copying bytes from the input file to the consumer + by pausing the L{CooperativeTask} which drives that activity. + """ + self._task.pause() + + + def resumeProducing(self): + """ + Undo the effects of a previous C{pauseProducing} and resume copying + bytes to the consumer by resuming the L{CooperativeTask} which drives + the write activity. + """ + self._task.resume() + + + +class _HTTP11ClientFactory(protocol.Factory): + """ + A factory for L{HTTP11ClientProtocol}, used by L{HTTPConnectionPool}. + + @ivar _quiescentCallback: The quiescent callback to be passed to protocol + instances, used to return them to the connection pool. + + @ivar _metadata: Metadata about the low-level connection details, + used to make the repr more useful. 
+ + @since: 11.1 + """ + def __init__(self, quiescentCallback, metadata): + self._quiescentCallback = quiescentCallback + self._metadata = metadata + + + def __repr__(self): + return '_HTTP11ClientFactory({}, {})'.format( + self._quiescentCallback, + self._metadata) + + def buildProtocol(self, addr): + return HTTP11ClientProtocol(self._quiescentCallback) + + + +class _RetryingHTTP11ClientProtocol(object): + """ + A wrapper for L{HTTP11ClientProtocol} that automatically retries requests. + + @ivar _clientProtocol: The underlying L{HTTP11ClientProtocol}. + + @ivar _newConnection: A callable that creates a new connection for a + retry. + """ + + def __init__(self, clientProtocol, newConnection): + self._clientProtocol = clientProtocol + self._newConnection = newConnection + + + def _shouldRetry(self, method, exception, bodyProducer): + """ + Indicate whether request should be retried. + + Only returns C{True} if method is idempotent, no response was + received, the reason for the failed request was not due to + user-requested cancellation, and no body was sent. The latter + requirement may be relaxed in the future, and PUT added to approved + method list. + + @param method: The method of the request. + @type method: L{bytes} + """ + if method not in (b"GET", b"HEAD", b"OPTIONS", b"DELETE", b"TRACE"): + return False + if not isinstance(exception, (RequestNotSent, + RequestTransmissionFailed, + ResponseNeverReceived)): + return False + if isinstance(exception, _WrapperException): + for aFailure in exception.reasons: + if aFailure.check(defer.CancelledError): + return False + if bodyProducer is not None: + return False + return True + + + def request(self, request): + """ + Do a request, and retry once (with a new connection) if it fails in + a retryable manner. + + @param request: A L{Request} instance that will be requested using the + wrapped protocol. + """ + d = self._clientProtocol.request(request) + + def failed(reason): + if self._shouldRetry(request.method, reason.value, + request.bodyProducer): + return self._newConnection().addCallback( + lambda connection: connection.request(request)) + else: + return reason + d.addErrback(failed) + return d + + + +class HTTPConnectionPool(object): + """ + A pool of persistent HTTP connections. + + Features: + - Cached connections will eventually time out. + - Limits on maximum number of persistent connections. + + Connections are stored using keys, which should be chosen such that any + connections stored under a given key can be used interchangeably. + + Failed requests done using previously cached connections will be retried + once if they use an idempotent method (e.g. GET), in case the HTTP server + timed them out. + + @ivar persistent: Boolean indicating whether connections should be + persistent. Connections are persistent by default. + + @ivar maxPersistentPerHost: The maximum number of cached persistent + connections for a C{host:port} destination. + @type maxPersistentPerHost: C{int} + + @ivar cachedConnectionTimeout: Number of seconds a cached persistent + connection will stay open before disconnecting. + + @ivar retryAutomatically: C{boolean} indicating whether idempotent + requests should be retried once if no response was received. + + @ivar _factory: The factory used to connect to the proxy. + + @ivar _connections: Map (scheme, host, port) to lists of + L{HTTP11ClientProtocol} instances. + + @ivar _timeouts: Map L{HTTP11ClientProtocol} instances to a + C{IDelayedCall} instance of their timeout. 
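+
+    A minimal usage sketch (assuming the global reactor; the URI is a
+    placeholder), sharing one persistent pool between all requests issued
+    through an L{Agent}::
+
+        from twisted.internet import reactor
+        from twisted.web.client import Agent, HTTPConnectionPool
+
+        pool = HTTPConnectionPool(reactor, persistent=True)
+        pool.maxPersistentPerHost = 4
+        agent = Agent(reactor, pool=pool)
+        d = agent.request(b"GET", b"http://example.com/")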
+ + @since: 12.1 + """ + + _factory = _HTTP11ClientFactory + maxPersistentPerHost = 2 + cachedConnectionTimeout = 240 + retryAutomatically = True + _log = Logger() + + def __init__(self, reactor, persistent=True): + self._reactor = reactor + self.persistent = persistent + self._connections = {} + self._timeouts = {} + + + def getConnection(self, key, endpoint): + """ + Supply a connection, newly created or retrieved from the pool, to be + used for one HTTP request. + + The connection will remain out of the pool (not available to be + returned from future calls to this method) until one HTTP request has + been completed over it. + + Afterwards, if the connection is still open, it will automatically be + added to the pool. + + @param key: A unique key identifying connections that can be used + interchangeably. + + @param endpoint: An endpoint that can be used to open a new connection + if no cached connection is available. + + @return: A C{Deferred} that will fire with a L{HTTP11ClientProtocol} + (or a wrapper) that can be used to send a single HTTP request. + """ + # Try to get cached version: + connections = self._connections.get(key) + while connections: + connection = connections.pop(0) + # Cancel timeout: + self._timeouts[connection].cancel() + del self._timeouts[connection] + if connection.state == "QUIESCENT": + if self.retryAutomatically: + newConnection = lambda: self._newConnection(key, endpoint) + connection = _RetryingHTTP11ClientProtocol( + connection, newConnection) + return defer.succeed(connection) + + return self._newConnection(key, endpoint) + + + def _newConnection(self, key, endpoint): + """ + Create a new connection. + + This implements the new connection code path for L{getConnection}. + """ + def quiescentCallback(protocol): + self._putConnection(key, protocol) + factory = self._factory(quiescentCallback, repr(endpoint)) + return endpoint.connect(factory) + + + def _removeConnection(self, key, connection): + """ + Remove a connection from the cache and disconnect it. + """ + connection.transport.loseConnection() + self._connections[key].remove(connection) + del self._timeouts[connection] + + + def _putConnection(self, key, connection): + """ + Return a persistent connection to the pool. This will be called by + L{HTTP11ClientProtocol} when the connection becomes quiescent. + """ + if connection.state != "QUIESCENT": + # Log with traceback for debugging purposes: + try: + raise RuntimeError( + "BUG: Non-quiescent protocol added to connection pool.") + except: + self._log.failure( + "BUG: Non-quiescent protocol added to connection pool.") + return + connections = self._connections.setdefault(key, []) + if len(connections) == self.maxPersistentPerHost: + dropped = connections.pop(0) + dropped.transport.loseConnection() + self._timeouts[dropped].cancel() + del self._timeouts[dropped] + connections.append(connection) + cid = self._reactor.callLater(self.cachedConnectionTimeout, + self._removeConnection, + key, connection) + self._timeouts[connection] = cid + + + def closeCachedConnections(self): + """ + Close all persistent connections and remove them from the pool. + + @return: L{defer.Deferred} that fires when all connections have been + closed. 
+ """ + results = [] + for protocols in itervalues(self._connections): + for p in protocols: + results.append(p.abort()) + self._connections = {} + for dc in itervalues(self._timeouts): + dc.cancel() + self._timeouts = {} + return defer.gatherResults(results).addCallback(lambda ign: None) + + + +class _AgentBase(object): + """ + Base class offering common facilities for L{Agent}-type classes. + + @ivar _reactor: The C{IReactorTime} implementation which will be used by + the pool, and perhaps by subclasses as well. + + @ivar _pool: The L{HTTPConnectionPool} used to manage HTTP connections. + """ + + def __init__(self, reactor, pool): + if pool is None: + pool = HTTPConnectionPool(reactor, False) + self._reactor = reactor + self._pool = pool + + + def _computeHostValue(self, scheme, host, port): + """ + Compute the string to use for the value of the I{Host} header, based on + the given scheme, host name, and port number. + """ + if (isIPv6Address(nativeString(host))): + host = b'[' + host + b']' + if (scheme, port) in ((b'http', 80), (b'https', 443)): + return host + return host + b":" + intToBytes(port) + + + def _requestWithEndpoint(self, key, endpoint, method, parsedURI, + headers, bodyProducer, requestPath): + """ + Issue a new request, given the endpoint and the path sent as part of + the request. + """ + if not isinstance(method, bytes): + raise TypeError('method={!r} is {}, but must be bytes'.format( + method, type(method))) + + method = _ensureValidMethod(method) + + # Create minimal headers, if necessary: + if headers is None: + headers = Headers() + if not headers.hasHeader(b'host'): + headers = headers.copy() + headers.addRawHeader( + b'host', self._computeHostValue(parsedURI.scheme, + parsedURI.host, + parsedURI.port)) + + d = self._pool.getConnection(key, endpoint) + def cbConnected(proto): + return proto.request( + Request._construct(method, requestPath, headers, bodyProducer, + persistent=self._pool.persistent, + parsedURI=parsedURI)) + d.addCallback(cbConnected) + return d + + + +@implementer(IAgentEndpointFactory) +class _StandardEndpointFactory(object): + """ + Standard HTTP endpoint destinations - TCP for HTTP, TCP+TLS for HTTPS. + + @ivar _policyForHTTPS: A web context factory which will be used to create + SSL context objects for any SSL connections the agent needs to make. + + @ivar _connectTimeout: If not L{None}, the timeout passed to + L{HostnameEndpoint} for specifying the connection timeout. + + @ivar _bindAddress: If not L{None}, the address passed to + L{HostnameEndpoint} for specifying the local address to bind to. + """ + def __init__(self, reactor, contextFactory, connectTimeout, bindAddress): + """ + @param reactor: A provider to use to create endpoints. + @type reactor: see L{HostnameEndpoint.__init__} for acceptable reactor + types. + + @param contextFactory: A factory for TLS contexts, to control the + verification parameters of OpenSSL. + @type contextFactory: L{IPolicyForHTTPS}. + + @param connectTimeout: The amount of time that this L{Agent} will wait + for the peer to accept a connection. + @type connectTimeout: L{float} or L{None} + + @param bindAddress: The local address for client sockets to bind to. + @type bindAddress: L{bytes} or L{None} + """ + self._reactor = reactor + self._policyForHTTPS = contextFactory + self._connectTimeout = connectTimeout + self._bindAddress = bindAddress + + + def endpointForURI(self, uri): + """ + Connect directly over TCP for C{b'http'} scheme, and TLS for + C{b'https'}. + + @param uri: L{URI} to connect to. 
+ + @return: Endpoint to connect to. + @rtype: L{IStreamClientEndpoint} + """ + kwargs = {} + if self._connectTimeout is not None: + kwargs['timeout'] = self._connectTimeout + kwargs['bindAddress'] = self._bindAddress + + try: + host = nativeString(uri.host) + except UnicodeDecodeError: + raise ValueError(("The host of the provided URI ({uri.host!r}) " + "contains non-ASCII octets, it should be ASCII " + "decodable.").format(uri=uri)) + + endpoint = HostnameEndpoint(self._reactor, host, uri.port, **kwargs) + if uri.scheme == b'http': + return endpoint + elif uri.scheme == b'https': + connectionCreator = self._policyForHTTPS.creatorForNetloc(uri.host, + uri.port) + return wrapClientTLS(connectionCreator, endpoint) + else: + raise SchemeNotSupported("Unsupported scheme: %r" % (uri.scheme,)) + + + +@implementer(IAgent) +class Agent(_AgentBase): + """ + L{Agent} is a very basic HTTP client. It supports I{HTTP} and I{HTTPS} + scheme URIs. + + @ivar _pool: An L{HTTPConnectionPool} instance. + + @ivar _endpointFactory: The L{IAgentEndpointFactory} which will + be used to create endpoints for outgoing connections. + + @since: 9.0 + """ + + def __init__(self, reactor, + contextFactory=BrowserLikePolicyForHTTPS(), + connectTimeout=None, bindAddress=None, + pool=None): + """ + Create an L{Agent}. + + @param reactor: A reactor for this L{Agent} to place outgoing + connections. + @type reactor: see L{HostnameEndpoint.__init__} for acceptable reactor + types. + + @param contextFactory: A factory for TLS contexts, to control the + verification parameters of OpenSSL. The default is to use a + L{BrowserLikePolicyForHTTPS}, so unless you have special + requirements you can leave this as-is. + @type contextFactory: L{IPolicyForHTTPS}. + + @param connectTimeout: The amount of time that this L{Agent} will wait + for the peer to accept a connection. + @type connectTimeout: L{float} + + @param bindAddress: The local address for client sockets to bind to. + @type bindAddress: L{bytes} + + @param pool: An L{HTTPConnectionPool} instance, or L{None}, in which + case a non-persistent L{HTTPConnectionPool} instance will be + created. + @type pool: L{HTTPConnectionPool} + """ + if not IPolicyForHTTPS.providedBy(contextFactory): + warnings.warn( + repr(contextFactory) + + " was passed as the HTTPS policy for an Agent, but it does " + "not provide IPolicyForHTTPS. Since Twisted 14.0, you must " + "pass a provider of IPolicyForHTTPS.", + stacklevel=2, category=DeprecationWarning + ) + contextFactory = _DeprecatedToCurrentPolicyForHTTPS(contextFactory) + endpointFactory = _StandardEndpointFactory( + reactor, contextFactory, connectTimeout, bindAddress) + self._init(reactor, endpointFactory, pool) + + + @classmethod + def usingEndpointFactory(cls, reactor, endpointFactory, pool=None): + """ + Create a new L{Agent} that will use the endpoint factory to figure + out how to connect to the server. + + @param reactor: A reactor for this L{Agent} to place outgoing + connections. + @type reactor: see L{HostnameEndpoint.__init__} for acceptable reactor + types. + + @param endpointFactory: Used to construct endpoints which the + HTTP client will connect with. + @type endpointFactory: an L{IAgentEndpointFactory} provider. + + @param pool: An L{HTTPConnectionPool} instance, or L{None}, in which + case a non-persistent L{HTTPConnectionPool} instance will be + created. + @type pool: L{HTTPConnectionPool} + + @return: A new L{Agent}. 
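+
+        A minimal sketch of such a factory (hypothetical, for illustration
+        only) which ignores the URI and always connects to a fixed local
+        endpoint::
+
+            from zope.interface import implementer
+            from twisted.internet.endpoints import TCP4ClientEndpoint
+            from twisted.web.iweb import IAgentEndpointFactory
+
+            @implementer(IAgentEndpointFactory)
+            class FixedEndpointFactory(object):
+                def __init__(self, reactor):
+                    self._reactor = reactor
+
+                def endpointForURI(self, uri):
+                    # Every request is sent to localhost:8080.
+                    return TCP4ClientEndpoint(self._reactor, "127.0.0.1", 8080)
+
+            agent = Agent.usingEndpointFactory(
+                reactor, FixedEndpointFactory(reactor))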
+ """ + agent = cls.__new__(cls) + agent._init(reactor, endpointFactory, pool) + return agent + + + def _init(self, reactor, endpointFactory, pool): + """ + Initialize a new L{Agent}. + + @param reactor: A reactor for this L{Agent} to place outgoing + connections. + @type reactor: see L{HostnameEndpoint.__init__} for acceptable reactor + types. + + @param endpointFactory: Used to construct endpoints which the + HTTP client will connect with. + @type endpointFactory: an L{IAgentEndpointFactory} provider. + + @param pool: An L{HTTPConnectionPool} instance, or L{None}, in which + case a non-persistent L{HTTPConnectionPool} instance will be + created. + @type pool: L{HTTPConnectionPool} + + @return: A new L{Agent}. + """ + _AgentBase.__init__(self, reactor, pool) + self._endpointFactory = endpointFactory + + + def _getEndpoint(self, uri): + """ + Get an endpoint for the given URI, using C{self._endpointFactory}. + + @param uri: The URI of the request. + @type uri: L{URI} + + @return: An endpoint which can be used to connect to given address. + """ + return self._endpointFactory.endpointForURI(uri) + + + def request(self, method, uri, headers=None, bodyProducer=None): + """ + Issue a request to the server indicated by the given C{uri}. + + An existing connection from the connection pool may be used or a new + one may be created. + + I{HTTP} and I{HTTPS} schemes are supported in C{uri}. + + @see: L{twisted.web.iweb.IAgent.request} + """ + uri = _ensureValidURI(uri.strip()) + parsedURI = URI.fromBytes(uri) + try: + endpoint = self._getEndpoint(parsedURI) + except SchemeNotSupported: + return defer.fail(Failure()) + key = (parsedURI.scheme, parsedURI.host, parsedURI.port) + return self._requestWithEndpoint(key, endpoint, method, parsedURI, + headers, bodyProducer, + parsedURI.originForm) + + + +@implementer(IAgent) +class ProxyAgent(_AgentBase): + """ + An HTTP agent able to cross HTTP proxies. + + @ivar _proxyEndpoint: The endpoint used to connect to the proxy. + + @since: 11.1 + """ + + def __init__(self, endpoint, reactor=None, pool=None): + if reactor is None: + from twisted.internet import reactor + _AgentBase.__init__(self, reactor, pool) + self._proxyEndpoint = endpoint + + + def request(self, method, uri, headers=None, bodyProducer=None): + """ + Issue a new request via the configured proxy. + """ + uri = _ensureValidURI(uri.strip()) + + # Cache *all* connections under the same key, since we are only + # connecting to a single destination, the proxy: + key = ("http-proxy", self._proxyEndpoint) + + # To support proxying HTTPS via CONNECT, we will use key + # ("http-proxy-CONNECT", scheme, host, port), and an endpoint that + # wraps _proxyEndpoint with an additional callback to do the CONNECT. + return self._requestWithEndpoint(key, self._proxyEndpoint, method, + URI.fromBytes(uri), headers, + bodyProducer, uri) + + + +class _FakeUrllib2Request(object): + """ + A fake C{urllib2.Request} object for C{cookielib} to work with. + + @see: U{http://docs.python.org/library/urllib2.html#request-objects} + + @type uri: native L{str} + @ivar uri: Request URI. + + @type headers: L{twisted.web.http_headers.Headers} + @ivar headers: Request headers. + + @type type: native L{str} + @ivar type: The scheme of the URI. + + @type host: native L{str} + @ivar host: The host[:port] of the URI. + + @since: 11.1 + """ + def __init__(self, uri): + """ + Create a fake Urllib2 request. + + @param uri: Request URI. 
+ @type uri: L{bytes} + """ + self.uri = nativeString(uri) + self.headers = Headers() + + _uri = URI.fromBytes(uri) + self.type = nativeString(_uri.scheme) + self.host = nativeString(_uri.host) + + if (_uri.scheme, _uri.port) not in ((b'http', 80), (b'https', 443)): + # If it's not a schema on the regular port, add the port. + self.host += ":" + str(_uri.port) + + if _PY3: + self.origin_req_host = nativeString(_uri.host) + self.unverifiable = lambda _: False + + + def has_header(self, header): + return self.headers.hasHeader(networkString(header)) + + + def add_unredirected_header(self, name, value): + self.headers.addRawHeader(networkString(name), networkString(value)) + + + def get_full_url(self): + return self.uri + + + def get_header(self, name, default=None): + headers = self.headers.getRawHeaders(networkString(name), default) + if headers is not None: + headers = [nativeString(x) for x in headers] + return headers[0] + return None + + + def get_host(self): + return self.host + + + def get_type(self): + return self.type + + + def is_unverifiable(self): + # In theory this shouldn't be hardcoded. + return False + + + +class _FakeUrllib2Response(object): + """ + A fake C{urllib2.Response} object for C{cookielib} to work with. + + @type response: C{twisted.web.iweb.IResponse} + @ivar response: Underlying Twisted Web response. + + @since: 11.1 + """ + def __init__(self, response): + self.response = response + + + def info(self): + class _Meta(object): + def getheaders(zelf, name): + # PY2 + headers = self.response.headers.getRawHeaders(name, []) + return headers + def get_all(zelf, name, default): + # PY3 + headers = self.response.headers.getRawHeaders( + networkString(name), default) + h = [nativeString(x) for x in headers] + return h + return _Meta() + + + +@implementer(IAgent) +class CookieAgent(object): + """ + L{CookieAgent} extends the basic L{Agent} to add RFC-compliant + handling of HTTP cookies. Cookies are written to and extracted + from a C{cookielib.CookieJar} instance. + + The same cookie jar instance will be used for any requests through this + agent, mutating it whenever a I{Set-Cookie} header appears in a response. + + @type _agent: L{twisted.web.client.Agent} + @ivar _agent: Underlying Twisted Web agent to issue requests through. + + @type cookieJar: C{cookielib.CookieJar} + @ivar cookieJar: Initialized cookie jar to read cookies from and store + cookies to. + + @since: 11.1 + """ + def __init__(self, agent, cookieJar): + self._agent = agent + self.cookieJar = cookieJar + + + def request(self, method, uri, headers=None, bodyProducer=None): + """ + Issue a new request to the wrapped L{Agent}. + + Send a I{Cookie} header if a cookie for C{uri} is stored in + L{CookieAgent.cookieJar}. Cookies are automatically extracted and + stored from requests. + + If a C{'cookie'} header appears in C{headers} it will override the + automatic cookie header obtained from the cookie jar. + + @see: L{Agent.request} + """ + if headers is None: + headers = Headers() + lastRequest = _FakeUrllib2Request(uri) + # Setting a cookie header explicitly will disable automatic request + # cookies. 
+ if not headers.hasHeader(b'cookie'): + self.cookieJar.add_cookie_header(lastRequest) + cookieHeader = lastRequest.get_header('Cookie', None) + if cookieHeader is not None: + headers = headers.copy() + headers.addRawHeader(b'cookie', networkString(cookieHeader)) + + d = self._agent.request(method, uri, headers, bodyProducer) + d.addCallback(self._extractCookies, lastRequest) + return d + + + def _extractCookies(self, response, request): + """ + Extract response cookies and store them in the cookie jar. + + @type response: L{twisted.web.iweb.IResponse} + @param response: Twisted Web response. + + @param request: A urllib2 compatible request object. + """ + resp = _FakeUrllib2Response(response) + self.cookieJar.extract_cookies(resp, request) + return response + + + +class GzipDecoder(proxyForInterface(IResponse)): + """ + A wrapper for a L{Response} instance which handles gzip'ed body. + + @ivar original: The original L{Response} object. + + @since: 11.1 + """ + + def __init__(self, response): + self.original = response + self.length = UNKNOWN_LENGTH + + + def deliverBody(self, protocol): + """ + Override C{deliverBody} to wrap the given C{protocol} with + L{_GzipProtocol}. + """ + self.original.deliverBody(_GzipProtocol(protocol, self.original)) + + + +class _GzipProtocol(proxyForInterface(IProtocol)): + """ + A L{Protocol} implementation which wraps another one, transparently + decompressing received data. + + @ivar _zlibDecompress: A zlib decompress object used to decompress the data + stream. + + @ivar _response: A reference to the original response, in case of errors. + + @since: 11.1 + """ + + def __init__(self, protocol, response): + self.original = protocol + self._response = response + self._zlibDecompress = zlib.decompressobj(16 + zlib.MAX_WBITS) + + + def dataReceived(self, data): + """ + Decompress C{data} with the zlib decompressor, forwarding the raw data + to the original protocol. + """ + try: + rawData = self._zlibDecompress.decompress(data) + except zlib.error: + raise ResponseFailed([Failure()], self._response) + if rawData: + self.original.dataReceived(rawData) + + + def connectionLost(self, reason): + """ + Forward the connection lost event, flushing remaining data from the + decompressor if any. + """ + try: + rawData = self._zlibDecompress.flush() + except zlib.error: + raise ResponseFailed([reason, Failure()], self._response) + if rawData: + self.original.dataReceived(rawData) + self.original.connectionLost(reason) + + + +@implementer(IAgent) +class ContentDecoderAgent(object): + """ + An L{Agent} wrapper to handle encoded content. + + It takes care of declaring the support for content in the + I{Accept-Encoding} header, and automatically decompresses the received data + if it's effectively using compression. + + @param decoders: A list or tuple of (name, decoder) objects. The name + declares which decoding the decoder supports, and the decoder must + return a response object when called/instantiated. For example, + C{(('gzip', GzipDecoder))}. The order determines how the decoders are + going to be advertized to the server. + + @since: 11.1 + """ + + def __init__(self, agent, decoders): + self._agent = agent + self._decoders = dict(decoders) + self._supported = b','.join([decoder[0] for decoder in decoders]) + + + def request(self, method, uri, headers=None, bodyProducer=None): + """ + Send a client request which declares supporting compressed content. + + @see: L{Agent.request}. 
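+
+        A minimal sketch (assuming the global reactor; the URI is a
+        placeholder), wrapping a basic L{Agent} so that gzip-compressed
+        responses are decoded transparently::
+
+            from twisted.internet import reactor
+            from twisted.web.client import (
+                Agent, ContentDecoderAgent, GzipDecoder)
+
+            agent = ContentDecoderAgent(Agent(reactor),
+                                        [(b"gzip", GzipDecoder)])
+            d = agent.request(b"GET", b"http://example.com/")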
+ """ + if headers is None: + headers = Headers() + else: + headers = headers.copy() + headers.addRawHeader(b'accept-encoding', self._supported) + deferred = self._agent.request(method, uri, headers, bodyProducer) + return deferred.addCallback(self._handleResponse) + + + def _handleResponse(self, response): + """ + Check if the response is encoded, and wrap it to handle decompression. + """ + contentEncodingHeaders = response.headers.getRawHeaders( + b'content-encoding', []) + contentEncodingHeaders = b','.join(contentEncodingHeaders).split(b',') + while contentEncodingHeaders: + name = contentEncodingHeaders.pop().strip() + decoder = self._decoders.get(name) + if decoder is not None: + response = decoder(response) + else: + # Add it back + contentEncodingHeaders.append(name) + break + if contentEncodingHeaders: + response.headers.setRawHeaders( + b'content-encoding', [b','.join(contentEncodingHeaders)]) + else: + response.headers.removeHeader(b'content-encoding') + return response + + + +@implementer(IAgent) +class RedirectAgent(object): + """ + An L{Agent} wrapper which handles HTTP redirects. + + The implementation is rather strict: 301 and 302 behaves like 307, not + redirecting automatically on methods different from I{GET} and I{HEAD}. + + See L{BrowserLikeRedirectAgent} for a redirecting Agent that behaves more + like a web browser. + + @param redirectLimit: The maximum number of times the agent is allowed to + follow redirects before failing with a L{error.InfiniteRedirection}. + + @cvar _redirectResponses: A L{list} of HTTP status codes to be redirected + for I{GET} and I{HEAD} methods. + + @cvar _seeOtherResponses: A L{list} of HTTP status codes to be redirected + for any method and the method altered to I{GET}. + + @since: 11.1 + """ + + _redirectResponses = [http.MOVED_PERMANENTLY, http.FOUND, + http.TEMPORARY_REDIRECT] + _seeOtherResponses = [http.SEE_OTHER] + + + def __init__(self, agent, redirectLimit=20): + self._agent = agent + self._redirectLimit = redirectLimit + + + def request(self, method, uri, headers=None, bodyProducer=None): + """ + Send a client request following HTTP redirects. + + @see: L{Agent.request}. + """ + deferred = self._agent.request(method, uri, headers, bodyProducer) + return deferred.addCallback( + self._handleResponse, method, uri, headers, 0) + + + def _resolveLocation(self, requestURI, location): + """ + Resolve the redirect location against the request I{URI}. + + @type requestURI: C{bytes} + @param requestURI: The request I{URI}. + + @type location: C{bytes} + @param location: The redirect location. + + @rtype: C{bytes} + @return: Final resolved I{URI}. + """ + return _urljoin(requestURI, location) + + + def _handleRedirect(self, response, method, uri, headers, redirectCount): + """ + Handle a redirect response, checking the number of redirects already + followed, and extracting the location header fields. 
+ """ + if redirectCount >= self._redirectLimit: + err = error.InfiniteRedirection( + response.code, + b'Infinite redirection detected', + location=uri) + raise ResponseFailed([Failure(err)], response) + locationHeaders = response.headers.getRawHeaders(b'location', []) + if not locationHeaders: + err = error.RedirectWithNoLocation( + response.code, b'No location header field', uri) + raise ResponseFailed([Failure(err)], response) + location = self._resolveLocation(uri, locationHeaders[0]) + deferred = self._agent.request(method, location, headers) + def _chainResponse(newResponse): + newResponse.setPreviousResponse(response) + return newResponse + deferred.addCallback(_chainResponse) + return deferred.addCallback( + self._handleResponse, method, uri, headers, redirectCount + 1) + + + def _handleResponse(self, response, method, uri, headers, redirectCount): + """ + Handle the response, making another request if it indicates a redirect. + """ + if response.code in self._redirectResponses: + if method not in (b'GET', b'HEAD'): + err = error.PageRedirect(response.code, location=uri) + raise ResponseFailed([Failure(err)], response) + return self._handleRedirect(response, method, uri, headers, + redirectCount) + elif response.code in self._seeOtherResponses: + return self._handleRedirect(response, b'GET', uri, headers, + redirectCount) + return response + + + +class BrowserLikeRedirectAgent(RedirectAgent): + """ + An L{Agent} wrapper which handles HTTP redirects in the same fashion as web + browsers. + + Unlike L{RedirectAgent}, the implementation is more relaxed: 301 and 302 + behave like 303, redirecting automatically on any method and altering the + redirect request to a I{GET}. + + @see: L{RedirectAgent} + + @since: 13.1 + """ + _redirectResponses = [http.TEMPORARY_REDIRECT] + _seeOtherResponses = [http.MOVED_PERMANENTLY, http.FOUND, http.SEE_OTHER] + + + +class _ReadBodyProtocol(protocol.Protocol): + """ + Protocol that collects data sent to it. + + This is a helper for L{IResponse.deliverBody}, which collects the body and + fires a deferred with it. + + @ivar deferred: See L{__init__}. + @ivar status: See L{__init__}. + @ivar message: See L{__init__}. + + @ivar dataBuffer: list of byte-strings received + @type dataBuffer: L{list} of L{bytes} + """ + + def __init__(self, status, message, deferred): + """ + @param status: Status of L{IResponse} + @ivar status: L{int} + + @param message: Message of L{IResponse} + @type message: L{bytes} + + @param deferred: deferred to fire when response is complete + @type deferred: L{Deferred} firing with L{bytes} + """ + self.deferred = deferred + self.status = status + self.message = message + self.dataBuffer = [] + + + def dataReceived(self, data): + """ + Accumulate some more bytes from the response. + """ + self.dataBuffer.append(data) + + + def connectionLost(self, reason): + """ + Deliver the accumulated response bytes to the waiting L{Deferred}, if + the response body has been completely received without error. + """ + if reason.check(ResponseDone): + self.deferred.callback(b''.join(self.dataBuffer)) + elif reason.check(PotentialDataLoss): + self.deferred.errback( + PartialDownloadError(self.status, self.message, + b''.join(self.dataBuffer))) + else: + self.deferred.errback(reason) + + + +def readBody(response): + """ + Get the body of an L{IResponse} and return it as a byte string. + + This is a helper function for clients that don't want to incrementally + receive the body of an HTTP response. 
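+
+    A minimal usage sketch (assuming C{agent} is an existing
+    L{twisted.web.client.Agent} and the reactor is running)::
+
+        def printBody(body):
+            # body is the complete response body as bytes.
+            print(body)
+
+        d = agent.request(b"GET", b"http://example.com/")
+        d.addCallback(readBody)
+        d.addCallback(printBody)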
+ + @param response: The HTTP response for which the body will be read. + @type response: L{IResponse} provider + + @return: A L{Deferred} which will fire with the body of the response. + Cancelling it will close the connection to the server immediately. + """ + def cancel(deferred): + """ + Cancel a L{readBody} call, close the connection to the HTTP server + immediately, if it is still open. + + @param deferred: The cancelled L{defer.Deferred}. + """ + abort = getAbort() + if abort is not None: + abort() + + d = defer.Deferred(cancel) + protocol = _ReadBodyProtocol(response.code, response.phrase, d) + def getAbort(): + return getattr(protocol.transport, 'abortConnection', None) + + response.deliverBody(protocol) + + if protocol.transport is not None and getAbort() is None: + warnings.warn( + 'Using readBody with a transport that does not have an ' + 'abortConnection method', + category=DeprecationWarning, + stacklevel=2) + + return d + + + +__all__ = [ + 'Agent', + 'BrowserLikePolicyForHTTPS', + 'BrowserLikeRedirectAgent', + 'ContentDecoderAgent', + 'CookieAgent', + 'downloadPage', + 'getPage', + 'GzipDecoder', + 'HTTPClientFactory', + 'HTTPConnectionPool', + 'HTTPDownloader', + 'HTTPPageDownloader', + 'HTTPPageGetter', + 'PartialDownloadError', + 'ProxyAgent', + 'readBody', + 'RedirectAgent', + 'RequestGenerationFailed', + 'RequestTransmissionFailed', + 'Response', + 'ResponseDone', + 'ResponseFailed', + 'ResponseNeverReceived', + 'URI', + ] diff --git a/contrib/python/Twisted/py2/twisted/web/demo.py b/contrib/python/Twisted/py2/twisted/web/demo.py new file mode 100644 index 0000000000..1fe83a9e4f --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/demo.py @@ -0,0 +1,26 @@ +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +I am a simple test resource. +""" + +from __future__ import absolute_import, division + +from twisted.web import static + + +class Test(static.Data): + isLeaf = True + def __init__(self): + static.Data.__init__( + self, + b""" + <html> + <head><title>Twisted Web Demo</title><head> + <body> + Hello! This is a Twisted Web test page. + </body> + </html> + """, + "text/html") diff --git a/contrib/python/Twisted/py2/twisted/web/distrib.py b/contrib/python/Twisted/py2/twisted/web/distrib.py new file mode 100644 index 0000000000..38f46a0ff9 --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/distrib.py @@ -0,0 +1,386 @@ +# -*- test-case-name: twisted.web.test.test_distrib -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +Distributed web servers. + +This is going to have to be refactored so that argument parsing is done +by each subprocess and not by the main web server (i.e. GET, POST etc.). 
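+
+A minimal usage sketch (C{site}, C{root} and C{socketPath} are assumed to
+already exist): a subprocess publishes its site over a UNIX socket and the
+main server mounts it::
+
+    from twisted.spread import pb
+    from twisted.internet import reactor
+    from twisted.web.distrib import ResourcePublisher, ResourceSubscription
+
+    # Subprocess: publish its Site over a UNIX socket via Perspective Broker.
+    reactor.listenUNIX(socketPath, pb.PBServerFactory(ResourcePublisher(site)))
+
+    # Main server: serve the subprocess's resources under /user/.
+    root.putChild('user', ResourceSubscription('unix', socketPath))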
+""" + +# System Imports +import os, copy +try: + import pwd +except ImportError: + pwd = None +from io import BytesIO + +from xml.dom.minidom import getDOMImplementation + +# Twisted Imports +from twisted.spread import pb +from twisted.spread.banana import SIZE_LIMIT +from twisted.web import http, resource, server, util, static +from twisted.web.http_headers import Headers +from twisted.persisted import styles +from twisted.internet import address, reactor +from twisted.logger import Logger + + +class _ReferenceableProducerWrapper(pb.Referenceable): + def __init__(self, producer): + self.producer = producer + + def remote_resumeProducing(self): + self.producer.resumeProducing() + + def remote_pauseProducing(self): + self.producer.pauseProducing() + + def remote_stopProducing(self): + self.producer.stopProducing() + + +class Request(pb.RemoteCopy, server.Request): + """ + A request which was received by a L{ResourceSubscription} and sent via + PB to a distributed node. + """ + def setCopyableState(self, state): + """ + Initialize this L{twisted.web.distrib.Request} based on the copied + state so that it closely resembles a L{twisted.web.server.Request}. + """ + for k in 'host', 'client': + tup = state[k] + addrdesc = {'INET': 'TCP', 'UNIX': 'UNIX'}[tup[0]] + addr = {'TCP': lambda: address.IPv4Address(addrdesc, + tup[1], tup[2]), + 'UNIX': lambda: address.UNIXAddress(tup[1])}[addrdesc]() + state[k] = addr + state['requestHeaders'] = Headers(dict(state['requestHeaders'])) + pb.RemoteCopy.setCopyableState(self, state) + # Emulate the local request interface -- + self.content = BytesIO(self.content_data) + self.finish = self.remote.remoteMethod('finish') + self.setHeader = self.remote.remoteMethod('setHeader') + self.addCookie = self.remote.remoteMethod('addCookie') + self.setETag = self.remote.remoteMethod('setETag') + self.setResponseCode = self.remote.remoteMethod('setResponseCode') + self.setLastModified = self.remote.remoteMethod('setLastModified') + + # To avoid failing if a resource tries to write a very long string + # all at once, this one will be handled slightly differently. + self._write = self.remote.remoteMethod('write') + + + def write(self, bytes): + """ + Write the given bytes to the response body. + + @param bytes: The bytes to write. If this is longer than 640k, it + will be split up into smaller pieces. + """ + start = 0 + end = SIZE_LIMIT + while True: + self._write(bytes[start:end]) + start += SIZE_LIMIT + end += SIZE_LIMIT + if start >= len(bytes): + break + + + def registerProducer(self, producer, streaming): + self.remote.callRemote("registerProducer", + _ReferenceableProducerWrapper(producer), + streaming).addErrback(self.fail) + + def unregisterProducer(self): + self.remote.callRemote("unregisterProducer").addErrback(self.fail) + + def fail(self, failure): + self._log.failure('', failure=failure) + + +pb.setUnjellyableForClass(server.Request, Request) + +class Issue: + _log = Logger() + + def __init__(self, request): + self.request = request + + def finished(self, result): + if result is not server.NOT_DONE_YET: + assert isinstance(result, str), "return value not a string" + self.request.write(result) + self.request.finish() + + def failed(self, failure): + #XXX: Argh. FIXME. + failure = str(failure) + self.request.write( + resource.ErrorPage(http.INTERNAL_SERVER_ERROR, + "Server Connection Lost", + "Connection to distributed server lost:" + + util._PRE(failure)). 
+ render(self.request)) + self.request.finish() + self._log.info(failure) + + +class ResourceSubscription(resource.Resource): + isLeaf = 1 + waiting = 0 + _log = Logger() + + def __init__(self, host, port): + resource.Resource.__init__(self) + self.host = host + self.port = port + self.pending = [] + self.publisher = None + + def __getstate__(self): + """Get persistent state for this ResourceSubscription. + """ + # When I unserialize, + state = copy.copy(self.__dict__) + # Publisher won't be connected... + state['publisher'] = None + # I won't be making a connection + state['waiting'] = 0 + # There will be no pending requests. + state['pending'] = [] + return state + + def connected(self, publisher): + """I've connected to a publisher; I'll now send all my requests. + """ + self._log.info('connected to publisher') + publisher.broker.notifyOnDisconnect(self.booted) + self.publisher = publisher + self.waiting = 0 + for request in self.pending: + self.render(request) + self.pending = [] + + def notConnected(self, msg): + """I can't connect to a publisher; I'll now reply to all pending + requests. + """ + self._log.info( + "could not connect to distributed web service: {msg}", + msg=msg + ) + self.waiting = 0 + self.publisher = None + for request in self.pending: + request.write("Unable to connect to distributed server.") + request.finish() + self.pending = [] + + def booted(self): + self.notConnected("connection dropped") + + def render(self, request): + """Render this request, from my server. + + This will always be asynchronous, and therefore return NOT_DONE_YET. + It spins off a request to the pb client, and either adds it to the list + of pending issues or requests it immediately, depending on if the + client is already connected. + """ + if not self.publisher: + self.pending.append(request) + if not self.waiting: + self.waiting = 1 + bf = pb.PBClientFactory() + timeout = 10 + if self.host == "unix": + reactor.connectUNIX(self.port, bf, timeout) + else: + reactor.connectTCP(self.host, self.port, bf, timeout) + d = bf.getRootObject() + d.addCallbacks(self.connected, self.notConnected) + + else: + i = Issue(request) + self.publisher.callRemote('request', request).addCallbacks(i.finished, i.failed) + return server.NOT_DONE_YET + + + +class ResourcePublisher(pb.Root, styles.Versioned): + """ + L{ResourcePublisher} exposes a remote API which can be used to respond + to request. + + @ivar site: The site which will be used for resource lookup. + @type site: L{twisted.web.server.Site} + """ + _log = Logger() + + def __init__(self, site): + self.site = site + + persistenceVersion = 2 + + def upgradeToVersion2(self): + self.application.authorizer.removeIdentity("web") + del self.application.services[self.serviceName] + del self.serviceName + del self.application + del self.perspectiveName + + def getPerspectiveNamed(self, name): + return self + + + def remote_request(self, request): + """ + Look up the resource for the given request and render it. + """ + res = self.site.getResourceFor(request) + self._log.info(request) + result = res.render(request) + if result is not server.NOT_DONE_YET: + request.write(result) + request.finish() + return server.NOT_DONE_YET + + + +class UserDirectory(resource.Resource): + """ + A resource which lists available user resources and serves them as + children. + + @ivar _pwd: An object like L{pwd} which is used to enumerate users and + their home directories. 
+ """ + + userDirName = 'public_html' + userSocketName = '.twistd-web-pb' + + template = """ +<html> + <head> + <title>twisted.web.distrib.UserDirectory</title> + <style> + + a + { + font-family: Lucida, Verdana, Helvetica, Arial, sans-serif; + color: #369; + text-decoration: none; + } + + th + { + font-family: Lucida, Verdana, Helvetica, Arial, sans-serif; + font-weight: bold; + text-decoration: none; + text-align: left; + } + + pre, code + { + font-family: "Courier New", Courier, monospace; + } + + p, body, td, ol, ul, menu, blockquote, div + { + font-family: Lucida, Verdana, Helvetica, Arial, sans-serif; + color: #000; + } + </style> + </head> + + <body> + <h1>twisted.web.distrib.UserDirectory</h1> + + %(users)s +</body> +</html> +""" + + def __init__(self, userDatabase=None): + resource.Resource.__init__(self) + if userDatabase is None: + userDatabase = pwd + self._pwd = userDatabase + + + def _users(self): + """ + Return a list of two-tuples giving links to user resources and text to + associate with those links. + """ + users = [] + for user in self._pwd.getpwall(): + name, passwd, uid, gid, gecos, dir, shell = user + realname = gecos.split(',')[0] + if not realname: + realname = name + if os.path.exists(os.path.join(dir, self.userDirName)): + users.append((name, realname + ' (file)')) + twistdsock = os.path.join(dir, self.userSocketName) + if os.path.exists(twistdsock): + linkName = name + '.twistd' + users.append((linkName, realname + ' (twistd)')) + return users + + + def render_GET(self, request): + """ + Render as HTML a listing of all known users with links to their + personal resources. + """ + + domImpl = getDOMImplementation() + newDoc = domImpl.createDocument(None, "ul", None) + listing = newDoc.documentElement + for link, text in self._users(): + linkElement = newDoc.createElement('a') + linkElement.setAttribute('href', link + '/') + textNode = newDoc.createTextNode(text) + linkElement.appendChild(textNode) + item = newDoc.createElement('li') + item.appendChild(linkElement) + listing.appendChild(item) + + htmlDoc = self.template % ({'users': listing.toxml()}) + return htmlDoc.encode("utf-8") + + + def getChild(self, name, request): + if name == '': + return self + + td = '.twistd' + + if name[-len(td):] == td: + username = name[:-len(td)] + sub = 1 + else: + username = name + sub = 0 + try: + pw_name, pw_passwd, pw_uid, pw_gid, pw_gecos, pw_dir, pw_shell \ + = self._pwd.getpwnam(username) + except KeyError: + return resource.NoResource() + if sub: + twistdsock = os.path.join(pw_dir, self.userSocketName) + rs = ResourceSubscription('unix',twistdsock) + self.putChild(name, rs) + return rs + else: + path = os.path.join(pw_dir, self.userDirName) + if not os.path.exists(path): + return resource.NoResource() + return static.File(path) diff --git a/contrib/python/Twisted/py2/twisted/web/domhelpers.py b/contrib/python/Twisted/py2/twisted/web/domhelpers.py new file mode 100644 index 0000000000..1ca491b470 --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/domhelpers.py @@ -0,0 +1,272 @@ +# -*- test-case-name: twisted.web.test.test_domhelpers -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +A library for performing interesting tasks with DOM objects. +""" + +from io import StringIO + +from twisted.web import microdom +from twisted.web.microdom import getElementsByTagName, escape, unescape +# These modules are imported here as a shortcut. 
+escape +getElementsByTagName + + + +class NodeLookupError(Exception): + pass + + +def substitute(request, node, subs): + """ + Look through the given node's children for strings, and + attempt to do string substitution with the given parameter. + """ + for child in node.childNodes: + if hasattr(child, 'nodeValue') and child.nodeValue: + child.replaceData(0, len(child.nodeValue), child.nodeValue % subs) + substitute(request, child, subs) + +def _get(node, nodeId, nodeAttrs=('id','class','model','pattern')): + """ + (internal) Get a node with the specified C{nodeId} as any of the C{class}, + C{id} or C{pattern} attributes. + """ + + if hasattr(node, 'hasAttributes') and node.hasAttributes(): + for nodeAttr in nodeAttrs: + if (str (node.getAttribute(nodeAttr)) == nodeId): + return node + if node.hasChildNodes(): + if hasattr(node.childNodes, 'length'): + length = node.childNodes.length + else: + length = len(node.childNodes) + for childNum in range(length): + result = _get(node.childNodes[childNum], nodeId) + if result: return result + +def get(node, nodeId): + """ + Get a node with the specified C{nodeId} as any of the C{class}, + C{id} or C{pattern} attributes. If there is no such node, raise + L{NodeLookupError}. + """ + result = _get(node, nodeId) + if result: return result + raise NodeLookupError(nodeId) + +def getIfExists(node, nodeId): + """ + Get a node with the specified C{nodeId} as any of the C{class}, + C{id} or C{pattern} attributes. If there is no such node, return + L{None}. + """ + return _get(node, nodeId) + +def getAndClear(node, nodeId): + """Get a node with the specified C{nodeId} as any of the C{class}, + C{id} or C{pattern} attributes. If there is no such node, raise + L{NodeLookupError}. Remove all child nodes before returning. + """ + result = get(node, nodeId) + if result: + clearNode(result) + return result + +def clearNode(node): + """ + Remove all children from the given node. + """ + node.childNodes[:] = [] + +def locateNodes(nodeList, key, value, noNesting=1): + """ + Find subnodes in the given node where the given attribute + has the given value. + """ + returnList = [] + if not isinstance(nodeList, type([])): + return locateNodes(nodeList.childNodes, key, value, noNesting) + for childNode in nodeList: + if not hasattr(childNode, 'getAttribute'): + continue + if str(childNode.getAttribute(key)) == value: + returnList.append(childNode) + if noNesting: + continue + returnList.extend(locateNodes(childNode, key, value, noNesting)) + return returnList + +def superSetAttribute(node, key, value): + if not hasattr(node, 'setAttribute'): return + node.setAttribute(key, value) + if node.hasChildNodes(): + for child in node.childNodes: + superSetAttribute(child, key, value) + +def superPrependAttribute(node, key, value): + if not hasattr(node, 'setAttribute'): return + old = node.getAttribute(key) + if old: + node.setAttribute(key, value+'/'+old) + else: + node.setAttribute(key, value) + if node.hasChildNodes(): + for child in node.childNodes: + superPrependAttribute(child, key, value) + +def superAppendAttribute(node, key, value): + if not hasattr(node, 'setAttribute'): return + old = node.getAttribute(key) + if old: + node.setAttribute(key, old + '/' + value) + else: + node.setAttribute(key, value) + if node.hasChildNodes(): + for child in node.childNodes: + superAppendAttribute(child, key, value) + +def gatherTextNodes(iNode, dounescape=0, joinWith=""): + """Visit each child node and collect its text data, if any, into a string. 
+For example:: + >>> doc=microdom.parseString('<a>1<b>2<c>3</c>4</b></a>') + >>> gatherTextNodes(doc.documentElement) + '1234' +With dounescape=1, also convert entities back into normal characters. +@return: the gathered nodes as a single string +@rtype: str +""" + gathered=[] + gathered_append=gathered.append + slice=[iNode] + while len(slice)>0: + c=slice.pop(0) + if hasattr(c, 'nodeValue') and c.nodeValue is not None: + if dounescape: + val=unescape(c.nodeValue) + else: + val=c.nodeValue + gathered_append(val) + slice[:0]=c.childNodes + return joinWith.join(gathered) + +class RawText(microdom.Text): + """This is an evil and horrible speed hack. Basically, if you have a big + chunk of XML that you want to insert into the DOM, but you don't want to + incur the cost of parsing it, you can construct one of these and insert it + into the DOM. This will most certainly only work with microdom as the API + for converting nodes to xml is different in every DOM implementation. + + This could be improved by making this class a Lazy parser, so if you + inserted this into the DOM and then later actually tried to mutate this + node, it would be parsed then. + """ + + def writexml(self, writer, indent="", addindent="", newl="", strip=0, nsprefixes=None, namespace=None): + writer.write("%s%s%s" % (indent, self.data, newl)) + +def findNodes(parent, matcher, accum=None): + if accum is None: + accum = [] + if not parent.hasChildNodes(): + return accum + for child in parent.childNodes: + # print child, child.nodeType, child.nodeName + if matcher(child): + accum.append(child) + findNodes(child, matcher, accum) + return accum + + +def findNodesShallowOnMatch(parent, matcher, recurseMatcher, accum=None): + if accum is None: + accum = [] + if not parent.hasChildNodes(): + return accum + for child in parent.childNodes: + # print child, child.nodeType, child.nodeName + if matcher(child): + accum.append(child) + if recurseMatcher(child): + findNodesShallowOnMatch(child, matcher, recurseMatcher, accum) + return accum + +def findNodesShallow(parent, matcher, accum=None): + if accum is None: + accum = [] + if not parent.hasChildNodes(): + return accum + for child in parent.childNodes: + if matcher(child): + accum.append(child) + else: + findNodes(child, matcher, accum) + return accum + + +def findElementsWithAttributeShallow(parent, attribute): + """ + Return an iterable of the elements which are direct children of C{parent} + and which have the C{attribute} attribute. + """ + return findNodesShallow(parent, + lambda n: getattr(n, 'tagName', None) is not None and + n.hasAttribute(attribute)) + + +def findElements(parent, matcher): + """ + Return an iterable of the elements which are children of C{parent} for + which the predicate C{matcher} returns true. 
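+
+    For example (a small sketch using L{twisted.web.microdom})::
+
+        doc = microdom.parseString('<div><p class="x">hi</p><span/></div>')
+        paragraphs = findElements(doc, lambda n: n.tagName == 'p')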
+ """ + return findNodes( + parent, + lambda n, matcher=matcher: getattr(n, 'tagName', None) is not None and + matcher(n)) + +def findElementsWithAttribute(parent, attribute, value=None): + if value: + return findElements( + parent, + lambda n, attribute=attribute, value=value: + n.hasAttribute(attribute) and n.getAttribute(attribute) == value) + else: + return findElements( + parent, + lambda n, attribute=attribute: n.hasAttribute(attribute)) + + +def findNodesNamed(parent, name): + return findNodes(parent, lambda n, name=name: n.nodeName == name) + + +def writeNodeData(node, oldio): + for subnode in node.childNodes: + if hasattr(subnode, 'data'): + oldio.write(u"" + subnode.data) + else: + writeNodeData(subnode, oldio) + + +def getNodeText(node): + oldio = StringIO() + writeNodeData(node, oldio) + return oldio.getvalue() + + +def getParents(node): + l = [] + while node: + l.append(node) + node = node.parentNode + return l + +def namedChildren(parent, nodeName): + """namedChildren(parent, nodeName) -> children (not descendants) of parent + that have tagName == nodeName + """ + return [n for n in parent.childNodes if getattr(n, 'tagName', '')==nodeName] diff --git a/contrib/python/Twisted/py2/twisted/web/error.py b/contrib/python/Twisted/py2/twisted/web/error.py new file mode 100644 index 0000000000..e3456a4b4d --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/error.py @@ -0,0 +1,407 @@ +# -*- test-case-name: twisted.web.test.test_error -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +Exception definitions for L{twisted.web}. +""" + +from __future__ import division, absolute_import +try: + from future_builtins import ascii +except ImportError: + pass + +__all__ = [ + 'Error', 'PageRedirect', 'InfiniteRedirection', 'RenderError', + 'MissingRenderMethod', 'MissingTemplateLoader', 'UnexposedMethodError', + 'UnfilledSlot', 'UnsupportedType', 'FlattenerError', + 'RedirectWithNoLocation', + ] + + +from twisted.web._responses import RESPONSES +from twisted.python.compat import unicode, nativeString, intToBytes, Sequence + + + +def _codeToMessage(code): + """ + Returns the response message corresponding to an HTTP code, or None + if the code is unknown or unrecognized. + + @type code: L{bytes} + @param code: Refers to an HTTP status code, for example C{http.NOT_FOUND}. + + @return: A string message or none + @rtype: L{bytes} + """ + try: + return RESPONSES.get(int(code)) + except (ValueError, AttributeError): + return None + + +class Error(Exception): + """ + A basic HTTP error. + + @type status: L{bytes} + @ivar status: Refers to an HTTP status code, for example C{http.NOT_FOUND}. + + @type message: L{bytes} + @param message: A short error message, for example "NOT FOUND". + + @type response: L{bytes} + @ivar response: A complete HTML document for an error page. + """ + def __init__(self, code, message=None, response=None): + """ + Initializes a basic exception. + + @type code: L{bytes} or L{int} + @param code: Refers to an HTTP status code (for example, 200) either as + an integer or a bytestring representing such. If no C{message} is + given, C{code} is mapped to a descriptive bytestring that is used + instead. + + @type message: L{bytes} + @param message: A short error message, for example "NOT FOUND". + + @type response: L{bytes} + @param response: A complete HTML document for an error page. 
+ """ + message = message or _codeToMessage(code) + + Exception.__init__(self, code, message, response) + + if isinstance(code, int): + # If we're given an int, convert it to a bytestring + # downloadPage gives a bytes, Agent gives an int, and it worked by + # accident previously, so just make it keep working. + code = intToBytes(code) + + self.status = code + self.message = message + self.response = response + + + def __str__(self): + return nativeString(self.status + b" " + self.message) + + + +class PageRedirect(Error): + """ + A request resulted in an HTTP redirect. + + @type location: L{bytes} + @ivar location: The location of the redirect which was not followed. + """ + def __init__(self, code, message=None, response=None, location=None): + """ + Initializes a page redirect exception. + + @type code: L{bytes} + @param code: Refers to an HTTP status code, for example + C{http.NOT_FOUND}. If no C{message} is given, C{code} is mapped to a + descriptive string that is used instead. + + @type message: L{bytes} + @param message: A short error message, for example "NOT FOUND". + + @type response: L{bytes} + @param response: A complete HTML document for an error page. + + @type location: L{bytes} + @param location: The location response-header field value. It is an + absolute URI used to redirect the receiver to a location other than + the Request-URI so the request can be completed. + """ + Error.__init__(self, code, message, response) + if self.message and location: + self.message = self.message + b" to " + location + self.location = location + + + +class InfiniteRedirection(Error): + """ + HTTP redirection is occurring endlessly. + + @type location: L{bytes} + @ivar location: The first URL in the series of redirections which was + not followed. + """ + def __init__(self, code, message=None, response=None, location=None): + """ + Initializes an infinite redirection exception. + + @type code: L{bytes} + @param code: Refers to an HTTP status code, for example + C{http.NOT_FOUND}. If no C{message} is given, C{code} is mapped to a + descriptive string that is used instead. + + @type message: L{bytes} + @param message: A short error message, for example "NOT FOUND". + + @type response: L{bytes} + @param response: A complete HTML document for an error page. + + @type location: L{bytes} + @param location: The location response-header field value. It is an + absolute URI used to redirect the receiver to a location other than + the Request-URI so the request can be completed. + """ + Error.__init__(self, code, message, response) + if self.message and location: + self.message = self.message + b" to " + location + self.location = location + + + +class RedirectWithNoLocation(Error): + """ + Exception passed to L{ResponseFailed} if we got a redirect without a + C{Location} header field. + + @type uri: L{bytes} + @ivar uri: The URI which failed to give a proper location header + field. + + @since: 11.1 + """ + + def __init__(self, code, message, uri): + """ + Initializes a page redirect exception when no location is given. + + @type code: L{bytes} + @param code: Refers to an HTTP status code, for example + C{http.NOT_FOUND}. If no C{message} is given, C{code} is mapped to + a descriptive string that is used instead. + + @type message: L{bytes} + @param message: A short error message. + + @type uri: L{bytes} + @param uri: The URI which failed to give a proper location header + field. 
+ """ + Error.__init__(self, code, message) + self.message = self.message + b" to " + uri + self.uri = uri + + + +class UnsupportedMethod(Exception): + """ + Raised by a resource when faced with a strange request method. + + RFC 2616 (HTTP 1.1) gives us two choices when faced with this situation: + If the type of request is known to us, but not allowed for the requested + resource, respond with NOT_ALLOWED. Otherwise, if the request is something + we don't know how to deal with in any case, respond with NOT_IMPLEMENTED. + + When this exception is raised by a Resource's render method, the server + will make the appropriate response. + + This exception's first argument MUST be a sequence of the methods the + resource *does* support. + """ + + allowedMethods = () + + def __init__(self, allowedMethods, *args): + Exception.__init__(self, allowedMethods, *args) + self.allowedMethods = allowedMethods + + if not isinstance(allowedMethods, Sequence): + raise TypeError( + "First argument must be a sequence of supported methods, " + "but my first argument is not a sequence.") + + + def __str__(self): + return "Expected one of %r" % (self.allowedMethods,) + + + +class SchemeNotSupported(Exception): + """ + The scheme of a URI was not one of the supported values. + """ + + + +class RenderError(Exception): + """ + Base exception class for all errors which can occur during template + rendering. + """ + + + +class MissingRenderMethod(RenderError): + """ + Tried to use a render method which does not exist. + + @ivar element: The element which did not have the render method. + @ivar renderName: The name of the renderer which could not be found. + """ + def __init__(self, element, renderName): + RenderError.__init__(self, element, renderName) + self.element = element + self.renderName = renderName + + + def __repr__(self): + return '%r: %r had no render method named %r' % ( + self.__class__.__name__, self.element, self.renderName) + + + +class MissingTemplateLoader(RenderError): + """ + L{MissingTemplateLoader} is raised when trying to render an Element without + a template loader, i.e. a C{loader} attribute. + + @ivar element: The Element which did not have a document factory. + """ + def __init__(self, element): + RenderError.__init__(self, element) + self.element = element + + + def __repr__(self): + return '%r: %r had no loader' % (self.__class__.__name__, + self.element) + + + +class UnexposedMethodError(Exception): + """ + Raised on any attempt to get a method which has not been exposed. + """ + + + +class UnfilledSlot(Exception): + """ + During flattening, a slot with no associated data was encountered. + """ + + + +class UnsupportedType(Exception): + """ + During flattening, an object of a type which cannot be flattened was + encountered. + """ + + +class ExcessiveBufferingError(Exception): + """ + The HTTP/2 protocol has been forced to buffer an excessive amount of + outbound data, and has therefore closed the connection and dropped all + outbound data. + """ + + + +class FlattenerError(Exception): + """ + An error occurred while flattening an object. + + @ivar _roots: A list of the objects on the flattener's stack at the time + the unflattenable object was encountered. The first element is least + deeply nested object and the last element is the most deeply nested. 
+ """ + def __init__(self, exception, roots, traceback): + self._exception = exception + self._roots = roots + self._traceback = traceback + Exception.__init__(self, exception, roots, traceback) + + + def _formatRoot(self, obj): + """ + Convert an object from C{self._roots} to a string suitable for + inclusion in a render-traceback (like a normal Python traceback, but + can include "frame" source locations which are not in Python source + files). + + @param obj: Any object which can be a render step I{root}. + Typically, L{Tag}s, strings, and other simple Python types. + + @return: A string representation of C{obj}. + @rtype: L{str} + """ + # There's a circular dependency between this class and 'Tag', although + # only for an isinstance() check. + from twisted.web.template import Tag + + if isinstance(obj, (bytes, str, unicode)): + # It's somewhat unlikely that there will ever be a str in the roots + # list. However, something like a MemoryError during a str.replace + # call (eg, replacing " with ") could possibly cause this. + # Likewise, UTF-8 encoding a unicode string to a byte string might + # fail like this. + if len(obj) > 40: + if isinstance(obj, unicode): + ellipsis = u'<...>' + else: + ellipsis = b'<...>' + return ascii(obj[:20] + ellipsis + obj[-20:]) + else: + return ascii(obj) + elif isinstance(obj, Tag): + if obj.filename is None: + return 'Tag <' + obj.tagName + '>' + else: + return "File \"%s\", line %d, column %d, in \"%s\"" % ( + obj.filename, obj.lineNumber, + obj.columnNumber, obj.tagName) + else: + return ascii(obj) + + + def __repr__(self): + """ + Present a string representation which includes a template traceback, so + we can tell where this error occurred in the template, as well as in + Python. + """ + # Avoid importing things unnecessarily until we actually need them; + # since this is an 'error' module we should be extra paranoid about + # that. + from traceback import format_list + if self._roots: + roots = ' ' + '\n '.join([ + self._formatRoot(r) for r in self._roots]) + '\n' + else: + roots = '' + if self._traceback: + traceback = '\n'.join([ + line + for entry in format_list(self._traceback) + for line in entry.splitlines()]) + '\n' + else: + traceback = '' + return ( + 'Exception while flattening:\n' + + roots + traceback + + self._exception.__class__.__name__ + ': ' + + str(self._exception) + '\n') + + + def __str__(self): + return repr(self) + + + +class UnsupportedSpecialHeader(Exception): + """ + A HTTP/2 request was received that contained a HTTP/2 pseudo-header field + that is not recognised by Twisted. + """ diff --git a/contrib/python/Twisted/py2/twisted/web/guard.py b/contrib/python/Twisted/py2/twisted/web/guard.py new file mode 100644 index 0000000000..0e580815ed --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/guard.py @@ -0,0 +1,20 @@ +# -*- test-case-name: twisted.web.test.test_httpauth -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +Resource traversal integration with L{twisted.cred} to allow for +authentication and authorization of HTTP requests. +""" + +from __future__ import division, absolute_import + +# Expose HTTP authentication classes here. 
+from twisted.web._auth.wrapper import HTTPAuthSessionWrapper +from twisted.web._auth.basic import BasicCredentialFactory +from twisted.web._auth.digest import DigestCredentialFactory + +__all__ = [ + "HTTPAuthSessionWrapper", + + "BasicCredentialFactory", "DigestCredentialFactory"] diff --git a/contrib/python/Twisted/py2/twisted/web/html.py b/contrib/python/Twisted/py2/twisted/web/html.py new file mode 100644 index 0000000000..5605f5f46a --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/html.py @@ -0,0 +1,57 @@ +# -*- test-case-name: twisted.web.test.test_html -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + + +"""I hold HTML generation helpers. +""" + +from twisted.python import log +from twisted.python.compat import NativeStringIO as StringIO, escape +from twisted.python.deprecate import deprecated +from incremental import Version + + + +@deprecated(Version('Twisted', 15, 3, 0), replacement='twisted.web.template') +def PRE(text): + "Wrap <pre> tags around some text and HTML-escape it." + return "<pre>"+escape(text)+"</pre>" + + + +@deprecated(Version('Twisted', 15, 3, 0), replacement='twisted.web.template') +def UL(lst): + io = StringIO() + io.write("<ul>\n") + for el in lst: + io.write("<li> %s</li>\n" % el) + io.write("</ul>") + return io.getvalue() + + + +@deprecated(Version('Twisted', 15, 3, 0), replacement='twisted.web.template') +def linkList(lst): + io = StringIO() + io.write("<ul>\n") + for hr, el in lst: + io.write('<li> <a href="%s">%s</a></li>\n' % (hr, el)) + io.write("</ul>") + return io.getvalue() + + + +@deprecated(Version('Twisted', 15, 3, 0), replacement='twisted.web.template') +def output(func, *args, **kw): + """output(func, *args, **kw) -> html string + Either return the result of a function (which presumably returns an + HTML-legal string) or a sparse HTMLized error message and a message + in the server log. + """ + try: + return func(*args, **kw) + except: + log.msg("Error calling %r:" % (func,)) + log.err() + return PRE("An error occurred.") diff --git a/contrib/python/Twisted/py2/twisted/web/http.py b/contrib/python/Twisted/py2/twisted/web/http.py new file mode 100644 index 0000000000..b7afa8b0d0 --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/http.py @@ -0,0 +1,3170 @@ +# -*- test-case-name: twisted.web.test.test_http -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +HyperText Transfer Protocol implementation. + +This is the basic server-side protocol implementation used by the Twisted +Web server. It can parse HTTP 1.0 requests and supports many HTTP 1.1 +features as well. Additionally, some functionality implemented here is +also useful for HTTP clients (such as the chunked encoding parser). + +@var CACHED: A marker value to be returned from cache-related request methods + to indicate to the caller that a cached response will be usable and no + response body should be generated. + +@var FOUND: An HTTP response code indicating a temporary redirect. + +@var NOT_MODIFIED: An HTTP response code indicating that a requested + pre-condition (for example, the condition represented by an + I{If-Modified-Since} header is present in the request) has succeeded. This + indicates a response body cached by the client can be used. + +@var PRECONDITION_FAILED: An HTTP response code indicating that a requested + pre-condition (for example, the condition represented by an I{If-None-Match} + header is present in the request) has failed. 
This should typically + indicate that the server has not taken the requested action. +""" + +from __future__ import division, absolute_import + +__all__ = [ + 'SWITCHING', 'OK', 'CREATED', 'ACCEPTED', 'NON_AUTHORITATIVE_INFORMATION', + 'NO_CONTENT', 'RESET_CONTENT', 'PARTIAL_CONTENT', 'MULTI_STATUS', + + 'MULTIPLE_CHOICE', 'MOVED_PERMANENTLY', 'FOUND', 'SEE_OTHER', + 'NOT_MODIFIED', 'USE_PROXY', 'TEMPORARY_REDIRECT', + + 'BAD_REQUEST', 'UNAUTHORIZED', 'PAYMENT_REQUIRED', 'FORBIDDEN', 'NOT_FOUND', + 'NOT_ALLOWED', 'NOT_ACCEPTABLE', 'PROXY_AUTH_REQUIRED', 'REQUEST_TIMEOUT', + 'CONFLICT', 'GONE', 'LENGTH_REQUIRED', 'PRECONDITION_FAILED', + 'REQUEST_ENTITY_TOO_LARGE', 'REQUEST_URI_TOO_LONG', + 'UNSUPPORTED_MEDIA_TYPE', 'REQUESTED_RANGE_NOT_SATISFIABLE', + 'EXPECTATION_FAILED', + + 'INTERNAL_SERVER_ERROR', 'NOT_IMPLEMENTED', 'BAD_GATEWAY', + 'SERVICE_UNAVAILABLE', 'GATEWAY_TIMEOUT', 'HTTP_VERSION_NOT_SUPPORTED', + 'INSUFFICIENT_STORAGE_SPACE', 'NOT_EXTENDED', + + 'RESPONSES', 'CACHED', + + 'urlparse', 'parse_qs', 'datetimeToString', 'datetimeToLogString', 'timegm', + 'stringToDatetime', 'toChunk', 'fromChunk', 'parseContentRange', + + 'StringTransport', 'HTTPClient', 'NO_BODY_CODES', 'Request', + 'PotentialDataLoss', 'HTTPChannel', 'HTTPFactory', + ] + + +# system imports +import tempfile +import base64, binascii +import cgi +import math +import time +import calendar +import warnings +import os +from io import BytesIO as StringIO + +try: + from urlparse import ( + ParseResult as ParseResultBytes, urlparse as _urlparse) + from urllib import unquote + from cgi import parse_header as _parseHeader +except ImportError: + from urllib.parse import ( + ParseResultBytes, urlparse as _urlparse, unquote_to_bytes as unquote) + + def _parseHeader(line): + # cgi.parse_header requires a str + key, pdict = cgi.parse_header(line.decode('charmap')) + + # We want the key as bytes, and cgi.parse_multipart (which consumes + # pdict) expects a dict of str keys but bytes values + key = key.encode('charmap') + pdict = {x:y.encode('charmap') for x, y in pdict.items()} + return (key, pdict) + + +from zope.interface import Attribute, Interface, implementer, provider + +# twisted imports +from twisted.python.compat import ( + _PY3, long, unicode, intToBytes, networkString, nativeString, _PY37PLUS) +from twisted.python.deprecate import deprecated +from twisted.python import log +from twisted.logger import Logger +from twisted.python.failure import Failure +from incremental import Version +from twisted.python.components import proxyForInterface +from twisted.internet import interfaces, protocol, address +from twisted.internet.defer import Deferred +from twisted.internet.interfaces import IProtocol +from twisted.internet._producer_helpers import _PullToPush +from twisted.protocols import policies, basic + +from twisted.web.iweb import ( + IRequest, IAccessLogFormatter, INonQueuedRequestFactory) +from twisted.web.http_headers import Headers, _sanitizeLinearWhitespace + +try: + from twisted.web._http2 import H2Connection + H2_ENABLED = True +except ImportError: + H2Connection = None + H2_ENABLED = False + + +from twisted.web._responses import ( + SWITCHING, + + OK, CREATED, ACCEPTED, NON_AUTHORITATIVE_INFORMATION, NO_CONTENT, + RESET_CONTENT, PARTIAL_CONTENT, MULTI_STATUS, + + MULTIPLE_CHOICE, MOVED_PERMANENTLY, FOUND, SEE_OTHER, NOT_MODIFIED, + USE_PROXY, TEMPORARY_REDIRECT, + + BAD_REQUEST, UNAUTHORIZED, PAYMENT_REQUIRED, FORBIDDEN, NOT_FOUND, + NOT_ALLOWED, NOT_ACCEPTABLE, PROXY_AUTH_REQUIRED, REQUEST_TIMEOUT, + CONFLICT, 
GONE, LENGTH_REQUIRED, PRECONDITION_FAILED, + REQUEST_ENTITY_TOO_LARGE, REQUEST_URI_TOO_LONG, UNSUPPORTED_MEDIA_TYPE, + REQUESTED_RANGE_NOT_SATISFIABLE, EXPECTATION_FAILED, + + INTERNAL_SERVER_ERROR, NOT_IMPLEMENTED, BAD_GATEWAY, SERVICE_UNAVAILABLE, + GATEWAY_TIMEOUT, HTTP_VERSION_NOT_SUPPORTED, INSUFFICIENT_STORAGE_SPACE, + NOT_EXTENDED, + + RESPONSES) + + +_intTypes = (int, long) + +# A common request timeout -- 1 minute. This is roughly what nginx uses, and +# so it seems to be a good choice for us too. +_REQUEST_TIMEOUT = 1 * 60 + +protocol_version = "HTTP/1.1" + +CACHED = """Magic constant returned by http.Request methods to set cache +validation headers when the request is conditional and the value fails +the condition.""" + +# backwards compatibility +responses = RESPONSES + + +# datetime parsing and formatting +weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] +monthname = [None, + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] +weekdayname_lower = [name.lower() for name in weekdayname] +monthname_lower = [name and name.lower() for name in monthname] + +def urlparse(url): + """ + Parse an URL into six components. + + This is similar to C{urlparse.urlparse}, but rejects C{unicode} input + and always produces C{bytes} output. + + @type url: C{bytes} + + @raise TypeError: The given url was a C{unicode} string instead of a + C{bytes}. + + @return: The scheme, net location, path, params, query string, and fragment + of the URL - all as C{bytes}. + @rtype: C{ParseResultBytes} + """ + if isinstance(url, unicode): + raise TypeError("url must be bytes, not unicode") + scheme, netloc, path, params, query, fragment = _urlparse(url) + if isinstance(scheme, unicode): + scheme = scheme.encode('ascii') + netloc = netloc.encode('ascii') + path = path.encode('ascii') + query = query.encode('ascii') + fragment = fragment.encode('ascii') + return ParseResultBytes(scheme, netloc, path, params, query, fragment) + + + +def parse_qs(qs, keep_blank_values=0, strict_parsing=0): + """ + Like C{cgi.parse_qs}, but with support for parsing byte strings on Python 3. + + @type qs: C{bytes} + """ + d = {} + items = [s2 for s1 in qs.split(b"&") for s2 in s1.split(b";")] + for item in items: + try: + k, v = item.split(b"=", 1) + except ValueError: + if strict_parsing: + raise + continue + if v or keep_blank_values: + k = unquote(k.replace(b"+", b" ")) + v = unquote(v.replace(b"+", b" ")) + if k in d: + d[k].append(v) + else: + d[k] = [v] + return d + + + +def datetimeToString(msSinceEpoch=None): + """ + Convert seconds since epoch to HTTP datetime string. + + @rtype: C{bytes} + """ + if msSinceEpoch == None: + msSinceEpoch = time.time() + year, month, day, hh, mm, ss, wd, y, z = time.gmtime(msSinceEpoch) + s = networkString("%s, %02d %3s %4d %02d:%02d:%02d GMT" % ( + weekdayname[wd], + day, monthname[month], year, + hh, mm, ss)) + return s + + + +def datetimeToLogString(msSinceEpoch=None): + """ + Convert seconds since epoch to log datetime string. 
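+
+    An illustrative sketch; the exact value depends on the timestamp given::
+
+        datetimeToLogString(784111777)  # '[06/Nov/1994:08:49:37 +0000]'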
+ + @rtype: C{str} + """ + if msSinceEpoch == None: + msSinceEpoch = time.time() + year, month, day, hh, mm, ss, wd, y, z = time.gmtime(msSinceEpoch) + s = "[%02d/%3s/%4d:%02d:%02d:%02d +0000]" % ( + day, monthname[month], year, + hh, mm, ss) + return s + + + +def timegm(year, month, day, hour, minute, second): + """ + Convert time tuple in GMT to seconds since epoch, GMT + """ + EPOCH = 1970 + if year < EPOCH: + raise ValueError("Years prior to %d not supported" % (EPOCH,)) + assert 1 <= month <= 12 + days = 365*(year-EPOCH) + calendar.leapdays(EPOCH, year) + for i in range(1, month): + days = days + calendar.mdays[i] + if month > 2 and calendar.isleap(year): + days = days + 1 + days = days + day - 1 + hours = days*24 + hour + minutes = hours*60 + minute + seconds = minutes*60 + second + return seconds + + + +def stringToDatetime(dateString): + """ + Convert an HTTP date string (one of three formats) to seconds since epoch. + + @type dateString: C{bytes} + """ + parts = nativeString(dateString).split() + + if not parts[0][0:3].lower() in weekdayname_lower: + # Weekday is stupid. Might have been omitted. + try: + return stringToDatetime(b"Sun, " + dateString) + except ValueError: + # Guess not. + pass + + partlen = len(parts) + if (partlen == 5 or partlen == 6) and parts[1].isdigit(): + # 1st date format: Sun, 06 Nov 1994 08:49:37 GMT + # (Note: "GMT" is literal, not a variable timezone) + # (also handles without "GMT") + # This is the normal format + day = parts[1] + month = parts[2] + year = parts[3] + time = parts[4] + elif (partlen == 3 or partlen == 4) and parts[1].find('-') != -1: + # 2nd date format: Sunday, 06-Nov-94 08:49:37 GMT + # (Note: "GMT" is literal, not a variable timezone) + # (also handles without without "GMT") + # Two digit year, yucko. + day, month, year = parts[1].split('-') + time = parts[2] + year=int(year) + if year < 69: + year = year + 2000 + elif year < 100: + year = year + 1900 + elif len(parts) == 5: + # 3rd date format: Sun Nov 6 08:49:37 1994 + # ANSI C asctime() format. + day = parts[2] + month = parts[1] + year = parts[4] + time = parts[3] + else: + raise ValueError("Unknown datetime format %r" % dateString) + + day = int(day) + month = int(monthname_lower.index(month.lower())) + year = int(year) + hour, min, sec = map(int, time.split(':')) + return int(timegm(year, month, day, hour, min, sec)) + + + +def toChunk(data): + """ + Convert string to a chunk. + + @type data: C{bytes} + + @returns: a tuple of C{bytes} representing the chunked encoding of data + """ + return (networkString('%x' % (len(data),)), b"\r\n", data, b"\r\n") + + + +def fromChunk(data): + """ + Convert chunk to string. + + @type data: C{bytes} + + @return: tuple of (result, remaining) - both C{bytes}. + + @raise ValueError: If the given data is not a correctly formatted chunked + byte string. + """ + prefix, rest = data.split(b'\r\n', 1) + length = int(prefix, 16) + if length < 0: + raise ValueError("Chunk length must be >= 0, not %d" % (length,)) + if rest[length:length + 2] != b'\r\n': + raise ValueError("chunk must end with CRLF") + return rest[:length], rest[length + 2:] + + + +def parseContentRange(header): + """ + Parse a content-range header into (start, end, realLength). + + realLength might be None if real length is not known ('*'). 
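+
+    A sketch of the expected shape, assuming a well-formed native-string
+    header value::
+
+        parseContentRange("bytes 0-499/1234")  # (0, 499, 1234)
+        parseContentRange("bytes 0-499/*")     # (0, 499, None)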
+ """ + kind, other = header.strip().split() + if kind.lower() != "bytes": + raise ValueError("a range of type %r is not supported") + startend, realLength = other.split("/") + start, end = map(int, startend.split("-")) + if realLength == "*": + realLength = None + else: + realLength = int(realLength) + return (start, end, realLength) + + + +class _IDeprecatedHTTPChannelToRequestInterface(Interface): + """ + The interface L{HTTPChannel} expects of L{Request}. + """ + + requestHeaders = Attribute( + "A L{http_headers.Headers} instance giving all received HTTP request " + "headers.") + + responseHeaders = Attribute( + "A L{http_headers.Headers} instance holding all HTTP response " + "headers to be sent.") + + + def connectionLost(reason): + """ + The underlying connection has been lost. + + @param reason: A failure instance indicating the reason why + the connection was lost. + @type reason: L{twisted.python.failure.Failure} + """ + + + def gotLength(length): + """ + Called when L{HTTPChannel} has determined the length, if any, + of the incoming request's body. + + @param length: The length of the request's body. + @type length: L{int} if the request declares its body's length + and L{None} if it does not. + """ + + + def handleContentChunk(data): + """ + Deliver a received chunk of body data to the request. Note + this does not imply chunked transfer encoding. + + @param data: The received chunk. + @type data: L{bytes} + """ + + + def parseCookies(): + """ + Parse the request's cookies out of received headers. + """ + + + def requestReceived(command, path, version): + """ + Called when the entire request, including its body, has been + received. + + @param command: The request's HTTP command. + @type command: L{bytes} + + @param path: The request's path. Note: this is actually what + RFC7320 calls the URI. + @type path: L{bytes} + + @param version: The request's HTTP version. + @type version: L{bytes} + """ + + + def __eq__(other): + """ + Determines if two requests are the same object. + + @param other: Another object whose identity will be compared + to this instance's. + + @return: L{True} when the two are the same object and L{False} + when not. + @rtype: L{bool} + """ + + + def __ne__(other): + """ + Determines if two requests are not the same object. + + @param other: Another object whose identity will be compared + to this instance's. + + @return: L{True} when the two are not the same object and + L{False} when they are. + @rtype: L{bool} + """ + + + def __hash__(): + """ + Generate a hash value for the request. + + @return: The request's hash value. + @rtype: L{int} + """ + + + +class StringTransport: + """ + I am a StringIO wrapper that conforms for the transport API. I support + the `writeSequence' method. + """ + def __init__(self): + self.s = StringIO() + def writeSequence(self, seq): + self.s.write(b''.join(seq)) + def __getattr__(self, attr): + return getattr(self.__dict__['s'], attr) + + + +class HTTPClient(basic.LineReceiver): + """ + A client for HTTP 1.0. + + Notes: + You probably want to send a 'Host' header with the name of the site you're + connecting to, in order to not break name based virtual hosting. + + @ivar length: The length of the request body in bytes. + @type length: C{int} + + @ivar firstLine: Are we waiting for the first header line? + @type firstLine: C{bool} + + @ivar __buffer: The buffer that stores the response to the HTTP request. + @type __buffer: A C{StringIO} object. + + @ivar _header: Part or all of an HTTP request header. 
+ @type _header: C{bytes} + """ + length = None + firstLine = True + __buffer = None + _header = b"" + + def sendCommand(self, command, path): + self.transport.writeSequence([command, b' ', path, b' HTTP/1.0\r\n']) + + def sendHeader(self, name, value): + if not isinstance(value, bytes): + # XXX Deprecate this case + value = networkString(str(value)) + santizedName = _sanitizeLinearWhitespace(name) + santizedValue = _sanitizeLinearWhitespace(value) + self.transport.writeSequence( + [santizedName, b': ', santizedValue, b'\r\n']) + + def endHeaders(self): + self.transport.write(b'\r\n') + + + def extractHeader(self, header): + """ + Given a complete HTTP header, extract the field name and value and + process the header. + + @param header: a complete HTTP request header of the form + 'field-name: value'. + @type header: C{bytes} + """ + key, val = header.split(b':', 1) + val = val.lstrip() + self.handleHeader(key, val) + if key.lower() == b'content-length': + self.length = int(val) + + + def lineReceived(self, line): + """ + Parse the status line and headers for an HTTP request. + + @param line: Part of an HTTP request header. Request bodies are parsed + in L{HTTPClient.rawDataReceived}. + @type line: C{bytes} + """ + if self.firstLine: + self.firstLine = False + l = line.split(None, 2) + version = l[0] + status = l[1] + try: + message = l[2] + except IndexError: + # sometimes there is no message + message = b"" + self.handleStatus(version, status, message) + return + if not line: + if self._header != b"": + # Only extract headers if there are any + self.extractHeader(self._header) + self.__buffer = StringIO() + self.handleEndHeaders() + self.setRawMode() + return + + if line.startswith(b'\t') or line.startswith(b' '): + # This line is part of a multiline header. According to RFC 822, in + # "unfolding" multiline headers you do not strip the leading + # whitespace on the continuing line. + self._header = self._header + line + elif self._header: + # This line starts a new header, so process the previous one. + self.extractHeader(self._header) + self._header = line + else: # First header + self._header = line + + + def connectionLost(self, reason): + self.handleResponseEnd() + + def handleResponseEnd(self): + """ + The response has been completely received. + + This callback may be invoked more than once per request. + """ + if self.__buffer is not None: + b = self.__buffer.getvalue() + self.__buffer = None + self.handleResponse(b) + + def handleResponsePart(self, data): + self.__buffer.write(data) + + def connectionMade(self): + pass + + def handleStatus(self, version, status, message): + """ + Called when the status-line is received. + + @param version: e.g. 'HTTP/1.0' + @param status: e.g. '200' + @type status: C{bytes} + @param message: e.g. 'OK' + """ + + def handleHeader(self, key, val): + """ + Called every time a header is received. + """ + + def handleEndHeaders(self): + """ + Called when all headers have been received. + """ + + + def rawDataReceived(self, data): + if self.length is not None: + data, rest = data[:self.length], data[self.length:] + self.length -= len(data) + else: + rest = b'' + self.handleResponsePart(data) + if self.length == 0: + self.handleResponseEnd() + self.setLineMode(rest) + + + +# response codes that must have empty bodies +NO_BODY_CODES = (204, 304) + + +# Sentinel object that detects people explicitly passing `queued` to Request. 
+_QUEUED_SENTINEL = object() + + + +def _getContentFile(length): + """ + Get a writeable file-like object to which request content can be written. + """ + if length is not None and length < 100000: + return StringIO() + return tempfile.TemporaryFile() + + + +@implementer(interfaces.IConsumer, + _IDeprecatedHTTPChannelToRequestInterface) +class Request: + """ + A HTTP request. + + Subclasses should override the process() method to determine how + the request will be processed. + + @ivar method: The HTTP method that was used, e.g. C{b'GET'}. + @type method: L{bytes} + + @ivar uri: The full encoded URI which was requested (including query + arguments), e.g. C{b'/a/b%20/c?q=v'}. + @type uri: L{bytes} + + @ivar path: The encoded path of the request URI (not including query + arguments), e.g. C{b'/a/b%20/c'}. + @type path: L{bytes} + + @ivar args: A mapping of decoded query argument names as L{bytes} to + corresponding query argument values as L{list}s of L{bytes}. + For example, for a URI with C{foo=bar&foo=baz&quux=spam} + as its query part C{args} will be C{{b'foo': [b'bar', b'baz'], + b'quux': [b'spam']}}. + @type args: L{dict} of L{bytes} to L{list} of L{bytes} + + @ivar content: A file-like object giving the request body. This may be + a file on disk, an L{io.BytesIO}, or some other type. The + implementation is free to decide on a per-request basis. + @type content: L{typing.BinaryIO} + + @ivar cookies: The cookies that will be sent in the response. + @type cookies: L{list} of L{bytes} + + @type requestHeaders: L{http_headers.Headers} + @ivar requestHeaders: All received HTTP request headers. + + @type responseHeaders: L{http_headers.Headers} + @ivar responseHeaders: All HTTP response headers to be sent. + + @ivar notifications: A L{list} of L{Deferred}s which are waiting for + notification that the response to this request has been finished + (successfully or with an error). Don't use this attribute directly, + instead use the L{Request.notifyFinish} method. + + @ivar _disconnected: A flag which is C{False} until the connection over + which this request was received is closed and which is C{True} after + that. + @type _disconnected: L{bool} + + @ivar _log: A logger instance for request related messages. + @type _log: L{twisted.logger.Logger} + """ + producer = None + finished = 0 + code = OK + code_message = RESPONSES[OK] + method = "(no method yet)" + clientproto = b"(no clientproto yet)" + uri = "(no uri yet)" + startedWriting = 0 + chunked = 0 + sentLength = 0 # content-length of response, or total bytes sent via chunking + etag = None + lastModified = None + args = None + path = None + content = None + _forceSSL = 0 + _disconnected = False + _log = Logger() + + def __init__(self, channel, queued=_QUEUED_SENTINEL): + """ + @param channel: the channel we're connected to. + @param queued: (deprecated) are we in the request queue, or can we + start writing to the transport? 
+ """ + self.notifications = [] + self.channel = channel + + # Cache the client and server information, we'll need this + # later to be serialized and sent with the request so CGIs + # will work remotely + self.client = self.channel.getPeer() + self.host = self.channel.getHost() + + self.requestHeaders = Headers() + self.received_cookies = {} + self.responseHeaders = Headers() + self.cookies = [] # outgoing cookies + self.transport = self.channel.transport + + if queued is _QUEUED_SENTINEL: + queued = False + + self.queued = queued + + + def _cleanup(self): + """ + Called when have finished responding and are no longer queued. + """ + if self.producer: + self._log.failure( + '', + Failure( + RuntimeError( + "Producer was not unregistered for %s" % (self.uri,) + ) + ) + ) + self.unregisterProducer() + self.channel.requestDone(self) + del self.channel + if self.content is not None: + try: + self.content.close() + except OSError: + # win32 suckiness, no idea why it does this + pass + del self.content + for d in self.notifications: + d.callback(None) + self.notifications = [] + + # methods for channel - end users should not use these + + def noLongerQueued(self): + """ + Notify the object that it is no longer queued. + + We start writing whatever data we have to the transport, etc. + + This method is not intended for users. + + In 16.3 this method was changed to become a no-op, as L{Request} + objects are now never queued. + """ + pass + + + def gotLength(self, length): + """ + Called when HTTP channel got length of content in this request. + + This method is not intended for users. + + @param length: The length of the request body, as indicated by the + request headers. L{None} if the request headers do not indicate a + length. + """ + self.content = _getContentFile(length) + + + def parseCookies(self): + """ + Parse cookie headers. + + This method is not intended for users. + """ + cookieheaders = self.requestHeaders.getRawHeaders(b"cookie") + + if cookieheaders is None: + return + + for cookietxt in cookieheaders: + if cookietxt: + for cook in cookietxt.split(b';'): + cook = cook.lstrip() + try: + k, v = cook.split(b'=', 1) + self.received_cookies[k] = v + except ValueError: + pass + + + def handleContentChunk(self, data): + """ + Write a chunk of data. + + This method is not intended for users. + """ + self.content.write(data) + + + def requestReceived(self, command, path, version): + """ + Called by channel when all data has been received. + + This method is not intended for users. + + @type command: C{bytes} + @param command: The HTTP verb of this request. This has the case + supplied by the client (eg, it maybe "get" rather than "GET"). + + @type path: C{bytes} + @param path: The URI of this request. + + @type version: C{bytes} + @param version: The HTTP version of this request. + """ + clength = self.content.tell() + self.content.seek(0, 0) + self.args = {} + + self.method, self.uri = command, path + self.clientproto = version + x = self.uri.split(b'?', 1) + + if len(x) == 1: + self.path = self.uri + else: + self.path, argstring = x + self.args = parse_qs(argstring, 1) + + # Argument processing + args = self.args + ctype = self.requestHeaders.getRawHeaders(b'content-type') + if ctype is not None: + ctype = ctype[0] + + if self.method == b"POST" and ctype and clength: + mfd = b'multipart/form-data' + key, pdict = _parseHeader(ctype) + # This weird CONTENT-LENGTH param is required by + # cgi.parse_multipart() in some versions of Python 3.7+, see + # bpo-29979. 
It looks like this will be relaxed and backported, see + # https://github.com/python/cpython/pull/8530. + pdict["CONTENT-LENGTH"] = clength + if key == b'application/x-www-form-urlencoded': + args.update(parse_qs(self.content.read(), 1)) + elif key == mfd: + try: + if _PY37PLUS: + cgiArgs = cgi.parse_multipart( + self.content, pdict, encoding='utf8', + errors="surrogateescape") + else: + cgiArgs = cgi.parse_multipart(self.content, pdict) + + if not _PY37PLUS and _PY3: + # The parse_multipart function on Python 3 + # decodes the header bytes as iso-8859-1 and + # returns a str key -- we want bytes so encode + # it back + self.args.update({x.encode('iso-8859-1'): y + for x, y in cgiArgs.items()}) + elif _PY37PLUS: + # The parse_multipart function on Python 3.7+ + # decodes the header bytes as iso-8859-1 and + # decodes the body bytes as utf8 with + # surrogateescape -- we want bytes + self.args.update({ + x.encode('iso-8859-1'): \ + [z.encode('utf8', "surrogateescape") + if isinstance(z, str) else z for z in y] + for x, y in cgiArgs.items()}) + + else: + self.args.update(cgiArgs) + except Exception as e: + # It was a bad request, or we got a signal. + self.channel._respondToBadRequestAndDisconnect() + if isinstance(e, (TypeError, ValueError, KeyError)): + return + else: + # If it's not a userspace error from CGI, reraise + raise + + self.content.seek(0, 0) + + self.process() + + + def __repr__(self): + """ + Return a string description of the request including such information + as the request method and request URI. + + @return: A string loosely describing this L{Request} object. + @rtype: L{str} + """ + return '<%s at 0x%x method=%s uri=%s clientproto=%s>' % ( + self.__class__.__name__, + id(self), + nativeString(self.method), + nativeString(self.uri), + nativeString(self.clientproto)) + + + def process(self): + """ + Override in subclasses. + + This method is not intended for users. + """ + pass + + + # consumer interface + + def registerProducer(self, producer, streaming): + """ + Register a producer. + """ + if self.producer: + raise ValueError( + "registering producer %s before previous one (%s) was " + "unregistered" % (producer, self.producer)) + + self.streamingProducer = streaming + self.producer = producer + self.channel.registerProducer(producer, streaming) + + def unregisterProducer(self): + """ + Unregister the producer. + """ + self.channel.unregisterProducer() + self.producer = None + + + # The following is the public interface that people should be + # writing to. + def getHeader(self, key): + """ + Get an HTTP request header. + + @type key: C{bytes} or C{str} + @param key: The name of the header to get the value of. + + @rtype: C{bytes} or C{str} or L{None} + @return: The value of the specified header, or L{None} if that header + was not present in the request. The string type of the result + matches the type of L{key}. + """ + value = self.requestHeaders.getRawHeaders(key) + if value is not None: + return value[-1] + + + def getCookie(self, key): + """ + Get a cookie that was sent from the network. + + @type key: C{bytes} + @param key: The name of the cookie to get. + + @rtype: C{bytes} or C{None} + @returns: The value of the specified cookie, or L{None} if that cookie + was not present in the request. + """ + return self.received_cookies.get(key) + + + def notifyFinish(self): + """ + Notify when the response to this request has finished. + + @note: There are some caveats around the reliability of the delivery of + this notification. + + 1. 
If this L{Request}'s channel is paused, the notification + will not be delivered. This can happen in one of two ways; + either you can call C{request.transport.pauseProducing} + yourself, or, + + 2. In order to deliver this notification promptly when a client + disconnects, the reactor must continue reading from the + transport, so that it can tell when the underlying network + connection has gone away. Twisted Web will only keep + reading up until a finite (small) maximum buffer size before + it gives up and pauses the transport itself. If this + occurs, you will not discover that the connection has gone + away until a timeout fires or until the application attempts + to send some data via L{Request.write}. + + 3. It is theoretically impossible to distinguish between + successfully I{sending} a response and the peer successfully + I{receiving} it. There are several networking edge cases + where the L{Deferred}s returned by C{notifyFinish} will + indicate success, but the data will never be received. + There are also edge cases where the connection will appear + to fail, but in reality the response was delivered. As a + result, the information provided by the result of the + L{Deferred}s returned by this method should be treated as a + guess; do not make critical decisions in your applications + based upon it. + + @rtype: L{Deferred} + @return: A L{Deferred} which will be triggered when the request is + finished -- with a L{None} value if the request finishes + successfully or with an error if the request is interrupted by an + error (for example, the client closing the connection prematurely). + """ + self.notifications.append(Deferred()) + return self.notifications[-1] + + + def finish(self): + """ + Indicate that all response data has been written to this L{Request}. + """ + if self._disconnected: + raise RuntimeError( + "Request.finish called on a request after its connection was lost; " + "use Request.notifyFinish to keep track of this.") + if self.finished: + warnings.warn("Warning! request.finish called twice.", stacklevel=2) + return + + if not self.startedWriting: + # write headers + self.write(b'') + + if self.chunked: + # write last chunk and closing CRLF + self.channel.write(b"0\r\n\r\n") + + # log request + if (hasattr(self.channel, "factory") and + self.channel.factory is not None): + self.channel.factory.log(self) + + self.finished = 1 + if not self.queued: + self._cleanup() + + + def write(self, data): + """ + Write some data as a result of an HTTP request. The first + time this is called, it writes out response data. + + @type data: C{bytes} + @param data: Some bytes to be sent as part of the response body. + """ + if self.finished: + raise RuntimeError('Request.write called on a request after ' + 'Request.finish was called.') + + if self._disconnected: + # Don't attempt to write any data to a disconnected client. + # The RuntimeError exception will be thrown as usual when + # request.finish is called + return + + if not self.startedWriting: + self.startedWriting = 1 + version = self.clientproto + code = intToBytes(self.code) + reason = self.code_message + headers = [] + + # if we don't have a content length, we send data in + # chunked mode, so that we can support pipelining in + # persistent connections. 
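+            #
+            # Illustrative framing only (see toChunk/fromChunk above): once
+            # chunked mode is selected, each write of b"hello" is sent as
+            # b"5\r\nhello\r\n", and finish() later emits the terminating
+            # b"0\r\n\r\n".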
+ if ((version == b"HTTP/1.1") and + (self.responseHeaders.getRawHeaders(b'content-length') is None) and + self.method != b"HEAD" and self.code not in NO_BODY_CODES): + headers.append((b'Transfer-Encoding', b'chunked')) + self.chunked = 1 + + if self.lastModified is not None: + if self.responseHeaders.hasHeader(b'last-modified'): + self._log.info( + "Warning: last-modified specified both in" + " header list and lastModified attribute." + ) + else: + self.responseHeaders.setRawHeaders( + b'last-modified', + [datetimeToString(self.lastModified)]) + + if self.etag is not None: + self.responseHeaders.setRawHeaders(b'ETag', [self.etag]) + + for name, values in self.responseHeaders.getAllRawHeaders(): + for value in values: + headers.append((name, value)) + + for cookie in self.cookies: + headers.append((b'Set-Cookie', cookie)) + + self.channel.writeHeaders(version, code, reason, headers) + + # if this is a "HEAD" request, we shouldn't return any data + if self.method == b"HEAD": + self.write = lambda data: None + return + + # for certain result codes, we should never return any data + if self.code in NO_BODY_CODES: + self.write = lambda data: None + return + + self.sentLength = self.sentLength + len(data) + if data: + if self.chunked: + self.channel.writeSequence(toChunk(data)) + else: + self.channel.write(data) + + def addCookie(self, k, v, expires=None, domain=None, path=None, + max_age=None, comment=None, secure=None, httpOnly=False, + sameSite=None): + """ + Set an outgoing HTTP cookie. + + In general, you should consider using sessions instead of cookies, see + L{twisted.web.server.Request.getSession} and the + L{twisted.web.server.Session} class for details. + + @param k: cookie name + @type k: L{bytes} or L{unicode} + + @param v: cookie value + @type v: L{bytes} or L{unicode} + + @param expires: cookie expire attribute value in + "Wdy, DD Mon YYYY HH:MM:SS GMT" format + @type expires: L{bytes} or L{unicode} + + @param domain: cookie domain + @type domain: L{bytes} or L{unicode} + + @param path: cookie path + @type path: L{bytes} or L{unicode} + + @param max_age: cookie expiration in seconds from reception + @type max_age: L{bytes} or L{unicode} + + @param comment: cookie comment + @type comment: L{bytes} or L{unicode} + + @param secure: direct browser to send the cookie on encrypted + connections only + @type secure: L{bool} + + @param httpOnly: direct browser not to expose cookies through channels + other than HTTP (and HTTPS) requests + @type httpOnly: L{bool} + + @param sameSite: One of L{None} (default), C{'lax'} or C{'strict'}. + Direct browsers not to send this cookie on cross-origin requests. + Please see: + U{https://tools.ietf.org/html/draft-west-first-party-cookies-07} + @type sameSite: L{None}, L{bytes} or L{unicode} + + @raises: L{DeprecationWarning} if an argument is not L{bytes} or + L{unicode}. + L{ValueError} if the value for C{sameSite} is not supported. + """ + def _ensureBytes(val): + """ + Ensure that C{val} is bytes, encoding using UTF-8 if + needed. + + @param val: L{bytes} or L{unicode} + + @return: L{bytes} + """ + if val is None: + # It's None, so we don't want to touch it + return val + + if isinstance(val, bytes): + return val + else: + return val.encode('utf8') + + + def _sanitize(val): + """ + Replace linear whitespace (C{\r}, C{\n}, C{\r\n}) and + semicolons C{;} in C{val} with a single space. 
+ + @param val: L{bytes} + @return: L{bytes} + """ + return _sanitizeLinearWhitespace(val).replace(b';', b' ') + + cookie = ( + _sanitize(_ensureBytes(k)) + + b"=" + + _sanitize(_ensureBytes(v))) + if expires is not None: + cookie = cookie + b"; Expires=" + _sanitize(_ensureBytes(expires)) + if domain is not None: + cookie = cookie + b"; Domain=" + _sanitize(_ensureBytes(domain)) + if path is not None: + cookie = cookie + b"; Path=" + _sanitize(_ensureBytes(path)) + if max_age is not None: + cookie = cookie + b"; Max-Age=" + _sanitize(_ensureBytes(max_age)) + if comment is not None: + cookie = cookie + b"; Comment=" + _sanitize(_ensureBytes(comment)) + if secure: + cookie = cookie + b"; Secure" + if httpOnly: + cookie = cookie + b"; HttpOnly" + if sameSite: + sameSite = _ensureBytes(sameSite).lower() + if sameSite not in [b"lax", b"strict"]: + raise ValueError( + "Invalid value for sameSite: " + repr(sameSite)) + cookie += b"; SameSite=" + sameSite + self.cookies.append(cookie) + + def setResponseCode(self, code, message=None): + """ + Set the HTTP response code. + + @type code: L{int} + @type message: L{bytes} + """ + if not isinstance(code, _intTypes): + raise TypeError("HTTP response code must be int or long") + self.code = code + if message: + if not isinstance(message, bytes): + raise TypeError("HTTP response status message must be bytes") + self.code_message = message + else: + self.code_message = RESPONSES.get(code, b"Unknown Status") + + + def setHeader(self, name, value): + """ + Set an HTTP response header. Overrides any previously set values for + this header. + + @type k: L{bytes} or L{str} + @param k: The name of the header for which to set the value. + + @type v: L{bytes} or L{str} + @param v: The value to set for the named header. A L{str} will be + UTF-8 encoded, which may not interoperable with other + implementations. Avoid passing non-ASCII characters if possible. + """ + self.responseHeaders.setRawHeaders(name, [value]) + + + def redirect(self, url): + """ + Utility function that does a redirect. + + Set the response code to L{FOUND} and the I{Location} header to the + given URL. + + The request should have C{finish()} called after this. + + @param url: I{Location} header value. + @type url: L{bytes} or L{str} + """ + self.setResponseCode(FOUND) + self.setHeader(b"location", url) + + + def setLastModified(self, when): + """ + Set the C{Last-Modified} time for the response to this request. + + If I am called more than once, I ignore attempts to set + Last-Modified earlier, only replacing the Last-Modified time + if it is to a later value. + + If I am a conditional request, I may modify my response code + to L{NOT_MODIFIED} if appropriate for the time given. + + @param when: The last time the resource being returned was + modified, in seconds since the epoch. + @type when: number + @return: If I am a I{If-Modified-Since} conditional request and + the time given is not newer than the condition, I return + L{http.CACHED<CACHED>} to indicate that you should write no + body. Otherwise, I return a false value. + """ + # time.time() may be a float, but the HTTP-date strings are + # only good for whole seconds. 
+ when = int(math.ceil(when)) + if (not self.lastModified) or (self.lastModified < when): + self.lastModified = when + + modifiedSince = self.getHeader(b'if-modified-since') + if modifiedSince: + firstPart = modifiedSince.split(b';', 1)[0] + try: + modifiedSince = stringToDatetime(firstPart) + except ValueError: + return None + if modifiedSince >= self.lastModified: + self.setResponseCode(NOT_MODIFIED) + return CACHED + return None + + def setETag(self, etag): + """ + Set an C{entity tag} for the outgoing response. + + That's \"entity tag\" as in the HTTP/1.1 C{ETag} header, \"used + for comparing two or more entities from the same requested + resource.\" + + If I am a conditional request, I may modify my response code + to L{NOT_MODIFIED} or L{PRECONDITION_FAILED}, if appropriate + for the tag given. + + @param etag: The entity tag for the resource being returned. + @type etag: string + @return: If I am a C{If-None-Match} conditional request and + the tag matches one in the request, I return + L{http.CACHED<CACHED>} to indicate that you should write + no body. Otherwise, I return a false value. + """ + if etag: + self.etag = etag + + tags = self.getHeader(b"if-none-match") + if tags: + tags = tags.split() + if (etag in tags) or (b'*' in tags): + self.setResponseCode(((self.method in (b"HEAD", b"GET")) + and NOT_MODIFIED) + or PRECONDITION_FAILED) + return CACHED + return None + + + def getAllHeaders(self): + """ + Return dictionary mapping the names of all received headers to the last + value received for each. + + Since this method does not return all header information, + C{self.requestHeaders.getAllRawHeaders()} may be preferred. + """ + headers = {} + for k, v in self.requestHeaders.getAllRawHeaders(): + headers[k.lower()] = v[-1] + return headers + + + def getRequestHostname(self): + """ + Get the hostname that the user passed in to the request. + + This will either use the Host: header (if it is available) or the + host we are listening on if the header is unavailable. + + @returns: the requested hostname + @rtype: C{bytes} + """ + # XXX This method probably has no unit tests. I changed it a ton and + # nothing failed. + host = self.getHeader(b'host') + if host: + return host.split(b':', 1)[0] + return networkString(self.getHost().host) + + + def getHost(self): + """ + Get my originally requesting transport's host. + + Don't rely on the 'transport' attribute, since Request objects may be + copied remotely. For information on this method's return value, see + L{twisted.internet.tcp.Port}. + """ + return self.host + + def setHost(self, host, port, ssl=0): + """ + Change the host and port the request thinks it's using. + + This method is useful for working with reverse HTTP proxies (e.g. + both Squid and Apache's mod_proxy can do this), when the address + the HTTP client is using is different than the one we're listening on. + + For example, Apache may be listening on https://www.example.com/, and + then forwarding requests to http://localhost:8080/, but we don't want + HTML produced by Twisted to say b'http://localhost:8080/', they should + say b'https://www.example.com/', so we do:: + + request.setHost(b'www.example.com', 443, ssl=1) + + @type host: C{bytes} + @param host: The value to which to change the host header. + + @type ssl: C{bool} + @param ssl: A flag which, if C{True}, indicates that the request is + considered secure (if C{True}, L{isSecure} will return C{True}). 
+ """ + self._forceSSL = ssl # set first so isSecure will work + if self.isSecure(): + default = 443 + else: + default = 80 + if port == default: + hostHeader = host + else: + hostHeader = host + b":" + intToBytes(port) + self.requestHeaders.setRawHeaders(b"host", [hostHeader]) + self.host = address.IPv4Address("TCP", host, port) + + + def getClientIP(self): + """ + Return the IP address of the client who submitted this request. + + This method is B{deprecated}. Use L{getClientAddress} instead. + + @returns: the client IP address + @rtype: C{str} + """ + if isinstance(self.client, (address.IPv4Address, address.IPv6Address)): + return self.client.host + else: + return None + + + def getClientAddress(self): + """ + Return the address of the client who submitted this request. + + This may not be a network address (e.g., a server listening on + a UNIX domain socket will cause this to return + L{UNIXAddress}). Callers must check the type of the returned + address. + + @since: 18.4 + + @return: the client's address. + @rtype: L{IAddress} + """ + return self.client + + + def isSecure(self): + """ + Return L{True} if this request is using a secure transport. + + Normally this method returns L{True} if this request's L{HTTPChannel} + instance is using a transport that implements + L{interfaces.ISSLTransport}. + + This will also return L{True} if L{Request.setHost} has been called + with C{ssl=True}. + + @returns: L{True} if this request is secure + @rtype: C{bool} + """ + if self._forceSSL: + return True + channel = getattr(self, 'channel', None) + if channel is None: + return False + return channel.isSecure() + + + def _authorize(self): + # Authorization, (mostly) per the RFC + try: + authh = self.getHeader(b"Authorization") + if not authh: + self.user = self.password = b'' + return + bas, upw = authh.split() + if bas.lower() != b"basic": + raise ValueError() + upw = base64.decodestring(upw) + self.user, self.password = upw.split(b':', 1) + except (binascii.Error, ValueError): + self.user = self.password = b'' + except: + self._log.failure('') + self.user = self.password = b'' + + + def getUser(self): + """ + Return the HTTP user sent with this request, if any. + + If no user was supplied, return the empty string. + + @returns: the HTTP user, if any + @rtype: C{bytes} + """ + try: + return self.user + except: + pass + self._authorize() + return self.user + + + def getPassword(self): + """ + Return the HTTP password sent with this request, if any. + + If no password was supplied, return the empty string. + + @returns: the HTTP password, if any + @rtype: C{bytes} + """ + try: + return self.password + except: + pass + self._authorize() + return self.password + + + def connectionLost(self, reason): + """ + There is no longer a connection for this request to respond over. + Clean up anything which can't be useful anymore. + """ + self._disconnected = True + self.channel = None + if self.content is not None: + self.content.close() + for d in self.notifications: + d.errback(reason) + self.notifications = [] + + + def loseConnection(self): + """ + Pass the loseConnection through to the underlying channel. + """ + if self.channel is not None: + self.channel.loseConnection() + + + def __eq__(self, other): + """ + Determines if two requests are the same object. + + @param other: Another object whose identity will be compared + to this instance's. + + @return: L{True} when the two are the same object and L{False} + when not. 
+ @rtype: L{bool} + """ + # When other is not an instance of request, return + # NotImplemented so that Python uses other.__eq__ to perform + # the comparison. This ensures that a Request proxy generated + # by proxyForInterface compares equal to an actual Request + # instanceby turning request != proxy into proxy != request. + if isinstance(other, Request): + return self is other + return NotImplemented + + + def __ne__(self, other): + """ + Determines if two requests are not the same object. + + @param other: Another object whose identity will be compared + to this instance's. + + @return: L{True} when the two are not the same object and + L{False} when they are. + @rtype: L{bool} + """ + # When other is not an instance of request, return + # NotImplemented so that Python uses other.__ne__ to perform + # the comparison. This ensures that a Request proxy generated + # by proxyForInterface can compare equal to an actual Request + # instance by turning request != proxy into proxy != request. + if isinstance(other, Request): + return self is not other + return NotImplemented + + + def __hash__(self): + """ + A C{Request} is hashable so that it can be used as a mapping key. + + @return: A C{int} based on the instance's identity. + """ + return id(self) + + + +Request.getClientIP = deprecated( + Version('Twisted', 18, 4, 0), + replacement="getClientAddress", +)(Request.getClientIP) + +Request.noLongerQueued = deprecated( + Version("Twisted", 16, 3, 0))(Request.noLongerQueued) + + +class _DataLoss(Exception): + """ + L{_DataLoss} indicates that not all of a message body was received. This + is only one of several possible exceptions which may indicate that data + was lost. Because of this, it should not be checked for by + specifically; any unexpected exception should be treated as having + caused data loss. + """ + + + +class PotentialDataLoss(Exception): + """ + L{PotentialDataLoss} may be raised by a transfer encoding decoder's + C{noMoreData} method to indicate that it cannot be determined if the + entire response body has been delivered. This only occurs when making + requests to HTTP servers which do not set I{Content-Length} or a + I{Transfer-Encoding} in the response because in this case the end of the + response is indicated by the connection being closed, an event which may + also be due to a transient network problem or other error. + """ + + + +class _MalformedChunkedDataError(Exception): + """ + C{_ChunkedTranferDecoder} raises L{_MalformedChunkedDataError} from its + C{dataReceived} method when it encounters malformed data. This exception + indicates a client-side error. If this exception is raised, the connection + should be dropped with a 400 error. + """ + + + +class _IdentityTransferDecoder(object): + """ + Protocol for accumulating bytes up to a specified length. This handles the + case where no I{Transfer-Encoding} is specified. + + @ivar contentLength: Counter keeping track of how many more bytes there are + to receive. + + @ivar dataCallback: A one-argument callable which will be invoked each + time application data is received. + + @ivar finishCallback: A one-argument callable which will be invoked when + the terminal chunk is received. It will be invoked with all bytes + which were delivered to this protocol which came after the terminal + chunk. 
+ """ + def __init__(self, contentLength, dataCallback, finishCallback): + self.contentLength = contentLength + self.dataCallback = dataCallback + self.finishCallback = finishCallback + + + def dataReceived(self, data): + """ + Interpret the next chunk of bytes received. Either deliver them to the + data callback or invoke the finish callback if enough bytes have been + received. + + @raise RuntimeError: If the finish callback has already been invoked + during a previous call to this methood. + """ + if self.dataCallback is None: + raise RuntimeError( + "_IdentityTransferDecoder cannot decode data after finishing") + + if self.contentLength is None: + self.dataCallback(data) + elif len(data) < self.contentLength: + self.contentLength -= len(data) + self.dataCallback(data) + else: + # Make the state consistent before invoking any code belonging to + # anyone else in case noMoreData ends up being called beneath this + # stack frame. + contentLength = self.contentLength + dataCallback = self.dataCallback + finishCallback = self.finishCallback + self.dataCallback = self.finishCallback = None + self.contentLength = 0 + + dataCallback(data[:contentLength]) + finishCallback(data[contentLength:]) + + + def noMoreData(self): + """ + All data which will be delivered to this decoder has been. Check to + make sure as much data as was expected has been received. + + @raise PotentialDataLoss: If the content length is unknown. + @raise _DataLoss: If the content length is known and fewer than that + many bytes have been delivered. + + @return: L{None} + """ + finishCallback = self.finishCallback + self.dataCallback = self.finishCallback = None + if self.contentLength is None: + finishCallback(b'') + raise PotentialDataLoss() + elif self.contentLength != 0: + raise _DataLoss() + + + +class _ChunkedTransferDecoder(object): + """ + Protocol for decoding I{chunked} Transfer-Encoding, as defined by RFC 2616, + section 3.6.1. This protocol can interpret the contents of a request or + response body which uses the I{chunked} Transfer-Encoding. It cannot + interpret any of the rest of the HTTP protocol. + + It may make sense for _ChunkedTransferDecoder to be an actual IProtocol + implementation. Currently, the only user of this class will only ever + call dataReceived on it. However, it might be an improvement if the + user could connect this to a transport and deliver connection lost + notification. This way, `dataCallback` becomes `self.transport.write` + and perhaps `finishCallback` becomes `self.transport.loseConnection()` + (although I'm not sure where the extra data goes in that case). This + could also allow this object to indicate to the receiver of data that + the stream was not completely received, an error case which should be + noticed. -exarkun + + @ivar dataCallback: A one-argument callable which will be invoked each + time application data is received. + + @ivar finishCallback: A one-argument callable which will be invoked when + the terminal chunk is received. It will be invoked with all bytes + which were delivered to this protocol which came after the terminal + chunk. + + @ivar length: Counter keeping track of how many more bytes in a chunk there + are to receive. + + @ivar state: One of C{'CHUNK_LENGTH'}, C{'CRLF'}, C{'TRAILER'}, + C{'BODY'}, or C{'FINISHED'}. For C{'CHUNK_LENGTH'}, data for the + chunk length line is currently being read. For C{'CRLF'}, the CR LF + pair which follows each chunk is being read. 
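A minimal sketch of the identity decoder in use, assuming the private C{_IdentityTransferDecoder} defined above is imported from C{twisted.web.http}; list C{append} methods stand in for the request callbacks.

    from twisted.web.http import _IdentityTransferDecoder

    received = []
    extra = []

    # An 11-byte body delivered in two pieces; the second piece also carries
    # two bytes of pipelined data, which go to the finish callback untouched.
    decoder = _IdentityTransferDecoder(11, received.append, extra.append)
    decoder.dataReceived(b"hello ")
    decoder.dataReceived(b"worldGE")

    # received == [b"hello ", b"world"], extra == [b"GE"]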
For C{'TRAILER'}, the CR + LF pair which follows the terminal 0-length chunk is currently being + read. For C{'BODY'}, the contents of a chunk are being read. For + C{'FINISHED'}, the last chunk has been completely read and no more + input is valid. + """ + state = 'CHUNK_LENGTH' + + def __init__(self, dataCallback, finishCallback): + self.dataCallback = dataCallback + self.finishCallback = finishCallback + self._buffer = b'' + + + def _dataReceived_CHUNK_LENGTH(self, data): + if b'\r\n' in data: + line, rest = data.split(b'\r\n', 1) + parts = line.split(b';') + try: + self.length = int(parts[0], 16) + except ValueError: + raise _MalformedChunkedDataError( + "Chunk-size must be an integer.") + if self.length == 0: + self.state = 'TRAILER' + else: + self.state = 'BODY' + return rest + else: + self._buffer = data + return b'' + + + def _dataReceived_CRLF(self, data): + if data.startswith(b'\r\n'): + self.state = 'CHUNK_LENGTH' + return data[2:] + else: + self._buffer = data + return b'' + + + def _dataReceived_TRAILER(self, data): + if data.startswith(b'\r\n'): + data = data[2:] + self.state = 'FINISHED' + self.finishCallback(data) + else: + self._buffer = data + return b'' + + + def _dataReceived_BODY(self, data): + if len(data) >= self.length: + chunk, data = data[:self.length], data[self.length:] + self.dataCallback(chunk) + self.state = 'CRLF' + return data + elif len(data) < self.length: + self.length -= len(data) + self.dataCallback(data) + return b'' + + + def _dataReceived_FINISHED(self, data): + raise RuntimeError( + "_ChunkedTransferDecoder.dataReceived called after last " + "chunk was processed") + + + def dataReceived(self, data): + """ + Interpret data from a request or response body which uses the + I{chunked} Transfer-Encoding. + """ + data = self._buffer + data + self._buffer = b'' + while data: + data = getattr(self, '_dataReceived_%s' % (self.state,))(data) + + + def noMoreData(self): + """ + Verify that all data has been received. If it has not been, raise + L{_DataLoss}. + """ + if self.state != 'FINISHED': + raise _DataLoss( + "Chunked decoder in %r state, still expecting more data to " + "get to 'FINISHED' state." % (self.state,)) + + + +@implementer(interfaces.IPushProducer) +class _NoPushProducer(object): + """ + A no-op version of L{interfaces.IPushProducer}, used to abstract over the + possibility that a L{HTTPChannel} transport does not provide + L{IPushProducer}. + """ + def pauseProducing(self): + """ + Pause producing data. + + Tells a producer that it has produced too much data to process for + the time being, and to stop until resumeProducing() is called. + """ + pass + + + def resumeProducing(self): + """ + Resume producing data. + + This tells a producer to re-add itself to the main loop and produce + more data for its consumer. + """ + pass + + + def registerProducer(self, producer, streaming): + """ + Register to receive data from a producer. + + @param producer: The producer to register. + @param streaming: Whether this is a streaming producer or not. + """ + pass + + + def unregisterProducer(self): + """ + Stop consuming data from a producer, without disconnecting. + """ + pass + + + +@implementer(interfaces.ITransport, + interfaces.IPushProducer, + interfaces.IConsumer) +class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin): + """ + A receiver for HTTP requests. + + The L{HTTPChannel} provides L{interfaces.ITransport} and + L{interfaces.IConsumer} to the L{Request} objects it creates. 
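The chunked decoder above can likewise be driven directly; the sketch below feeds it two chunks plus the terminal zero-length chunk, using the private class purely for illustration.

    from twisted.web.http import _ChunkedTransferDecoder

    body = []
    tail = []

    # Two chunks ("5\r\nhello\r\n" and "6\r\n world\r\n") followed by the
    # terminal chunk; the decoder walks the CHUNK_LENGTH/BODY/CRLF/TRAILER
    # states described above.
    decoder = _ChunkedTransferDecoder(body.append, tail.append)
    decoder.dataReceived(b"5\r\nhello\r\n6\r\n world\r\n0\r\n\r\n")
    decoder.noMoreData()   # would raise _DataLoss if the terminal chunk never arrived

    # body == [b"hello", b" world"]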
It also + implements L{interfaces.IPushProducer} to C{self.transport}, allowing the + transport to pause it. + + @ivar MAX_LENGTH: Maximum length for initial request line and each line + from the header. + + @ivar _transferDecoder: L{None} or a decoder instance if the request body + uses the I{chunked} Transfer-Encoding. + @type _transferDecoder: L{_ChunkedTransferDecoder} + + @ivar maxHeaders: Maximum number of headers allowed per request. + @type maxHeaders: C{int} + + @ivar totalHeadersSize: Maximum bytes for request line plus all headers + from the request. + @type totalHeadersSize: C{int} + + @ivar _receivedHeaderSize: Bytes received so far for the header. + @type _receivedHeaderSize: C{int} + + @ivar _handlingRequest: Whether a request is currently being processed. + @type _handlingRequest: L{bool} + + @ivar _dataBuffer: Any data that has been received from the connection + while processing an outstanding request. + @type _dataBuffer: L{list} of L{bytes} + + @ivar _networkProducer: Either the transport, if it provides + L{interfaces.IPushProducer}, or a null implementation of + L{interfaces.IPushProducer}. Used to attempt to prevent the transport + from producing excess data when we're responding to a request. + @type _networkProducer: L{interfaces.IPushProducer} + + @ivar _requestProducer: If the L{Request} object or anything it calls + registers itself as an L{interfaces.IProducer}, it will be stored here. + This is used to create a producing pipeline: pause/resume producing + methods will be propagated from the C{transport}, through the + L{HTTPChannel} instance, to the c{_requestProducer}. + + The reason we proxy through the producing methods rather than the old + behaviour (where we literally just set the L{Request} object as the + producer on the transport) is because we want to be able to exert + backpressure on the client to prevent it from sending in arbitrarily + many requests without ever reading responses. Essentially, if the + client never reads our responses we will eventually stop reading its + requests. + + @type _requestProducer: L{interfaces.IPushProducer} + + @ivar _requestProducerStreaming: A boolean that tracks whether the producer + on the L{Request} side of this channel has registered itself as a + L{interfaces.IPushProducer} or an L{interfaces.IPullProducer}. + @type _requestProducerStreaming: L{bool} or L{None} + + @ivar _waitingForTransport: A boolean that tracks whether the transport has + asked us to stop producing. This is used to keep track of what we're + waiting for: if the transport has asked us to stop producing then we + don't want to unpause the transport until it asks us to produce again. + @type _waitingForTransport: L{bool} + + @ivar abortTimeout: The number of seconds to wait after we attempt to shut + the transport down cleanly to give up and forcibly terminate it. This + is only used when we time a connection out, to prevent errors causing + the FD to get leaked. If this is L{None}, we will wait forever. + @type abortTimeout: L{int} + + @ivar _abortingCall: The L{twisted.internet.base.DelayedCall} that will be + used to forcibly close the transport if it doesn't close cleanly. 
+ @type _abortingCall: L{twisted.internet.base.DelayedCall} + + @ivar _optimisticEagerReadSize: When a resource takes a long time to answer + a request (via L{twisted.web.server.NOT_DONE_YET}, hopefully one day by + a L{Deferred}), we would like to be able to let that resource know + about the underlying transport disappearing as promptly as possible, + via L{Request.notifyFinish}, and therefore via + C{self.requests[...].connectionLost()} on this L{HTTPChannel}. + + However, in order to simplify application logic, we implement + head-of-line blocking, and do not relay pipelined requests to the + application until the previous request has been answered. This means + that said application cannot dispose of any entity-body that comes in + from those subsequent requests, which may be arbitrarily large, and it + may need to be buffered in memory. + + To implement this tradeoff between prompt notification when possible + (in the most frequent case of non-pipelined requests) and correct + behavior when not (say, if a client sends a very long-running GET + request followed by a PUT request with a very large body) we will + continue reading pipelined requests into C{self._dataBuffer} up to a + given limit. + + C{_optimisticEagerReadSize} is the number of bytes we will accept from + the client and buffer before pausing the transport. + + This behavior has been in place since Twisted 17.9.0 . + + @type _optimisticEagerReadSize: L{int} + """ + + maxHeaders = 500 + totalHeadersSize = 16384 + abortTimeout = 15 + + length = 0 + persistent = 1 + __header = b'' + __first_line = 1 + __content = None + + # set in instances or subclasses + requestFactory = Request + + _savedTimeOut = None + _receivedHeaderCount = 0 + _receivedHeaderSize = 0 + _requestProducer = None + _requestProducerStreaming = None + _waitingForTransport = False + _abortingCall = None + _optimisticEagerReadSize = 0x4000 + _log = Logger() + + def __init__(self): + # the request queue + self.requests = [] + self._handlingRequest = False + self._dataBuffer = [] + self._transferDecoder = None + + + def connectionMade(self): + self.setTimeout(self.timeOut) + self._networkProducer = interfaces.IPushProducer( + self.transport, _NoPushProducer() + ) + self._networkProducer.registerProducer(self, True) + + + def lineReceived(self, line): + """ + Called for each line from request until the end of headers when + it enters binary mode. + """ + self.resetTimeout() + + self._receivedHeaderSize += len(line) + if (self._receivedHeaderSize > self.totalHeadersSize): + self._respondToBadRequestAndDisconnect() + return + + if self.__first_line: + # if this connection is not persistent, drop any data which + # the client (illegally) sent after the last request. 
+ if not self.persistent: + self.dataReceived = self.lineReceived = lambda *args: None + return + + # IE sends an extraneous empty line (\r\n) after a POST request; + # eat up such a line, but only ONCE + if not line and self.__first_line == 1: + self.__first_line = 2 + return + + # create a new Request object + if INonQueuedRequestFactory.providedBy(self.requestFactory): + request = self.requestFactory(self) + else: + request = self.requestFactory(self, len(self.requests)) + self.requests.append(request) + + self.__first_line = 0 + + parts = line.split() + if len(parts) != 3: + self._respondToBadRequestAndDisconnect() + return + command, request, version = parts + try: + command.decode("ascii") + except UnicodeDecodeError: + self._respondToBadRequestAndDisconnect() + return + + self._command = command + self._path = request + self._version = version + elif line == b'': + # End of headers. + if self.__header: + ok = self.headerReceived(self.__header) + # If the last header we got is invalid, we MUST NOT proceed + # with processing. We'll have sent a 400 anyway, so just stop. + if not ok: + return + self.__header = b'' + self.allHeadersReceived() + if self.length == 0: + self.allContentReceived() + else: + self.setRawMode() + elif line[0] in b' \t': + # Continuation of a multi line header. + self.__header = self.__header + b'\n' + line + # Regular header line. + # Processing of header line is delayed to allow accumulating multi + # line headers. + else: + if self.__header: + self.headerReceived(self.__header) + self.__header = line + + + def _finishRequestBody(self, data): + self.allContentReceived() + self._dataBuffer.append(data) + + def _maybeChooseTransferDecoder(self, header, data): + """ + If the provided header is C{content-length} or + C{transfer-encoding}, choose the appropriate decoder if any. + + Returns L{True} if the request can proceed and L{False} if not. + """ + + def fail(): + self._respondToBadRequestAndDisconnect() + self.length = None + return False + + # Can this header determine the length? + if header == b'content-length': + try: + length = int(data) + except ValueError: + return fail() + newTransferDecoder = _IdentityTransferDecoder( + length, self.requests[-1].handleContentChunk, + self._finishRequestBody) + elif header == b'transfer-encoding': + # XXX Rather poorly tested code block, apparently only exercised by + # test_chunkedEncoding + if data.lower() == b'chunked': + length = None + newTransferDecoder = _ChunkedTransferDecoder( + self.requests[-1].handleContentChunk, + self._finishRequestBody) + elif data.lower() == b'identity': + return True + else: + return fail() + else: + # It's not a length related header, so exit + return True + + if self._transferDecoder is not None: + return fail() + else: + self.length = length + self._transferDecoder = newTransferDecoder + return True + + + def headerReceived(self, line): + """ + Do pre-processing (for content-length) and store this header away. + Enforce the per-request header limit. + + @type line: C{bytes} + @param line: A line from the header section of a request, excluding the + line delimiter. + + @return: A flag indicating whether the header was valid. 
+ @rtype: L{bool} + """ + try: + header, data = line.split(b':', 1) + except ValueError: + self._respondToBadRequestAndDisconnect() + return False + + if not header or header[-1:].isspace(): + self._respondToBadRequestAndDisconnect() + return False + + header = header.lower() + data = data.strip() + + if not self._maybeChooseTransferDecoder(header, data): + return False + + reqHeaders = self.requests[-1].requestHeaders + values = reqHeaders.getRawHeaders(header) + if values is not None: + values.append(data) + else: + reqHeaders.setRawHeaders(header, [data]) + + self._receivedHeaderCount += 1 + if self._receivedHeaderCount > self.maxHeaders: + self._respondToBadRequestAndDisconnect() + return False + + return True + + + def allContentReceived(self): + command = self._command + path = self._path + version = self._version + + # reset ALL state variables, so we don't interfere with next request + self.length = 0 + self._receivedHeaderCount = 0 + self._receivedHeaderSize = 0 + self.__first_line = 1 + self._transferDecoder = None + del self._command, self._path, self._version + + # Disable the idle timeout, in case this request takes a long + # time to finish generating output. + if self.timeOut: + self._savedTimeOut = self.setTimeout(None) + + self._handlingRequest = True + + req = self.requests[-1] + req.requestReceived(command, path, version) + + + def dataReceived(self, data): + """ + Data was received from the network. Process it. + """ + # If we're currently handling a request, buffer this data. + if self._handlingRequest: + self._dataBuffer.append(data) + if ( + (sum(map(len, self._dataBuffer)) > + self._optimisticEagerReadSize) + and not self._waitingForTransport + ): + # If we received more data than a small limit while processing + # the head-of-line request, apply TCP backpressure to our peer + # to get them to stop sending more request data until we're + # ready. See docstring for _optimisticEagerReadSize above. + self._networkProducer.pauseProducing() + return + return basic.LineReceiver.dataReceived(self, data) + + + def rawDataReceived(self, data): + self.resetTimeout() + + try: + self._transferDecoder.dataReceived(data) + except _MalformedChunkedDataError: + self._respondToBadRequestAndDisconnect() + + + def allHeadersReceived(self): + req = self.requests[-1] + req.parseCookies() + self.persistent = self.checkPersistence(req, self._version) + req.gotLength(self.length) + # Handle 'Expect: 100-continue' with automated 100 response code, + # a simplistic implementation of RFC 2686 8.2.3: + expectContinue = req.requestHeaders.getRawHeaders(b'expect') + if (expectContinue and expectContinue[0].lower() == b'100-continue' and + self._version == b'HTTP/1.1'): + self._send100Continue() + + + def checkPersistence(self, request, version): + """ + Check if the channel should close or not. + + @param request: The request most recently received over this channel + against which checks will be made to determine if this connection + can remain open after a matching response is returned. + + @type version: C{bytes} + @param version: The version of the request. + + @rtype: C{bool} + @return: A flag which, if C{True}, indicates that this connection may + remain open to receive another request; if C{False}, the connection + must be closed in order to indicate the completion of the response + to C{request}. 
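A rough end-to-end sketch of the parsing pipeline above, assuming the in-memory C{StringTransport} test helper and the base C{Request} class (whose C{process()} is a no-op), so the parsed request simply remains on the channel.

    from twisted.test import proto_helpers
    from twisted.web.http import HTTPChannel

    channel = HTTPChannel()
    channel.makeConnection(proto_helpers.StringTransport())

    # Drive lineReceived/headerReceived/allContentReceived with a complete
    # request that has a zero-length body.
    channel.dataReceived(
        b"GET /index?x=1 HTTP/1.1\r\n"
        b"Host: example.com\r\n"
        b"Content-Length: 0\r\n"
        b"\r\n")

    request = channel.requests[0]
    # request.method == b"GET", request.uri == b"/index?x=1"
    # request.getHeader(b"host") == b"example.com"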
+ """ + connection = request.requestHeaders.getRawHeaders(b'connection') + if connection: + tokens = [t.lower() for t in connection[0].split(b' ')] + else: + tokens = [] + + # Once any HTTP 0.9 or HTTP 1.0 request is received, the connection is + # no longer allowed to be persistent. At this point in processing the + # request, we don't yet know if it will be possible to set a + # Content-Length in the response. If it is not, then the connection + # will have to be closed to end an HTTP 0.9 or HTTP 1.0 response. + + # If the checkPersistence call happened later, after the Content-Length + # has been determined (or determined not to be set), it would probably + # be possible to have persistent connections with HTTP 0.9 and HTTP 1.0. + # This may not be worth the effort, though. Just use HTTP 1.1, okay? + + if version == b"HTTP/1.1": + if b'close' in tokens: + request.responseHeaders.setRawHeaders(b'connection', [b'close']) + return False + else: + return True + else: + return False + + + def requestDone(self, request): + """ + Called by first request in queue when it is done. + """ + if request != self.requests[0]: raise TypeError + del self.requests[0] + + # We should only resume the producer if we're not waiting for the + # transport. + if not self._waitingForTransport: + self._networkProducer.resumeProducing() + + if self.persistent: + self._handlingRequest = False + + if self._savedTimeOut: + self.setTimeout(self._savedTimeOut) + + # Receive our buffered data, if any. + data = b''.join(self._dataBuffer) + self._dataBuffer = [] + self.setLineMode(data) + else: + self.loseConnection() + + + def timeoutConnection(self): + self._log.info( + "Timing out client: {peer}", + peer=str(self.transport.getPeer()) + ) + if self.abortTimeout is not None: + # We use self.callLater because that's what TimeoutMixin does. + self._abortingCall = self.callLater( + self.abortTimeout, self.forceAbortClient + ) + self.loseConnection() + + + def forceAbortClient(self): + """ + Called if C{abortTimeout} seconds have passed since the timeout fired, + and the connection still hasn't gone away. This can really only happen + on extremely bad connections or when clients are maliciously attempting + to keep connections open. + """ + self._log.info( + "Forcibly timing out client: {peer}", + peer=str(self.transport.getPeer()) + ) + # We want to lose track of the _abortingCall so that no-one tries to + # cancel it. + self._abortingCall = None + self.transport.abortConnection() + + + def connectionLost(self, reason): + self.setTimeout(None) + for request in self.requests: + request.connectionLost(reason) + + # If we were going to force-close the transport, we don't have to now. + if self._abortingCall is not None: + self._abortingCall.cancel() + self._abortingCall = None + + + def isSecure(self): + """ + Return L{True} if this channel is using a secure transport. + + Normally this method returns L{True} if this instance is using a + transport that implements L{interfaces.ISSLTransport}. + + @returns: L{True} if this request is secure + @rtype: C{bool} + """ + if interfaces.ISSLTransport(self.transport, None) is not None: + return True + return False + + + def writeHeaders(self, version, code, reason, headers): + """ + Called by L{Request} objects to write a complete set of HTTP headers to + a transport. + + @param version: The HTTP version in use. + @type version: L{bytes} + + @param code: The HTTP status code to write. + @type code: L{bytes} + + @param reason: The HTTP reason phrase to write. 
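C{checkPersistence} only consults the request's header objects, so it can be exercised with a stand-in request; C{FakeRequest} below is illustrative and not a Twisted class.

    from twisted.web.http import HTTPChannel
    from twisted.web.http_headers import Headers

    class FakeRequest(object):
        # Stand-in object: checkPersistence only inspects these two
        # header collections.
        def __init__(self):
            self.requestHeaders = Headers()
            self.responseHeaders = Headers()

    channel = HTTPChannel()
    req = FakeRequest()

    channel.checkPersistence(req, b"HTTP/1.1")   # True: keep-alive by default
    req.requestHeaders.setRawHeaders(b"connection", [b"close"])
    channel.checkPersistence(req, b"HTTP/1.1")   # False, and "Connection: close" is echoed
    channel.checkPersistence(req, b"HTTP/1.0")   # False: pre-1.1 never persists here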
+ @type reason: L{bytes} + + @param headers: The headers to write to the transport. + @type headers: L{twisted.web.http_headers.Headers} + """ + sanitizedHeaders = Headers() + for name, value in headers: + sanitizedHeaders.addRawHeader(name, value) + + responseLine = version + b" " + code + b" " + reason + b"\r\n" + headerSequence = [responseLine] + headerSequence.extend( + name + b': ' + value + b"\r\n" + for name, values in sanitizedHeaders.getAllRawHeaders() + for value in values + ) + headerSequence.append(b"\r\n") + self.transport.writeSequence(headerSequence) + + + def write(self, data): + """ + Called by L{Request} objects to write response data. + + @param data: The data chunk to write to the stream. + @type data: L{bytes} + + @return: L{None} + """ + self.transport.write(data) + + + def writeSequence(self, iovec): + """ + Write a list of strings to the HTTP response. + + @param iovec: A list of byte strings to write to the stream. + @type data: L{list} of L{bytes} + + @return: L{None} + """ + self.transport.writeSequence(iovec) + + + def getPeer(self): + """ + Get the remote address of this connection. + + @return: An L{IAddress} provider. + """ + return self.transport.getPeer() + + + def getHost(self): + """ + Get the local address of this connection. + + @return: An L{IAddress} provider. + """ + return self.transport.getHost() + + + def loseConnection(self): + """ + Closes the connection. Will write any data that is pending to be sent + on the network, but if this response has not yet been written to the + network will not write anything. + + @return: L{None} + """ + self._networkProducer.unregisterProducer() + return self.transport.loseConnection() + + + def registerProducer(self, producer, streaming): + """ + Register to receive data from a producer. + + This sets self to be a consumer for a producer. When this object runs + out of data (as when a send(2) call on a socket succeeds in moving the + last data from a userspace buffer into a kernelspace buffer), it will + ask the producer to resumeProducing(). + + For L{IPullProducer} providers, C{resumeProducing} will be called once + each time data is required. + + For L{IPushProducer} providers, C{pauseProducing} will be called + whenever the write buffer fills up and C{resumeProducing} will only be + called when it empties. + + @type producer: L{IProducer} provider + @param producer: The L{IProducer} that will be producing data. + + @type streaming: L{bool} + @param streaming: C{True} if C{producer} provides L{IPushProducer}, + C{False} if C{producer} provides L{IPullProducer}. + + @raise RuntimeError: If a producer is already registered. + + @return: L{None} + """ + if self._requestProducer is not None: + raise RuntimeError( + "Cannot register producer %s, because producer %s was never " + "unregistered." % (producer, self._requestProducer)) + + if not streaming: + producer = _PullToPush(producer, self) + + self._requestProducer = producer + self._requestProducerStreaming = streaming + + if not streaming: + producer.startStreaming() + + + def unregisterProducer(self): + """ + Stop consuming data from a producer, without disconnecting. + + @return: L{None} + """ + if self._requestProducer is None: + return + + if not self._requestProducerStreaming: + self._requestProducer.stopStreaming() + + self._requestProducer = None + self._requestProducerStreaming = None + + + def stopProducing(self): + """ + Stop producing data. 
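A small sketch of what C{writeHeaders} puts on the wire, assuming the C{StringTransport} test helper as the transport.

    from twisted.test import proto_helpers
    from twisted.web.http import HTTPChannel

    channel = HTTPChannel()
    channel.makeConnection(proto_helpers.StringTransport())

    # Status line, one sanitized header, then the blank line ending the block.
    channel.writeHeaders(b"HTTP/1.1", b"200", b"OK",
                         [(b"Content-Type", b"text/plain")])

    channel.transport.value()
    # b'HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\n\r\n'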
+ + The HTTPChannel doesn't *actually* implement this, beacuse the + assumption is that it will only be called just before C{loseConnection} + is called. There's nothing sensible we can do other than call + C{loseConnection} anyway. + """ + if self._requestProducer is not None: + self._requestProducer.stopProducing() + + + def pauseProducing(self): + """ + Pause producing data. + + This will be called by the transport when the send buffers have been + filled up. We want to simultaneously pause the producing L{Request} + object and also pause our transport. + + The logic behind pausing the transport is specifically to avoid issues + like https://twistedmatrix.com/trac/ticket/8868. In this case, our + inability to send does not prevent us handling more requests, which + means we increasingly queue up more responses in our send buffer + without end. The easiest way to handle this is to ensure that if we are + unable to send our responses, we will not read further data from the + connection until the client pulls some data out. This is a bit of a + blunt instrument, but it's ok. + + Note that this potentially interacts with timeout handling in a + positive way. Once the transport is paused the client may run into a + timeout which will cause us to tear the connection down. That's a good + thing! + """ + self._waitingForTransport = True + + # The first step is to tell any producer we might currently have + # registered to stop producing. If we can slow our applications down + # we should. + if self._requestProducer is not None: + self._requestProducer.pauseProducing() + + # The next step here is to pause our own transport, as discussed in the + # docstring. + if not self._handlingRequest: + self._networkProducer.pauseProducing() + + + def resumeProducing(self): + """ + Resume producing data. + + This will be called by the transport when the send buffer has dropped + enough to actually send more data. When this happens we can unpause any + outstanding L{Request} producers we have, and also unpause our + transport. + """ + self._waitingForTransport = False + + if self._requestProducer is not None: + self._requestProducer.resumeProducing() + + # We only want to resume the network producer if we're not currently + # waiting for a response to show up. + if not self._handlingRequest: + self._networkProducer.resumeProducing() + + + def _send100Continue(self): + """ + Sends a 100 Continue response, used to signal to clients that further + processing will be performed. + """ + self.transport.write(b"HTTP/1.1 100 Continue\r\n\r\n") + + + def _respondToBadRequestAndDisconnect(self): + """ + This is a quick and dirty way of responding to bad requests. + + As described by HTTP standard we should be patient and accept the + whole request from the client before sending a polite bad request + response, even in the case when clients send tons of data. + + @param transport: Transport handling connection to the client. + @type transport: L{interfaces.ITransport} + """ + self.transport.write(b"HTTP/1.1 400 Bad Request\r\n\r\n") + self.loseConnection() + + + +def _escape(s): + """ + Return a string like python repr, but always escaped as if surrounding + quotes were double quotes. + + @param s: The string to escape. + @type s: L{bytes} or L{unicode} + + @return: An escaped string. 
+ @rtype: L{unicode} + """ + if not isinstance(s, bytes): + s = s.encode("ascii") + + r = repr(s) + if not isinstance(r, unicode): + r = r.decode("ascii") + if r.startswith(u"b"): + r = r[1:] + if r.startswith(u"'"): + return r[1:-1].replace(u'"', u'\\"').replace(u"\\'", u"'") + return r[1:-1] + + + +@provider(IAccessLogFormatter) +def combinedLogFormatter(timestamp, request): + """ + @return: A combined log formatted log line for the given request. + + @see: L{IAccessLogFormatter} + """ + clientAddr = request.getClientAddress() + if isinstance(clientAddr, (address.IPv4Address, address.IPv6Address, + _XForwardedForAddress)): + ip = clientAddr.host + else: + ip = b'-' + referrer = _escape(request.getHeader(b"referer") or b"-") + agent = _escape(request.getHeader(b"user-agent") or b"-") + line = ( + u'"%(ip)s" - - %(timestamp)s "%(method)s %(uri)s %(protocol)s" ' + u'%(code)d %(length)s "%(referrer)s" "%(agent)s"' % dict( + ip=_escape(ip), + timestamp=timestamp, + method=_escape(request.method), + uri=_escape(request.uri), + protocol=_escape(request.clientproto), + code=request.code, + length=request.sentLength or u"-", + referrer=referrer, + agent=agent, + )) + return line + + + +@implementer(interfaces.IAddress) +class _XForwardedForAddress(object): + """ + L{IAddress} which represents the client IP to log for a request, as gleaned + from an X-Forwarded-For header. + + @ivar host: An IP address or C{b"-"}. + @type host: L{bytes} + + @see: L{proxiedLogFormatter} + """ + def __init__(self, host): + self.host = host + + + +class _XForwardedForRequest(proxyForInterface(IRequest, "_request")): + """ + Add a layer on top of another request that only uses the value of an + X-Forwarded-For header as the result of C{getClientAddress}. + """ + def getClientAddress(self): + """ + The client address (the first address) in the value of the + I{X-Forwarded-For header}. If the header is not present, the IP is + considered to be C{b"-"}. + + @return: L{_XForwardedForAddress} which wraps the client address as + expected by L{combinedLogFormatter}. + """ + host = self._request.requestHeaders.getRawHeaders( + b"x-forwarded-for", [b"-"])[0].split(b",")[0].strip() + return _XForwardedForAddress(host) + + # These are missing from the interface. Forward them manually. + @property + def clientproto(self): + """ + @return: The protocol version in the request. + @rtype: L{bytes} + """ + return self._request.clientproto + + @property + def code(self): + """ + @return: The response code for the request. + @rtype: L{int} + """ + return self._request.code + + @property + def sentLength(self): + """ + @return: The number of bytes sent in the response body. + @rtype: L{int} + """ + return self._request.sentLength + + + +@provider(IAccessLogFormatter) +def proxiedLogFormatter(timestamp, request): + """ + @return: A combined log formatted log line for the given request but use + the value of the I{X-Forwarded-For} header as the value for the client + IP address. + + @see: L{IAccessLogFormatter} + """ + return combinedLogFormatter(timestamp, _XForwardedForRequest(request)) + + + +class _GenericHTTPChannelProtocol(proxyForInterface(IProtocol, "_channel")): + """ + A proxy object that wraps one of the HTTP protocol objects, and switches + between them depending on TLS negotiated protocol. + + @ivar _negotiatedProtocol: The protocol negotiated with ALPN or NPN, if + any. + @type _negotiatedProtocol: Either a bytestring containing the ALPN token + for the negotiated protocol, or L{None} if no protocol has yet been + negotiated. 
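A hedged example of opting into the proxied formatter, assuming C{twisted.web.server.Site} (the C{HTTPFactory} subclass referenced throughout) and a placeholder resource tree.

    from twisted.web import resource, server
    from twisted.web.http import proxiedLogFormatter

    # Behind a reverse proxy, log the first X-Forwarded-For address rather
    # than the proxy's own; Site forwards logPath/logFormatter to
    # HTTPFactory.__init__.
    root = resource.Resource()   # placeholder resource tree
    site = server.Site(root, logPath="httpd.log",
                       logFormatter=proxiedLogFormatter)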
+ + @ivar _channel: The object capable of behaving like a L{HTTPChannel} that + is backing this object. By default this is a L{HTTPChannel}, but if a + HTTP protocol upgrade takes place this may be a different channel + object. Must implement L{IProtocol}. + @type _channel: L{HTTPChannel} + + @ivar _requestFactory: A callable to use to build L{IRequest} objects. + @type _requestFactory: L{IRequest} + + @ivar _site: A reference to the creating L{twisted.web.server.Site} object. + @type _site: L{twisted.web.server.Site} + + @ivar _factory: A reference to the creating L{HTTPFactory} object. + @type _factory: L{HTTPFactory} + + @ivar _timeOut: A timeout value to pass to the backing channel. + @type _timeOut: L{int} or L{None} + + @ivar _callLater: A value for the C{callLater} callback. + @type _callLater: L{callable} + """ + _negotiatedProtocol = None + _requestFactory = Request + _factory = None + _site = None + _timeOut = None + _callLater = None + + + @property + def factory(self): + """ + @see: L{_genericHTTPChannelProtocolFactory} + """ + return self._channel.factory + + + @factory.setter + def factory(self, value): + self._factory = value + self._channel.factory = value + + + @property + def requestFactory(self): + """ + A callable to use to build L{IRequest} objects. + + Retries the object from the current backing channel. + """ + return self._channel.requestFactory + + + @requestFactory.setter + def requestFactory(self, value): + """ + A callable to use to build L{IRequest} objects. + + Sets the object on the backing channel and also stores the value for + propagation to any new channel. + + @param value: The new callable to use. + @type value: A L{callable} returning L{IRequest} + """ + self._requestFactory = value + self._channel.requestFactory = value + + + @property + def site(self): + """ + A reference to the creating L{twisted.web.server.Site} object. + + Returns the site object from the backing channel. + """ + return self._channel.site + + + @site.setter + def site(self, value): + """ + A reference to the creating L{twisted.web.server.Site} object. + + Sets the object on the backing channel and also stores the value for + propagation to any new channel. + + @param value: The L{twisted.web.server.Site} object to set. + @type value: L{twisted.web.server.Site} + """ + self._site = value + self._channel.site = value + + + @property + def timeOut(self): + """ + The idle timeout for the backing channel. + """ + return self._channel.timeOut + + + @timeOut.setter + def timeOut(self, value): + """ + The idle timeout for the backing channel. + + Sets the idle timeout on both the backing channel and stores it for + propagation to any new backing channel. + + @param value: The timeout to set. + @type value: L{int} or L{float} + """ + self._timeOut = value + self._channel.timeOut = value + + + @property + def callLater(self): + """ + A value for the C{callLater} callback. This callback is used by the + L{twisted.protocols.policies.TimeoutMixin} to handle timeouts. + """ + return self._channel.callLater + + + @callLater.setter + def callLater(self, value): + """ + Sets the value for the C{callLater} callback. This callback is used by + the L{twisted.protocols.policies.TimeoutMixin} to handle timeouts. + + @param value: The new callback to use. + @type value: L{callable} + """ + self._callLater = value + self._channel.callLater = value + + + def dataReceived(self, data): + """ + An override of L{IProtocol.dataReceived} that checks what protocol we're + using. 
+ """ + if self._negotiatedProtocol is None: + try: + negotiatedProtocol = self._channel.transport.negotiatedProtocol + except AttributeError: + # Plaintext HTTP, always HTTP/1.1 + negotiatedProtocol = b'http/1.1' + + if negotiatedProtocol is None: + negotiatedProtocol = b'http/1.1' + + if negotiatedProtocol == b'h2': + if not H2_ENABLED: + raise ValueError("Negotiated HTTP/2 without support.") + + # We need to make sure that the HTTPChannel is unregistered + # from the transport so that the H2Connection can register + # itself if possible. + networkProducer = self._channel._networkProducer + networkProducer.unregisterProducer() + + # Cancel the old channel's timeout. + self._channel.setTimeout(None) + + # Cancel the old channel's timeout. + self._channel.setTimeout(None) + + transport = self._channel.transport + self._channel = H2Connection() + self._channel.requestFactory = self._requestFactory + self._channel.site = self._site + self._channel.factory = self._factory + self._channel.timeOut = self._timeOut + self._channel.callLater = self._callLater + self._channel.makeConnection(transport) + + # Register the H2Connection as the transport's + # producer, so that the transport can apply back + # pressure. + networkProducer.registerProducer(self._channel, True) + else: + # Only HTTP/2 and HTTP/1.1 are supported right now. + assert negotiatedProtocol == b'http/1.1', \ + "Unsupported protocol negotiated" + + self._negotiatedProtocol = negotiatedProtocol + + return self._channel.dataReceived(data) + + + +def _genericHTTPChannelProtocolFactory(self): + """ + Returns an appropriately initialized _GenericHTTPChannelProtocol. + """ + return _GenericHTTPChannelProtocol(HTTPChannel()) + + + +class HTTPFactory(protocol.ServerFactory): + """ + Factory for HTTP server. + + @ivar _logDateTime: A cached datetime string for log messages, updated by + C{_logDateTimeCall}. + @type _logDateTime: C{str} + + @ivar _logDateTimeCall: A delayed call for the next update to the cached + log datetime string. + @type _logDateTimeCall: L{IDelayedCall} provided + + @ivar _logFormatter: See the C{logFormatter} parameter to L{__init__} + + @ivar _nativeize: A flag that indicates whether the log file being written + to wants native strings (C{True}) or bytes (C{False}). This is only to + support writing to L{twisted.python.log} which, unfortunately, works + with native strings. + + @ivar _reactor: An L{IReactorTime} provider used to compute logging + timestamps. + """ + + protocol = _genericHTTPChannelProtocolFactory + + logPath = None + + timeOut = _REQUEST_TIMEOUT + + def __init__(self, logPath=None, timeout=_REQUEST_TIMEOUT, + logFormatter=None, reactor=None): + """ + @param logPath: File path to which access log messages will be written + or C{None} to disable logging. + @type logPath: L{str} or L{bytes} + + @param timeout: The initial value of L{timeOut}, which defines the idle + connection timeout in seconds, or C{None} to disable the idle + timeout. + @type timeout: L{float} + + @param logFormatter: An object to format requests into log lines for + the access log. L{combinedLogFormatter} when C{None} is passed. + @type logFormatter: L{IAccessLogFormatter} provider + + @param reactor: A L{IReactorTime} provider used to manage connection + timeouts and compute logging timestamps. 
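A minimal serving sketch tying the factory together, assuming C{twisted.web.server.Site} and a plain TCP endpoint; C{timeout} here is the idle timeout documented for C{HTTPFactory.__init__}.

    from twisted.internet import endpoints, reactor
    from twisted.web import resource, server

    class Hello(resource.Resource):
        isLeaf = True

        def render_GET(self, request):
            # Synchronous response: return the body bytes directly.
            request.setHeader(b"content-type", b"text/plain")
            return b"hello\n"

    site = server.Site(Hello(), timeout=60)   # idle timeout in seconds
    endpoints.TCP4ServerEndpoint(reactor, 8080).listen(site)
    reactor.run()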
+ """ + if not reactor: + from twisted.internet import reactor + self._reactor = reactor + + if logPath is not None: + logPath = os.path.abspath(logPath) + self.logPath = logPath + self.timeOut = timeout + if logFormatter is None: + logFormatter = combinedLogFormatter + self._logFormatter = logFormatter + + # For storing the cached log datetime and the callback to update it + self._logDateTime = None + self._logDateTimeCall = None + + + def _updateLogDateTime(self): + """ + Update log datetime periodically, so we aren't always recalculating it. + """ + self._logDateTime = datetimeToLogString(self._reactor.seconds()) + self._logDateTimeCall = self._reactor.callLater(1, self._updateLogDateTime) + + + def buildProtocol(self, addr): + p = protocol.ServerFactory.buildProtocol(self, addr) + + # This is a bit of a hack to ensure that the HTTPChannel timeouts + # occur on the same reactor as the one we're using here. This could + # ideally be resolved by passing the reactor more generally to the + # HTTPChannel, but that won't work for the TimeoutMixin until we fix + # https://twistedmatrix.com/trac/ticket/8488 + p.callLater = self._reactor.callLater + + # timeOut needs to be on the Protocol instance cause + # TimeoutMixin expects it there + p.timeOut = self.timeOut + return p + + + def startFactory(self): + """ + Set up request logging if necessary. + """ + if self._logDateTimeCall is None: + self._updateLogDateTime() + + if self.logPath: + self.logFile = self._openLogFile(self.logPath) + else: + self.logFile = log.logfile + + + def stopFactory(self): + if hasattr(self, "logFile"): + if self.logFile != log.logfile: + self.logFile.close() + del self.logFile + + if self._logDateTimeCall is not None and self._logDateTimeCall.active(): + self._logDateTimeCall.cancel() + self._logDateTimeCall = None + + + def _openLogFile(self, path): + """ + Override in subclasses, e.g. to use L{twisted.python.logfile}. + """ + f = open(path, "ab", 1) + return f + + + def log(self, request): + """ + Write a line representing C{request} to the access log file. + + @param request: The request object about which to log. + @type request: L{Request} + """ + try: + logFile = self.logFile + except AttributeError: + pass + else: + line = self._logFormatter(self._logDateTime, request) + u"\n" + logFile.write(line.encode('utf8')) diff --git a/contrib/python/Twisted/py2/twisted/web/http_headers.py b/contrib/python/Twisted/py2/twisted/web/http_headers.py new file mode 100644 index 0000000000..5b141ac74c --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/http_headers.py @@ -0,0 +1,294 @@ +# -*- test-case-name: twisted.web.test.test_http_headers -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +An API for storing HTTP header names and values. +""" + +from __future__ import division, absolute_import + +from twisted.python.compat import comparable, cmp, unicode + + +def _dashCapitalize(name): + """ + Return a byte string which is capitalized using '-' as a word separator. + + @param name: The name of the header to capitalize. + @type name: L{bytes} + + @return: The given header capitalized using '-' as a word separator. + @rtype: L{bytes} + """ + return b'-'.join([word.capitalize() for word in name.split(b'-')]) + + + +def _sanitizeLinearWhitespace(headerComponent): + r""" + Replace linear whitespace (C{\n}, C{\r\n}, C{\r}) in a header key + or value with a single space. If C{headerComponent} is not + L{bytes}, it is passed through unchanged. 
+ + @param headerComponent: The header key or value to sanitize. + @type headerComponent: L{bytes} + + @return: The sanitized header key or value. + @rtype: L{bytes} + """ + return b' '.join(headerComponent.splitlines()) + + + +@comparable +class Headers(object): + """ + Stores HTTP headers in a key and multiple value format. + + Most methods accept L{bytes} and L{unicode}, with an internal L{bytes} + representation. When passed L{unicode}, header names (e.g. 'Content-Type') + are encoded using ISO-8859-1 and header values (e.g. + 'text/html;charset=utf-8') are encoded using UTF-8. Some methods that return + values will return them in the same type as the name given. + + If the header keys or values cannot be encoded or decoded using the rules + above, using just L{bytes} arguments to the methods of this class will + ensure no decoding or encoding is done, and L{Headers} will treat the keys + and values as opaque byte strings. + + @cvar _caseMappings: A L{dict} that maps lowercase header names + to their canonicalized representation. + + @ivar _rawHeaders: A L{dict} mapping header names as L{bytes} to L{list}s of + header values as L{bytes}. + """ + _caseMappings = { + b'content-md5': b'Content-MD5', + b'dnt': b'DNT', + b'etag': b'ETag', + b'p3p': b'P3P', + b'te': b'TE', + b'www-authenticate': b'WWW-Authenticate', + b'x-xss-protection': b'X-XSS-Protection'} + + def __init__(self, rawHeaders=None): + self._rawHeaders = {} + if rawHeaders is not None: + for name, values in rawHeaders.items(): + self.setRawHeaders(name, values) + + + def __repr__(self): + """ + Return a string fully describing the headers set on this object. + """ + return '%s(%r)' % (self.__class__.__name__, self._rawHeaders,) + + + def __cmp__(self, other): + """ + Define L{Headers} instances as being equal to each other if they have + the same raw headers. + """ + if isinstance(other, Headers): + return cmp( + sorted(self._rawHeaders.items()), + sorted(other._rawHeaders.items())) + return NotImplemented + + + def _encodeName(self, name): + """ + Encode the name of a header (eg 'Content-Type') to an ISO-8859-1 encoded + bytestring if required. + + @param name: A HTTP header name + @type name: L{unicode} or L{bytes} + + @return: C{name}, encoded if required, lowercased + @rtype: L{bytes} + """ + if isinstance(name, unicode): + return name.lower().encode('iso-8859-1') + return name.lower() + + + def _encodeValue(self, value): + """ + Encode a single header value to a UTF-8 encoded bytestring if required. + + @param value: A single HTTP header value. + @type value: L{bytes} or L{unicode} + + @return: C{value}, encoded if required + @rtype: L{bytes} + """ + if isinstance(value, unicode): + return value.encode('utf8') + return value + + + def _encodeValues(self, values): + """ + Encode a L{list} of header values to a L{list} of UTF-8 encoded + bytestrings if required. + + @param values: A list of HTTP header values. + @type values: L{list} of L{bytes} or L{unicode} (mixed types allowed) + + @return: C{values}, with each item encoded if required + @rtype: L{list} of L{bytes} + """ + newValues = [] + + for value in values: + newValues.append(self._encodeValue(value)) + return newValues + + + def _decodeValues(self, values): + """ + Decode a L{list} of header values into a L{list} of Unicode strings. + + @param values: A list of HTTP header values. 
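The sanitizer above can be checked directly; the private helper is used here purely for illustration.

    from twisted.web import http_headers

    # Embedded CR/LF can never reach the wire once a header name or value
    # has passed through the sanitizer.
    http_headers._sanitizeLinearWhitespace(b"value\r\ninjected: 1")
    # -> b'value injected: 1'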
+ @type values: L{list} of UTF-8 encoded L{bytes} + + @return: C{values}, with each item decoded + @rtype: L{list} of L{unicode} + """ + newValues = [] + + for value in values: + newValues.append(value.decode('utf8')) + return newValues + + + def copy(self): + """ + Return a copy of itself with the same headers set. + + @return: A new L{Headers} + """ + return self.__class__(self._rawHeaders) + + + def hasHeader(self, name): + """ + Check for the existence of a given header. + + @type name: L{bytes} or L{unicode} + @param name: The name of the HTTP header to check for. + + @rtype: L{bool} + @return: C{True} if the header exists, otherwise C{False}. + """ + return self._encodeName(name) in self._rawHeaders + + + def removeHeader(self, name): + """ + Remove the named header from this header object. + + @type name: L{bytes} or L{unicode} + @param name: The name of the HTTP header to remove. + + @return: L{None} + """ + self._rawHeaders.pop(self._encodeName(name), None) + + + def setRawHeaders(self, name, values): + """ + Sets the raw representation of the given header. + + @type name: L{bytes} or L{unicode} + @param name: The name of the HTTP header to set the values for. + + @type values: L{list} of L{bytes} or L{unicode} strings + @param values: A list of strings each one being a header value of + the given name. + + @return: L{None} + """ + if not isinstance(values, list): + raise TypeError("Header entry %r should be list but found " + "instance of %r instead" % (name, type(values))) + + name = _sanitizeLinearWhitespace(self._encodeName(name)) + encodedValues = [_sanitizeLinearWhitespace(v) + for v in self._encodeValues(values)] + + self._rawHeaders[name] = self._encodeValues(encodedValues) + + + def addRawHeader(self, name, value): + """ + Add a new raw value for the given header. + + @type name: L{bytes} or L{unicode} + @param name: The name of the header for which to set the value. + + @type value: L{bytes} or L{unicode} + @param value: The value to set for the named header. + """ + values = self.getRawHeaders(name) + + if values is not None: + values.append(value) + else: + values = [value] + + self.setRawHeaders(name, values) + + + def getRawHeaders(self, name, default=None): + """ + Returns a list of headers matching the given name as the raw string + given. + + @type name: L{bytes} or L{unicode} + @param name: The name of the HTTP header to get the values of. + + @param default: The value to return if no header with the given C{name} + exists. + + @rtype: L{list} of strings, same type as C{name} (except when + C{default} is returned). + @return: If the named header is present, a L{list} of its + values. Otherwise, C{default}. + """ + encodedName = self._encodeName(name) + values = self._rawHeaders.get(encodedName, default) + + if isinstance(name, unicode) and values is not default: + return self._decodeValues(values) + return values + + + def getAllRawHeaders(self): + """ + Return an iterator of key, value pairs of all headers contained in this + object, as L{bytes}. The keys are capitalized in canonical + capitalization. + """ + for k, v in self._rawHeaders.items(): + yield self._canonicalNameCaps(k), v + + + def _canonicalNameCaps(self, name): + """ + Return the canonical name for the given header. + + @type name: L{bytes} + @param name: The all-lowercase header name to capitalize in its + canonical form. + + @rtype: L{bytes} + @return: The canonical name of the header. 
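Typical use of the public C{Headers} API described above.

    from twisted.web.http_headers import Headers

    h = Headers({b"content-type": [b"text/html"]})
    h.addRawHeader(b"set-cookie", b"a=1")
    h.addRawHeader(b"set-cookie", b"b=2")

    h.getRawHeaders(b"set-cookie")      # [b'a=1', b'b=2']
    h.getRawHeaders(u"content-type")    # [u'text/html']: results match the key's type
    h.hasHeader(b"x-missing")           # False
    dict(h.getAllRawHeaders())          # keys come back canonically capitalized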
+ """ + return self._caseMappings.get(name, _dashCapitalize(name)) + + + +__all__ = ['Headers'] diff --git a/contrib/python/Twisted/py2/twisted/web/iweb.py b/contrib/python/Twisted/py2/twisted/web/iweb.py new file mode 100644 index 0000000000..47dc8d3257 --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/iweb.py @@ -0,0 +1,828 @@ +# -*- test-case-name: twisted.web.test -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +Interface definitions for L{twisted.web}. + +@var UNKNOWN_LENGTH: An opaque object which may be used as the value of + L{IBodyProducer.length} to indicate that the length of the entity + body is not known in advance. +""" + +from zope.interface import Interface, Attribute + +from twisted.internet.interfaces import IPushProducer +from twisted.cred.credentials import IUsernameDigestHash + + +class IRequest(Interface): + """ + An HTTP request. + + @since: 9.0 + """ + + method = Attribute("A L{bytes} giving the HTTP method that was used.") + uri = Attribute( + "A L{bytes} giving the full encoded URI which was requested (including" + " query arguments).") + path = Attribute( + "A L{bytes} giving the encoded query path of the request URI (not " + "including query arguments).") + args = Attribute( + "A mapping of decoded query argument names as L{bytes} to " + "corresponding query argument values as L{list}s of L{bytes}. " + "For example, for a URI with C{foo=bar&foo=baz&quux=spam} " + "for its query part, C{args} will be C{{b'foo': [b'bar', b'baz'], " + "b'quux': [b'spam']}}.") + + prepath = Attribute( + "The URL path segments which have been processed during resource " + "traversal, as a list of {bytes}.") + + postpath = Attribute( + "The URL path segments which have not (yet) been processed " + "during resource traversal, as a list of L{bytes}.") + + requestHeaders = Attribute( + "A L{http_headers.Headers} instance giving all received HTTP request " + "headers.") + + content = Attribute( + "A file-like object giving the request body. This may be a file on " + "disk, an L{io.BytesIO}, or some other type. The implementation is " + "free to decide on a per-request basis.") + + responseHeaders = Attribute( + "A L{http_headers.Headers} instance holding all HTTP response " + "headers to be sent.") + + def getHeader(key): + """ + Get an HTTP request header. + + @type key: L{bytes} or L{str} + @param key: The name of the header to get the value of. + + @rtype: L{bytes} or L{str} or L{None} + @return: The value of the specified header, or L{None} if that header + was not present in the request. The string type of the result + matches the type of C{key}. + """ + + + def getCookie(key): + """ + Get a cookie that was sent from the network. + + @type key: L{bytes} + @param key: The name of the cookie to get. + + @rtype: L{bytes} or L{None} + @returns: The value of the specified cookie, or L{None} if that cookie + was not present in the request. + """ + + + def getAllHeaders(): + """ + Return dictionary mapping the names of all received headers to the last + value received for each. + + Since this method does not return all header information, + C{requestHeaders.getAllRawHeaders()} may be preferred. + """ + + + def getRequestHostname(): + """ + Get the hostname that the user passed in to the request. + + This will either use the Host: header (if it is available) or the + host we are listening on if the header is unavailable. 
+ + @returns: the requested hostname + @rtype: L{str} + """ + + + def getHost(): + """ + Get my originally requesting transport's host. + + @return: An L{IAddress<twisted.internet.interfaces.IAddress>}. + """ + + + def getClientAddress(): + """ + Return the address of the client who submitted this request. + + The address may not be a network address. Callers must check + its type before using it. + + @since: 18.4 + + @return: the client's address. + @rtype: an L{IAddress} provider. + """ + + + def getClientIP(): + """ + Return the IP address of the client who submitted this request. + + This method is B{deprecated}. See L{getClientAddress} instead. + + @returns: the client IP address or L{None} if the request was submitted + over a transport where IP addresses do not make sense. + @rtype: L{str} or L{None} + """ + + + def getUser(): + """ + Return the HTTP user sent with this request, if any. + + If no user was supplied, return the empty string. + + @returns: the HTTP user, if any + @rtype: L{str} + """ + + + def getPassword(): + """ + Return the HTTP password sent with this request, if any. + + If no password was supplied, return the empty string. + + @returns: the HTTP password, if any + @rtype: L{str} + """ + + + def isSecure(): + """ + Return True if this request is using a secure transport. + + Normally this method returns True if this request's HTTPChannel + instance is using a transport that implements ISSLTransport. + + This will also return True if setHost() has been called + with ssl=True. + + @returns: True if this request is secure + @rtype: C{bool} + """ + + + def getSession(sessionInterface=None): + """ + Look up the session associated with this request or create a new one if + there is not one. + + @return: The L{Session} instance identified by the session cookie in + the request, or the C{sessionInterface} component of that session + if C{sessionInterface} is specified. + """ + + + def URLPath(): + """ + @return: A L{URLPath<twisted.python.urlpath.URLPath>} instance + which identifies the URL for which this request is. + """ + + + def prePathURL(): + """ + At any time during resource traversal or resource rendering, + returns an absolute URL to the most nested resource which has + yet been reached. + + @see: {twisted.web.server.Request.prepath} + + @return: An absolute URL. + @type: L{bytes} + """ + + + def rememberRootURL(): + """ + Remember the currently-processed part of the URL for later + recalling. + """ + + + def getRootURL(): + """ + Get a previously-remembered URL. + + @return: An absolute URL. + @type: L{bytes} + """ + + + # Methods for outgoing response + def finish(): + """ + Indicate that the response to this request is complete. + """ + + + def write(data): + """ + Write some data to the body of the response to this request. Response + headers are written the first time this method is called, after which + new response headers may not be added. + + @param data: Bytes of the response body. + @type data: L{bytes} + """ + + + def addCookie(k, v, expires=None, domain=None, path=None, max_age=None, comment=None, secure=None): + """ + Set an outgoing HTTP cookie. + + In general, you should consider using sessions instead of cookies, see + L{twisted.web.server.Request.getSession} and the + L{twisted.web.server.Session} class for details. + """ + + + def setResponseCode(code, message=None): + """ + Set the HTTP response code. + + @type code: L{int} + @type message: L{bytes} + """ + + + def setHeader(k, v): + """ + Set an HTTP response header. 
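A short sketch of these response methods used from a resource's render method, assuming C{twisted.web.resource.Resource}, C{twisted.web.server.NOT_DONE_YET}, and C{task.deferLater} to defer the reply.

    from twisted.internet import reactor, task
    from twisted.web import resource, server

    class Delayed(resource.Resource):
        isLeaf = True

        def render_GET(self, request):
            # Headers must be set before the first write(); finish()
            # completes the response later, so render returns NOT_DONE_YET
            # for now.
            def respond():
                request.setResponseCode(200)
                request.setHeader(b"content-type", b"text/plain")
                request.write(b"hello, later\n")
                request.finish()
            task.deferLater(reactor, 1.0, respond)
            return server.NOT_DONE_YET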
Overrides any previously set values for + this header. + + @type k: L{bytes} or L{str} + @param k: The name of the header for which to set the value. + + @type v: L{bytes} or L{str} + @param v: The value to set for the named header. A L{str} will be + UTF-8 encoded, which may not interoperable with other + implementations. Avoid passing non-ASCII characters if possible. + """ + + + def redirect(url): + """ + Utility function that does a redirect. + + The request should have finish() called after this. + """ + + + def setLastModified(when): + """ + Set the C{Last-Modified} time for the response to this request. + + If I am called more than once, I ignore attempts to set Last-Modified + earlier, only replacing the Last-Modified time if it is to a later + value. + + If I am a conditional request, I may modify my response code to + L{NOT_MODIFIED<http.NOT_MODIFIED>} if appropriate for the time given. + + @param when: The last time the resource being returned was modified, in + seconds since the epoch. + @type when: L{int}, L{long} or L{float} + + @return: If I am a C{If-Modified-Since} conditional request and the time + given is not newer than the condition, I return + L{CACHED<http.CACHED>} to indicate that you should write no body. + Otherwise, I return a false value. + """ + + + def setETag(etag): + """ + Set an C{entity tag} for the outgoing response. + + That's "entity tag" as in the HTTP/1.1 I{ETag} header, "used for + comparing two or more entities from the same requested resource." + + If I am a conditional request, I may modify my response code to + L{NOT_MODIFIED<http.NOT_MODIFIED>} or + L{PRECONDITION_FAILED<http.PRECONDITION_FAILED>}, if appropriate for the + tag given. + + @param etag: The entity tag for the resource being returned. + @type etag: L{str} + + @return: If I am a C{If-None-Match} conditional request and the tag + matches one in the request, I return L{CACHED<http.CACHED>} to + indicate that you should write no body. Otherwise, I return a + false value. + """ + + + def setHost(host, port, ssl=0): + """ + Change the host and port the request thinks it's using. + + This method is useful for working with reverse HTTP proxies (e.g. both + Squid and Apache's mod_proxy can do this), when the address the HTTP + client is using is different than the one we're listening on. + + For example, Apache may be listening on https://www.example.com, and + then forwarding requests to http://localhost:8080, but we don't want + HTML produced by Twisted to say 'http://localhost:8080', they should + say 'https://www.example.com', so we do:: + + request.setHost('www.example.com', 443, ssl=1) + """ + + + +class INonQueuedRequestFactory(Interface): + """ + A factory of L{IRequest} objects that does not take a ``queued`` parameter. + """ + def __call__(channel): + """ + Create an L{IRequest} that is operating on the given channel. There + must only be one L{IRequest} object processing at any given time on a + channel. + + @param channel: A L{twisted.web.http.HTTPChannel} object. + @type channel: L{twisted.web.http.HTTPChannel} + + @return: A request object. + @rtype: L{IRequest} + """ + + + +class IAccessLogFormatter(Interface): + """ + An object which can represent an HTTP request as a line of text for + inclusion in an access log file. + """ + def __call__(timestamp, request): + """ + Generate a line for the access log. + + @param timestamp: The time at which the request was completed in the + standard format for access logs. 
+ @type timestamp: L{unicode} + + @param request: The request object about which to log. + @type request: L{twisted.web.server.Request} + + @return: One line describing the request without a trailing newline. + @rtype: L{unicode} + """ + + + +class ICredentialFactory(Interface): + """ + A credential factory defines a way to generate a particular kind of + authentication challenge and a way to interpret the responses to these + challenges. It creates + L{ICredentials<twisted.cred.credentials.ICredentials>} providers from + responses. These objects will be used with L{twisted.cred} to authenticate + an authorize requests. + """ + scheme = Attribute( + "A L{str} giving the name of the authentication scheme with which " + "this factory is associated. For example, C{'basic'} or C{'digest'}.") + + + def getChallenge(request): + """ + Generate a new challenge to be sent to a client. + + @type peer: L{twisted.web.http.Request} + @param peer: The request the response to which this challenge will be + included. + + @rtype: L{dict} + @return: A mapping from L{str} challenge fields to associated L{str} + values. + """ + + + def decode(response, request): + """ + Create a credentials object from the given response. + + @type response: L{str} + @param response: scheme specific response string + + @type request: L{twisted.web.http.Request} + @param request: The request being processed (from which the response + was taken). + + @raise twisted.cred.error.LoginFailed: If the response is invalid. + + @rtype: L{twisted.cred.credentials.ICredentials} provider + @return: The credentials represented by the given response. + """ + + + +class IBodyProducer(IPushProducer): + """ + Objects which provide L{IBodyProducer} write bytes to an object which + provides L{IConsumer<twisted.internet.interfaces.IConsumer>} by calling its + C{write} method repeatedly. + + L{IBodyProducer} providers may start producing as soon as they have an + L{IConsumer<twisted.internet.interfaces.IConsumer>} provider. That is, they + should not wait for a C{resumeProducing} call to begin writing data. + + L{IConsumer.unregisterProducer<twisted.internet.interfaces.IConsumer.unregisterProducer>} + must not be called. Instead, the + L{Deferred<twisted.internet.defer.Deferred>} returned from C{startProducing} + must be fired when all bytes have been written. + + L{IConsumer.write<twisted.internet.interfaces.IConsumer.write>} may + synchronously invoke any of C{pauseProducing}, C{resumeProducing}, or + C{stopProducing}. These methods must be implemented with this in mind. + + @since: 9.0 + """ + + # Despite the restrictions above and the additional requirements of + # stopProducing documented below, this interface still needs to be an + # IPushProducer subclass. Providers of it will be passed to IConsumer + # providers which only know about IPushProducer and IPullProducer, not + # about this interface. This interface needs to remain close enough to one + # of those interfaces for consumers to work with it. + + length = Attribute( + """ + C{length} is a L{int} indicating how many bytes in total this + L{IBodyProducer} will write to the consumer or L{UNKNOWN_LENGTH} + if this is not known in advance. + """) + + def startProducing(consumer): + """ + Start producing to the given + L{IConsumer<twisted.internet.interfaces.IConsumer>} provider. 
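+
+        For example, a minimal in-memory producer could look like this (an
+        illustrative sketch only; C{BytesProducer} is not part of Twisted,
+        and L{twisted.web.client.FileBodyProducer} is a real implementation
+        of the same idea)::
+
+            from zope.interface import implementer
+            from twisted.internet.defer import succeed
+            from twisted.web.iweb import IBodyProducer
+
+            @implementer(IBodyProducer)
+            class BytesProducer(object):
+                def __init__(self, body):
+                    self.body = body
+                    self.length = len(body)
+
+                def startProducing(self, consumer):
+                    # Write everything immediately and signal completion
+                    # with an already-fired Deferred.
+                    consumer.write(self.body)
+                    return succeed(None)
+
+                def pauseProducing(self):
+                    pass
+
+                def resumeProducing(self):
+                    pass
+
+                def stopProducing(self):
+                    pass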
+ + @return: A L{Deferred<twisted.internet.defer.Deferred>} which stops + production of data when L{Deferred.cancel} is called, and which + fires with L{None} when all bytes have been produced or with a + L{Failure<twisted.python.failure.Failure>} if there is any problem + before all bytes have been produced. + """ + + + def stopProducing(): + """ + In addition to the standard behavior of + L{IProducer.stopProducing<twisted.internet.interfaces.IProducer.stopProducing>} + (stop producing data), make sure the + L{Deferred<twisted.internet.defer.Deferred>} returned by + C{startProducing} is never fired. + """ + + + +class IRenderable(Interface): + """ + An L{IRenderable} is an object that may be rendered by the + L{twisted.web.template} templating system. + """ + + def lookupRenderMethod(name): + """ + Look up and return the render method associated with the given name. + + @type name: L{str} + @param name: The value of a render directive encountered in the + document returned by a call to L{IRenderable.render}. + + @return: A two-argument callable which will be invoked with the request + being responded to and the tag object on which the render directive + was encountered. + """ + + + def render(request): + """ + Get the document for this L{IRenderable}. + + @type request: L{IRequest} provider or L{None} + @param request: The request in response to which this method is being + invoked. + + @return: An object which can be flattened. + """ + + + +class ITemplateLoader(Interface): + """ + A loader for templates; something usable as a value for + L{twisted.web.template.Element}'s C{loader} attribute. + """ + + def load(): + """ + Load a template suitable for rendering. + + @return: a L{list} of L{list}s, L{unicode} objects, C{Element}s and + other L{IRenderable} providers. + """ + + + +class IResponse(Interface): + """ + An object representing an HTTP response received from an HTTP server. + + @since: 11.1 + """ + + version = Attribute( + "A three-tuple describing the protocol and protocol version " + "of the response. The first element is of type L{str}, the second " + "and third are of type L{int}. For example, C{(b'HTTP', 1, 1)}.") + + + code = Attribute("The HTTP status code of this response, as a L{int}.") + + + phrase = Attribute( + "The HTTP reason phrase of this response, as a L{str}.") + + + headers = Attribute("The HTTP response L{Headers} of this response.") + + + length = Attribute( + "The L{int} number of bytes expected to be in the body of this " + "response or L{UNKNOWN_LENGTH} if the server did not indicate how " + "many bytes to expect. For I{HEAD} responses, this will be 0; if " + "the response includes a I{Content-Length} header, it will be " + "available in C{headers}.") + + + request = Attribute( + "The L{IClientRequest} that resulted in this response.") + + + previousResponse = Attribute( + "The previous L{IResponse} from a redirect, or L{None} if there was no " + "previous response. This can be used to walk the response or request " + "history for redirections.") + + + def deliverBody(protocol): + """ + Register an L{IProtocol<twisted.internet.interfaces.IProtocol>} provider + to receive the response body. + + The protocol will be connected to a transport which provides + L{IPushProducer}. The protocol's C{connectionLost} method will be + called with: + + - ResponseDone, which indicates that all bytes from the response + have been successfully delivered. + + - PotentialDataLoss, which indicates that it cannot be determined + if the entire response body has been delivered. 
This only occurs + when making requests to HTTP servers which do not set + I{Content-Length} or a I{Transfer-Encoding} in the response. + + - ResponseFailed, which indicates that some bytes from the response + were lost. The C{reasons} attribute of the exception may provide + more specific indications as to why. + """ + + + def setPreviousResponse(response): + """ + Set the reference to the previous L{IResponse}. + + The value of the previous response can be read via + L{IResponse.previousResponse}. + """ + + + +class _IRequestEncoder(Interface): + """ + An object encoding data passed to L{IRequest.write}, for example for + compression purpose. + + @since: 12.3 + """ + + def encode(data): + """ + Encode the data given and return the result. + + @param data: The content to encode. + @type data: L{str} + + @return: The encoded data. + @rtype: L{str} + """ + + + def finish(): + """ + Callback called when the request is closing. + + @return: If necessary, the pending data accumulated from previous + C{encode} calls. + @rtype: L{str} + """ + + + +class _IRequestEncoderFactory(Interface): + """ + A factory for returing L{_IRequestEncoder} instances. + + @since: 12.3 + """ + + def encoderForRequest(request): + """ + If applicable, returns a L{_IRequestEncoder} instance which will encode + the request. + """ + + + +class IClientRequest(Interface): + """ + An object representing an HTTP request to make to an HTTP server. + + @since: 13.1 + """ + method = Attribute( + "The HTTP method for this request, as L{bytes}. For example: " + "C{b'GET'}, C{b'HEAD'}, C{b'POST'}, etc.") + + + absoluteURI = Attribute( + "The absolute URI of the requested resource, as L{bytes}; or L{None} " + "if the absolute URI cannot be determined.") + + + headers = Attribute( + "Headers to be sent to the server, as " + "a L{twisted.web.http_headers.Headers} instance.") + + + +class IAgent(Interface): + """ + An agent makes HTTP requests. + + The way in which requests are issued is left up to each implementation. + Some may issue them directly to the server indicated by the net location + portion of the request URL. Others may use a proxy specified by system + configuration. + + Processing of responses is also left very widely specified. An + implementation may perform no special handling of responses, or it may + implement redirect following or content negotiation, it may implement a + cookie store or automatically respond to authentication challenges. It may + implement many other unforeseen behaviors as well. + + It is also intended that L{IAgent} implementations be composable. An + implementation which provides cookie handling features should re-use an + implementation that provides connection pooling and this combination could + be used by an implementation which adds content negotiation functionality. + Some implementations will be completely self-contained, such as those which + actually perform the network operations to send and receive requests, but + most or all other implementations should implement a small number of new + features (perhaps one new feature) and delegate the rest of the + request/response machinery to another implementation. + + This allows for great flexibility in the behavior an L{IAgent} will + provide. 
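+
+    Whatever the composition, issuing a request looks the same from the
+    caller's side. A minimal, illustrative sketch (C{readBody} is the
+    body-collecting helper from L{twisted.web.client})::
+
+        from twisted.internet import reactor
+        from twisted.web.client import Agent, readBody
+
+        agent = Agent(reactor)
+        d = agent.request(b'GET', b'http://example.com/')
+        # The Deferred fires with an IResponse; readBody collects its body.
+        d.addCallback(readBody)
+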
For example, an L{IAgent} with web browser-like behavior could be + obtained by combining a number of (hypothetical) implementations:: + + baseAgent = Agent(reactor) + redirect = BrowserLikeRedirectAgent(baseAgent, limit=10) + authenticate = AuthenticateAgent( + redirect, [diskStore.credentials, GtkAuthInterface()]) + cookie = CookieAgent(authenticate, diskStore.cookie) + decode = ContentDecoderAgent(cookie, [(b"gzip", GzipDecoder())]) + cache = CacheAgent(decode, diskStore.cache) + + doSomeRequests(cache) + """ + def request(method, uri, headers=None, bodyProducer=None): + """ + Request the resource at the given location. + + @param method: The request method to use, such as C{"GET"}, C{"HEAD"}, + C{"PUT"}, C{"POST"}, etc. + @type method: L{bytes} + + @param uri: The location of the resource to request. This should be an + absolute URI but some implementations may support relative URIs + (with absolute or relative paths). I{HTTP} and I{HTTPS} are the + schemes most likely to be supported but others may be as well. + @type uri: L{bytes} + + @param headers: The headers to send with the request (or L{None} to + send no extra headers). An implementation may add its own headers + to this (for example for client identification or content + negotiation). + @type headers: L{Headers} or L{None} + + @param bodyProducer: An object which can generate bytes to make up the + body of this request (for example, the properly encoded contents of + a file for a file upload). Or, L{None} if the request is to have + no body. + @type bodyProducer: L{IBodyProducer} provider + + @return: A L{Deferred} that fires with an L{IResponse} provider when + the header of the response has been received (regardless of the + response status code) or with a L{Failure} if there is any problem + which prevents that response from being received (including + problems that prevent the request from being sent). + @rtype: L{Deferred} + """ + + +class IPolicyForHTTPS(Interface): + """ + An L{IPolicyForHTTPS} provides a policy for verifying the certificates of + HTTPS connections, in the form of a L{client connection creator + <twisted.internet.interfaces.IOpenSSLClientConnectionCreator>} per network + location. + + @since: 14.0 + """ + + def creatorForNetloc(hostname, port): + """ + Create a L{client connection creator + <twisted.internet.interfaces.IOpenSSLClientConnectionCreator>} + appropriate for the given URL "netloc"; i.e. hostname and port number + pair. + + @param hostname: The name of the requested remote host. + @type hostname: L{bytes} + + @param port: The number of the requested remote port. + @type port: L{int} + + @return: A client connection creator expressing the security + requirements for the given remote host. + @rtype: L{client connection creator + <twisted.internet.interfaces.IOpenSSLClientConnectionCreator>} + """ + + + +class IAgentEndpointFactory(Interface): + """ + An L{IAgentEndpointFactory} provides a way of constructing an endpoint + used for outgoing Agent requests. This is useful in the case of needing to + proxy outgoing connections, or to otherwise vary the transport used. + + @since: 15.0 + """ + + def endpointForURI(uri): + """ + Construct and return an L{IStreamClientEndpoint} for the outgoing + request's connection. + + @param uri: The URI of the request. + @type uri: L{twisted.web.client.URI} + + @return: An endpoint which will have its C{connect} method called to + issue the request. 
+ @rtype: an L{IStreamClientEndpoint} provider + + @raises twisted.internet.error.SchemeNotSupported: If the given + URI's scheme cannot be handled by this factory. + """ + + + +UNKNOWN_LENGTH = u"twisted.web.iweb.UNKNOWN_LENGTH" + +__all__ = [ + "IUsernameDigestHash", "ICredentialFactory", "IRequest", + "IBodyProducer", "IRenderable", "IResponse", "_IRequestEncoder", + "_IRequestEncoderFactory", "IClientRequest", + + "UNKNOWN_LENGTH"] diff --git a/contrib/python/Twisted/py2/twisted/web/microdom.py b/contrib/python/Twisted/py2/twisted/web/microdom.py new file mode 100644 index 0000000000..1f52279011 --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/microdom.py @@ -0,0 +1,1145 @@ +# -*- test-case-name: twisted.web.test.test_xml -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +Micro Document Object Model: a partial DOM implementation with SUX. + +This is an implementation of what we consider to be the useful subset of the +DOM. The chief advantage of this library is that, not being burdened with +standards compliance, it can remain very stable between versions. We can also +implement utility 'pythonic' ways to access and mutate the XML tree. + +Since this has not subjected to a serious trial by fire, it is not recommended +to use this outside of Twisted applications. However, it seems to work just +fine for the documentation generator, which parses a fairly representative +sample of XML. + +Microdom mainly focuses on working with HTML and XHTML. +""" + +# System Imports +import re +from io import BytesIO, StringIO + + +# Twisted Imports +from twisted.python.compat import ioType, iteritems, range, unicode +from twisted.python.util import InsensitiveDict +from twisted.web.sux import XMLParser, ParseError + + +def getElementsByTagName(iNode, name): + """ + Return a list of all child elements of C{iNode} with a name matching + C{name}. + + Note that this implementation does not conform to the DOM Level 1 Core + specification because it may return C{iNode}. + + @param iNode: An element at which to begin searching. If C{iNode} has a + name matching C{name}, it will be included in the result. + + @param name: A C{str} giving the name of the elements to return. + + @return: A C{list} of direct or indirect child elements of C{iNode} with + the name C{name}. This may include C{iNode}. + """ + matches = [] + matches_append = matches.append # faster lookup. 
don't do this at home + slice = [iNode] + while len(slice) > 0: + c = slice.pop(0) + if c.nodeName == name: + matches_append(c) + slice[:0] = c.childNodes + return matches + + + +def getElementsByTagNameNoCase(iNode, name): + name = name.lower() + matches = [] + matches_append = matches.append + slice = [iNode] + while len(slice) > 0: + c = slice.pop(0) + if c.nodeName.lower() == name: + matches_append(c) + slice[:0] = c.childNodes + return matches + + + +def _streamWriteWrapper(stream): + if ioType(stream) == bytes: + def w(s): + if isinstance(s, unicode): + s = s.encode("utf-8") + stream.write(s) + else: + def w(s): + if isinstance(s, bytes): + s = s.decode("utf-8") + stream.write(s) + return w + +# order is important +HTML_ESCAPE_CHARS = (('&', '&'), # don't add any entities before this one + ('<', '<'), + ('>', '>'), + ('"', '"')) +REV_HTML_ESCAPE_CHARS = list(HTML_ESCAPE_CHARS) +REV_HTML_ESCAPE_CHARS.reverse() + +XML_ESCAPE_CHARS = HTML_ESCAPE_CHARS + (("'", '''),) +REV_XML_ESCAPE_CHARS = list(XML_ESCAPE_CHARS) +REV_XML_ESCAPE_CHARS.reverse() + +def unescape(text, chars=REV_HTML_ESCAPE_CHARS): + """ + Perform the exact opposite of 'escape'. + """ + for s, h in chars: + text = text.replace(h, s) + return text + + + +def escape(text, chars=HTML_ESCAPE_CHARS): + """ + Escape a few XML special chars with XML entities. + """ + for s, h in chars: + text = text.replace(s, h) + return text + + + +class MismatchedTags(Exception): + + def __init__(self, filename, expect, got, endLine, endCol, begLine, begCol): + (self.filename, self.expect, self.got, self.begLine, self.begCol, self.endLine, + self.endCol) = filename, expect, got, begLine, begCol, endLine, endCol + + + def __str__(self): + return ("expected </%s>, got </%s> line: %s col: %s, began line: %s col: %s" + % (self.expect, self.got, self.endLine, self.endCol, self.begLine, + self.begCol)) + + + +class Node(object): + nodeName = "Node" + + def __init__(self, parentNode=None): + self.parentNode = parentNode + self.childNodes = [] + + + def isEqualToNode(self, other): + """ + Compare this node to C{other}. If the nodes have the same number of + children and corresponding children are equal to each other, return + C{True}, otherwise return C{False}. + + @type other: L{Node} + @rtype: C{bool} + """ + if len(self.childNodes) != len(other.childNodes): + return False + for a, b in zip(self.childNodes, other.childNodes): + if not a.isEqualToNode(b): + return False + return True + + + def writexml(self, stream, indent='', addindent='', newl='', strip=0, + nsprefixes={}, namespace=''): + raise NotImplementedError() + + + def toxml(self, indent='', addindent='', newl='', strip=0, nsprefixes={}, + namespace=''): + s = StringIO() + self.writexml(s, indent, addindent, newl, strip, nsprefixes, namespace) + rv = s.getvalue() + return rv + + + def writeprettyxml(self, stream, indent='', addindent=' ', newl='\n', strip=0): + return self.writexml(stream, indent, addindent, newl, strip) + + + def toprettyxml(self, indent='', addindent=' ', newl='\n', strip=0): + return self.toxml(indent, addindent, newl, strip) + + + def cloneNode(self, deep=0, parent=None): + raise NotImplementedError() + + + def hasChildNodes(self): + if self.childNodes: + return 1 + else: + return 0 + + + def appendChild(self, child): + """ + Make the given L{Node} the last child of this node. + + @param child: The L{Node} which will become a child of this node. + + @raise TypeError: If C{child} is not a C{Node} instance. 
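+
+        For example, building a tiny tree with this module's own node
+        classes (a minimal, illustrative sketch)::
+
+            from twisted.web.microdom import Element, Text
+
+            parent = Element('p')
+            parent.appendChild(Text('hello'))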
+ """ + if not isinstance(child, Node): + raise TypeError("expected Node instance") + self.childNodes.append(child) + child.parentNode = self + + + def insertBefore(self, new, ref): + """ + Make the given L{Node} C{new} a child of this node which comes before + the L{Node} C{ref}. + + @param new: A L{Node} which will become a child of this node. + + @param ref: A L{Node} which is already a child of this node which + C{new} will be inserted before. + + @raise TypeError: If C{new} or C{ref} is not a C{Node} instance. + + @return: C{new} + """ + if not isinstance(new, Node) or not isinstance(ref, Node): + raise TypeError("expected Node instance") + i = self.childNodes.index(ref) + new.parentNode = self + self.childNodes.insert(i, new) + return new + + + def removeChild(self, child): + """ + Remove the given L{Node} from this node's children. + + @param child: A L{Node} which is a child of this node which will no + longer be a child of this node after this method is called. + + @raise TypeError: If C{child} is not a C{Node} instance. + + @return: C{child} + """ + if not isinstance(child, Node): + raise TypeError("expected Node instance") + if child in self.childNodes: + self.childNodes.remove(child) + child.parentNode = None + return child + + + def replaceChild(self, newChild, oldChild): + """ + Replace a L{Node} which is already a child of this node with a + different node. + + @param newChild: A L{Node} which will be made a child of this node. + + @param oldChild: A L{Node} which is a child of this node which will + give up its position to C{newChild}. + + @raise TypeError: If C{newChild} or C{oldChild} is not a C{Node} + instance. + + @raise ValueError: If C{oldChild} is not a child of this C{Node}. + """ + if not isinstance(newChild, Node) or not isinstance(oldChild, Node): + raise TypeError("expected Node instance") + if oldChild.parentNode is not self: + raise ValueError("oldChild is not a child of this node") + self.childNodes[self.childNodes.index(oldChild)] = newChild + oldChild.parentNode = None + newChild.parentNode = self + + + def lastChild(self): + return self.childNodes[-1] + + + def firstChild(self): + if len(self.childNodes): + return self.childNodes[0] + return None + + #def get_ownerDocument(self): + # """This doesn't really get the owner document; microdom nodes + # don't even have one necessarily. This gets the root node, + # which is usually what you really meant. + # *NOT DOM COMPLIANT.* + # """ + # node=self + # while (node.parentNode): node=node.parentNode + # return node + #ownerDocument=node.get_ownerDocument() + # leaving commented for discussion; see also domhelpers.getParents(node) + + + +class Document(Node): + + def __init__(self, documentElement=None): + Node.__init__(self) + if documentElement: + self.appendChild(documentElement) + + + def cloneNode(self, deep=0, parent=None): + d = Document() + d.doctype = self.doctype + if deep: + newEl = self.documentElement.cloneNode(1, self) + else: + newEl = self.documentElement + d.appendChild(newEl) + return d + + doctype = None + + def isEqualToDocument(self, n): + return (self.doctype == n.doctype) and Node.isEqualToNode(self, n) + isEqualToNode = isEqualToDocument + + + def get_documentElement(self): + return self.childNodes[0] + documentElement = property(get_documentElement) + + + def appendChild(self, child): + """ + Make the given L{Node} the I{document element} of this L{Document}. + + @param child: The L{Node} to make into this L{Document}'s document + element. 
+ + @raise ValueError: If this document already has a document element. + """ + if self.childNodes: + raise ValueError("Only one element per document.") + Node.appendChild(self, child) + + + def writexml(self, stream, indent='', addindent='', newl='', strip=0, + nsprefixes={}, namespace=''): + w = _streamWriteWrapper(stream) + + w('<?xml version="1.0"?>' + newl) + if self.doctype: + w(u"<!DOCTYPE {}>{}".format(self.doctype, newl)) + self.documentElement.writexml(stream, indent, addindent, newl, strip, + nsprefixes, namespace) + + + # of dubious utility (?) + def createElement(self, name, **kw): + return Element(name, **kw) + + + def createTextNode(self, text): + return Text(text) + + + def createComment(self, text): + return Comment(text) + + + def getElementsByTagName(self, name): + if self.documentElement.caseInsensitive: + return getElementsByTagNameNoCase(self, name) + return getElementsByTagName(self, name) + + + def getElementById(self, id): + childNodes = self.childNodes[:] + while childNodes: + node = childNodes.pop(0) + if node.childNodes: + childNodes.extend(node.childNodes) + if hasattr(node, 'getAttribute') and node.getAttribute("id") == id: + return node + + + +class EntityReference(Node): + + def __init__(self, eref, parentNode=None): + Node.__init__(self, parentNode) + self.eref = eref + self.nodeValue = self.data = "&" + eref + ";" + + + def isEqualToEntityReference(self, n): + if not isinstance(n, EntityReference): + return 0 + return (self.eref == n.eref) and (self.nodeValue == n.nodeValue) + isEqualToNode = isEqualToEntityReference + + + def writexml(self, stream, indent='', addindent='', newl='', strip=0, + nsprefixes={}, namespace=''): + w = _streamWriteWrapper(stream) + w("" + self.nodeValue) + + + def cloneNode(self, deep=0, parent=None): + return EntityReference(self.eref, parent) + + + +class CharacterData(Node): + + def __init__(self, data, parentNode=None): + Node.__init__(self, parentNode) + self.value = self.data = self.nodeValue = data + + + def isEqualToCharacterData(self, n): + return self.value == n.value + isEqualToNode = isEqualToCharacterData + + + +class Comment(CharacterData): + """ + A comment node. + """ + + def writexml(self, stream, indent='', addindent='', newl='', strip=0, + nsprefixes={}, namespace=''): + w = _streamWriteWrapper(stream) + val = self.data + w(u"<!--{}-->".format(val)) + + + def cloneNode(self, deep=0, parent=None): + return Comment(self.nodeValue, parent) + + + +class Text(CharacterData): + + def __init__(self, data, parentNode=None, raw=0): + CharacterData.__init__(self, data, parentNode) + self.raw = raw + + + def isEqualToNode(self, other): + """ + Compare this text to C{text}. If the underlying values and the C{raw} + flag are the same, return C{True}, otherwise return C{False}. 
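+
+        For example (an illustrative sketch)::
+
+            Text('x').isEqualToNode(Text('x'))                 # True
+            Text('x', raw=0).isEqualToNode(Text('x', raw=1))   # False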
+ """ + return ( + CharacterData.isEqualToNode(self, other) and + self.raw == other.raw) + + + def cloneNode(self, deep=0, parent=None): + return Text(self.nodeValue, parent, self.raw) + + + def writexml(self, stream, indent='', addindent='', newl='', strip=0, + nsprefixes={}, namespace=''): + w = _streamWriteWrapper(stream) + if self.raw: + val = self.nodeValue + if not isinstance(val, (str, unicode)): + val = str(self.nodeValue) + else: + v = self.nodeValue + if not isinstance(v, (str, unicode)): + v = str(v) + if strip: + v = ' '.join(v.split()) + val = escape(v) + w(val) + + + def __repr__(self): + return "Text(%s" % repr(self.nodeValue) + ')' + + + +class CDATASection(CharacterData): + def cloneNode(self, deep=0, parent=None): + return CDATASection(self.nodeValue, parent) + + + def writexml(self, stream, indent='', addindent='', newl='', strip=0, + nsprefixes={}, namespace=''): + w = _streamWriteWrapper(stream) + w("<![CDATA[") + w("" + self.nodeValue) + w("]]>") + + + +def _genprefix(): + i = 0 + while True: + yield 'p' + str(i) + i = i + 1 +genprefix = _genprefix() + + + +class _Attr(CharacterData): + "Support class for getAttributeNode." + + + +class Element(Node): + + preserveCase = 0 + caseInsensitive = 1 + nsprefixes = None + + def __init__(self, tagName, attributes=None, parentNode=None, + filename=None, markpos=None, + caseInsensitive=1, preserveCase=0, + namespace=None): + Node.__init__(self, parentNode) + self.preserveCase = preserveCase or not caseInsensitive + self.caseInsensitive = caseInsensitive + if not preserveCase: + tagName = tagName.lower() + if attributes is None: + self.attributes = {} + else: + self.attributes = attributes + for k, v in self.attributes.items(): + self.attributes[k] = unescape(v) + + if caseInsensitive: + self.attributes = InsensitiveDict(self.attributes, + preserve=preserveCase) + + self.endTagName = self.nodeName = self.tagName = tagName + self._filename = filename + self._markpos = markpos + self.namespace = namespace + + + def addPrefixes(self, pfxs): + if self.nsprefixes is None: + self.nsprefixes = pfxs + else: + self.nsprefixes.update(pfxs) + + + def endTag(self, endTagName): + if not self.preserveCase: + endTagName = endTagName.lower() + self.endTagName = endTagName + + + def isEqualToElement(self, n): + if self.caseInsensitive: + return ((self.attributes == n.attributes) + and (self.nodeName.lower() == n.nodeName.lower())) + return (self.attributes == n.attributes) and (self.nodeName == n.nodeName) + + + def isEqualToNode(self, other): + """ + Compare this element to C{other}. If the C{nodeName}, C{namespace}, + C{attributes}, and C{childNodes} are all the same, return C{True}, + otherwise return C{False}. 
+ """ + return ( + self.nodeName.lower() == other.nodeName.lower() and + self.namespace == other.namespace and + self.attributes == other.attributes and + Node.isEqualToNode(self, other)) + + + def cloneNode(self, deep=0, parent=None): + clone = Element( + self.tagName, parentNode=parent, namespace=self.namespace, + preserveCase=self.preserveCase, caseInsensitive=self.caseInsensitive) + clone.attributes.update(self.attributes) + if deep: + clone.childNodes = [child.cloneNode(1, clone) for child in self.childNodes] + else: + clone.childNodes = [] + return clone + + + def getElementsByTagName(self, name): + if self.caseInsensitive: + return getElementsByTagNameNoCase(self, name) + return getElementsByTagName(self, name) + + + def hasAttributes(self): + return 1 + + + def getAttribute(self, name, default=None): + return self.attributes.get(name, default) + + + def getAttributeNS(self, ns, name, default=None): + nsk = (ns, name) + if nsk in self.attributes: + return self.attributes[nsk] + if ns == self.namespace: + return self.attributes.get(name, default) + return default + + + def getAttributeNode(self, name): + return _Attr(self.getAttribute(name), self) + + + def setAttribute(self, name, attr): + self.attributes[name] = attr + + + def removeAttribute(self, name): + if name in self.attributes: + del self.attributes[name] + + + def hasAttribute(self, name): + return name in self.attributes + + + def writexml(self, stream, indent='', addindent='', newl='', strip=0, + nsprefixes={}, namespace=''): + """ + Serialize this L{Element} to the given stream. + + @param stream: A file-like object to which this L{Element} will be + written. + + @param nsprefixes: A C{dict} mapping namespace URIs as C{str} to + prefixes as C{str}. This defines the prefixes which are already in + scope in the document at the point at which this L{Element} exists. + This is essentially an implementation detail for namespace support. + Applications should not try to use it. + + @param namespace: The namespace URI as a C{str} which is the default at + the point in the document at which this L{Element} exists. This is + essentially an implementation detail for namespace support. + Applications should not try to use it. + """ + # write beginning + ALLOWSINGLETON = ('img', 'br', 'hr', 'base', 'meta', 'link', 'param', + 'area', 'input', 'col', 'basefont', 'isindex', + 'frame') + BLOCKELEMENTS = ('html', 'head', 'body', 'noscript', 'ins', 'del', + 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'script', + 'ul', 'ol', 'dl', 'pre', 'hr', 'blockquote', + 'address', 'p', 'div', 'fieldset', 'table', 'tr', + 'form', 'object', 'fieldset', 'applet', 'map') + FORMATNICELY = ('tr', 'ul', 'ol', 'head') + + # this should never be necessary unless people start + # changing .tagName on the fly(?) + if not self.preserveCase: + self.endTagName = self.tagName + + w = _streamWriteWrapper(stream) + if self.nsprefixes: + newprefixes = self.nsprefixes.copy() + for ns in nsprefixes.keys(): + if ns in newprefixes: + del newprefixes[ns] + else: + newprefixes = {} + + begin = ['<'] + if self.tagName in BLOCKELEMENTS: + begin = [newl, indent] + begin + bext = begin.extend + writeattr = lambda _atr, _val: bext((' ', _atr, '="', escape(_val), '"')) + + # Make a local for tracking what end tag will be used. If namespace + # prefixes are involved, this will be changed to account for that + # before it's actually used. 
+ endTagName = self.endTagName + + if namespace != self.namespace and self.namespace is not None: + # If the current default namespace is not the namespace of this tag + # (and this tag has a namespace at all) then we'll write out + # something related to namespaces. + if self.namespace in nsprefixes: + # This tag's namespace already has a prefix bound to it. Use + # that prefix. + prefix = nsprefixes[self.namespace] + bext(prefix + ':' + self.tagName) + # Also make sure we use it for the end tag. + endTagName = prefix + ':' + self.endTagName + else: + # This tag's namespace has no prefix bound to it. Change the + # default namespace to this tag's namespace so we don't need + # prefixes. Alternatively, we could add a new prefix binding. + # I'm not sure why the code was written one way rather than the + # other. -exarkun + bext(self.tagName) + writeattr("xmlns", self.namespace) + # The default namespace just changed. Make sure any children + # know about this. + namespace = self.namespace + else: + # This tag has no namespace or its namespace is already the default + # namespace. Nothing extra to do here. + bext(self.tagName) + + j = ''.join + for attr, val in sorted(self.attributes.items()): + if isinstance(attr, tuple): + ns, key = attr + if ns in nsprefixes: + prefix = nsprefixes[ns] + else: + prefix = next(genprefix) + newprefixes[ns] = prefix + assert val is not None + writeattr(prefix + ':' + key, val) + else: + assert val is not None + writeattr(attr, val) + if newprefixes: + for ns, prefix in iteritems(newprefixes): + if prefix: + writeattr('xmlns:'+prefix, ns) + newprefixes.update(nsprefixes) + downprefixes = newprefixes + else: + downprefixes = nsprefixes + w(j(begin)) + if self.childNodes: + w(">") + newindent = indent + addindent + for child in self.childNodes: + if self.tagName in BLOCKELEMENTS and \ + self.tagName in FORMATNICELY: + w(j((newl, newindent))) + child.writexml(stream, newindent, addindent, newl, strip, + downprefixes, namespace) + if self.tagName in BLOCKELEMENTS: + w(j((newl, indent))) + w(j(('</', endTagName, '>'))) + elif self.tagName.lower() not in ALLOWSINGLETON: + w(j(('></', endTagName, '>'))) + else: + w(" />") + + + def __repr__(self): + rep = "Element(%s" % repr(self.nodeName) + if self.attributes: + rep += ", attributes=%r" % (self.attributes,) + if self._filename: + rep += ", filename=%r" % (self._filename,) + if self._markpos: + rep += ", markpos=%r" % (self._markpos,) + return rep + ')' + + + def __str__(self): + rep = "<" + self.nodeName + if self._filename or self._markpos: + rep += " (" + if self._filename: + rep += repr(self._filename) + if self._markpos: + rep += " line %s column %s" % self._markpos + if self._filename or self._markpos: + rep += ")" + for item in self.attributes.items(): + rep += " %s=%r" % item + if self.hasChildNodes(): + rep += " >...</%s>" % self.nodeName + else: + rep += " />" + return rep + + + +def _unescapeDict(d): + dd = {} + for k, v in d.items(): + dd[k] = unescape(v) + return dd + + + +def _reverseDict(d): + dd = {} + for k, v in d.items(): + dd[v] = k + return dd + + + +class MicroDOMParser(XMLParser): + + # <dash> glyph: a quick scan thru the DTD says BODY, AREA, LINK, IMG, HR, + # P, DT, DD, LI, INPUT, OPTION, THEAD, TFOOT, TBODY, COLGROUP, COL, TR, TH, + # TD, HEAD, BASE, META, HTML all have optional closing tags + + soonClosers = 'area link br img hr input base meta'.split() + laterClosers = {'p': ['p', 'dt'], + 'dt': ['dt', 'dd'], + 'dd': ['dt', 'dd'], + 'li': ['li'], + 'tbody': ['thead', 'tfoot', 'tbody'], + 
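+                     # e.g. an open <tbody> (or <thead>/<tfoot>) is
+                     # implicitly closed by the next table-section tag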
'thead': ['thead', 'tfoot', 'tbody'], + 'tfoot': ['thead', 'tfoot', 'tbody'], + 'colgroup': ['colgroup'], + 'col': ['col'], + 'tr': ['tr'], + 'td': ['td'], + 'th': ['th'], + 'head': ['body'], + 'title': ['head', 'body'], # this looks wrong... + 'option': ['option'], + } + + + def __init__(self, beExtremelyLenient=0, caseInsensitive=1, preserveCase=0, + soonClosers=soonClosers, laterClosers=laterClosers): + self.elementstack = [] + d = {'xmlns': 'xmlns', '': None} + dr = _reverseDict(d) + self.nsstack = [(d, None, dr)] + self.documents = [] + self._mddoctype = None + self.beExtremelyLenient = beExtremelyLenient + self.caseInsensitive = caseInsensitive + self.preserveCase = preserveCase or not caseInsensitive + self.soonClosers = soonClosers + self.laterClosers = laterClosers + # self.indentlevel = 0 + + + def shouldPreserveSpace(self): + for edx in range(len(self.elementstack)): + el = self.elementstack[-edx] + if el.tagName == 'pre' or el.getAttribute("xml:space", '') == 'preserve': + return 1 + return 0 + + + def _getparent(self): + if self.elementstack: + return self.elementstack[-1] + else: + return None + + COMMENT = re.compile(r"\s*/[/*]\s*") + + def _fixScriptElement(self, el): + # this deals with case where there is comment or CDATA inside + # <script> tag and we want to do the right thing with it + if not self.beExtremelyLenient or not len(el.childNodes) == 1: + return + c = el.firstChild() + if isinstance(c, Text): + # deal with nasty people who do stuff like: + # <script> // <!-- + # x = 1; + # // --></script> + # tidy does this, for example. + prefix = "" + oldvalue = c.value + match = self.COMMENT.match(oldvalue) + if match: + prefix = match.group() + oldvalue = oldvalue[len(prefix):] + + # now see if contents are actual node and comment or CDATA + try: + e = parseString("<a>%s</a>" % oldvalue).childNodes[0] + except (ParseError, MismatchedTags): + return + if len(e.childNodes) != 1: + return + e = e.firstChild() + if isinstance(e, (CDATASection, Comment)): + el.childNodes = [] + if prefix: + el.childNodes.append(Text(prefix)) + el.childNodes.append(e) + + + def gotDoctype(self, doctype): + self._mddoctype = doctype + + + def gotTagStart(self, name, attributes): + # print ' '*self.indentlevel, 'start tag',name + # self.indentlevel += 1 + parent = self._getparent() + if (self.beExtremelyLenient and isinstance(parent, Element)): + parentName = parent.tagName + myName = name + if self.caseInsensitive: + parentName = parentName.lower() + myName = myName.lower() + if myName in self.laterClosers.get(parentName, []): + self.gotTagEnd(parent.tagName) + parent = self._getparent() + attributes = _unescapeDict(attributes) + namespaces = self.nsstack[-1][0] + newspaces = {} + keysToDelete = [] + for k, v in attributes.items(): + if k.startswith('xmlns'): + spacenames = k.split(':', 1) + if len(spacenames) == 2: + newspaces[spacenames[1]] = v + else: + newspaces[''] = v + keysToDelete.append(k) + for k in keysToDelete: + del attributes[k] + if newspaces: + namespaces = namespaces.copy() + namespaces.update(newspaces) + keysToDelete = [] + for k, v in attributes.items(): + ksplit = k.split(':', 1) + if len(ksplit) == 2: + pfx, tv = ksplit + if pfx != 'xml' and pfx in namespaces: + attributes[namespaces[pfx], tv] = v + keysToDelete.append(k) + for k in keysToDelete: + del attributes[k] + el = Element(name, attributes, parent, + self.filename, self.saveMark(), + caseInsensitive=self.caseInsensitive, + preserveCase=self.preserveCase, + namespace=namespaces.get('')) + revspaces = 
_reverseDict(newspaces) + el.addPrefixes(revspaces) + + if newspaces: + rscopy = self.nsstack[-1][2].copy() + rscopy.update(revspaces) + self.nsstack.append((namespaces, el, rscopy)) + self.elementstack.append(el) + if parent: + parent.appendChild(el) + if (self.beExtremelyLenient and el.tagName in self.soonClosers): + self.gotTagEnd(name) + + + def _gotStandalone(self, factory, data): + parent = self._getparent() + te = factory(data, parent) + if parent: + parent.appendChild(te) + elif self.beExtremelyLenient: + self.documents.append(te) + + + def gotText(self, data): + if data.strip() or self.shouldPreserveSpace(): + self._gotStandalone(Text, data) + + + def gotComment(self, data): + self._gotStandalone(Comment, data) + + + def gotEntityReference(self, entityRef): + self._gotStandalone(EntityReference, entityRef) + + + def gotCData(self, cdata): + self._gotStandalone(CDATASection, cdata) + + + def gotTagEnd(self, name): + # print ' '*self.indentlevel, 'end tag',name + # self.indentlevel -= 1 + if not self.elementstack: + if self.beExtremelyLenient: + return + raise MismatchedTags(*((self.filename, "NOTHING", name) + + self.saveMark() + (0, 0))) + el = self.elementstack.pop() + pfxdix = self.nsstack[-1][2] + if self.nsstack[-1][1] is el: + nstuple = self.nsstack.pop() + else: + nstuple = None + if self.caseInsensitive: + tn = el.tagName.lower() + cname = name.lower() + else: + tn = el.tagName + cname = name + + nsplit = name.split(':', 1) + if len(nsplit) == 2: + pfx, newname = nsplit + ns = pfxdix.get(pfx, None) + if ns is not None: + if el.namespace != ns: + if not self.beExtremelyLenient: + raise MismatchedTags(*((self.filename, el.tagName, name) + + self.saveMark() + el._markpos)) + if not (tn == cname): + if self.beExtremelyLenient: + if self.elementstack: + lastEl = self.elementstack[0] + for idx in range(len(self.elementstack)): + if self.elementstack[-(idx+1)].tagName == cname: + self.elementstack[-(idx+1)].endTag(name) + break + else: + # this was a garbage close tag; wait for a real one + self.elementstack.append(el) + if nstuple is not None: + self.nsstack.append(nstuple) + return + del self.elementstack[-(idx+1):] + if not self.elementstack: + self.documents.append(lastEl) + return + else: + raise MismatchedTags(*((self.filename, el.tagName, name) + + self.saveMark() + el._markpos)) + el.endTag(name) + if not self.elementstack: + self.documents.append(el) + if self.beExtremelyLenient and el.tagName == "script": + self._fixScriptElement(el) + + + def connectionLost(self, reason): + XMLParser.connectionLost(self, reason) # This can cause more events! + if self.elementstack: + if self.beExtremelyLenient: + self.documents.append(self.elementstack[0]) + else: + raise MismatchedTags(*((self.filename, self.elementstack[-1], + "END_OF_FILE") + + self.saveMark() + + self.elementstack[-1]._markpos)) + + + +def parse(readable, *args, **kwargs): + """ + Parse HTML or XML readable. 
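+
+    For example (a minimal, illustrative sketch; C{parseString} is the
+    string-based convenience defined below in this module)::
+
+        from twisted.web import microdom
+
+        doc = microdom.parseString('<p class="x">hello</p>')
+        el = doc.documentElement
+        el.tagName                 # 'p'
+        el.getAttribute('class')   # 'x'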
+ """ + if not hasattr(readable, "read"): + readable = open(readable, "rb") + mdp = MicroDOMParser(*args, **kwargs) + mdp.filename = getattr(readable, "name", "<xmlfile />") + mdp.makeConnection(None) + if hasattr(readable, "getvalue"): + mdp.dataReceived(readable.getvalue()) + else: + r = readable.read(1024) + while r: + mdp.dataReceived(r) + r = readable.read(1024) + mdp.connectionLost(None) + + if not mdp.documents: + raise ParseError(mdp.filename, 0, 0, "No top-level Nodes in document") + + if mdp.beExtremelyLenient: + if len(mdp.documents) == 1: + d = mdp.documents[0] + if not isinstance(d, Element): + el = Element("html") + el.appendChild(d) + d = el + else: + d = Element("html") + for child in mdp.documents: + d.appendChild(child) + else: + d = mdp.documents[0] + doc = Document(d) + doc.doctype = mdp._mddoctype + return doc + + + +def parseString(st, *args, **kw): + if isinstance(st, unicode): + # this isn't particularly ideal, but it does work. + return parse(BytesIO(st.encode('UTF-16')), *args, **kw) + return parse(BytesIO(st), *args, **kw) + + + +def parseXML(readable): + """ + Parse an XML readable object. + """ + return parse(readable, caseInsensitive=0, preserveCase=1) + + + +def parseXMLString(st): + """ + Parse an XML readable object. + """ + return parseString(st, caseInsensitive=0, preserveCase=1) + + + +class lmx: + """ + Easy creation of XML. + """ + + def __init__(self, node='div'): + if isinstance(node, (str, unicode)): + node = Element(node) + self.node = node + + + def __getattr__(self, name): + if name[0] == '_': + raise AttributeError("no private attrs") + return lambda **kw: self.add(name, **kw) + + + def __setitem__(self, key, val): + self.node.setAttribute(key, val) + + + def __getitem__(self, key): + return self.node.getAttribute(key) + + + def text(self, txt, raw=0): + nn = Text(txt, raw=raw) + self.node.appendChild(nn) + return self + + + def add(self, tagName, **kw): + newNode = Element(tagName, caseInsensitive=0, preserveCase=0) + self.node.appendChild(newNode) + xf = lmx(newNode) + for k, v in kw.items(): + if k[0] == '_': + k = k[1:] + xf[k] = v + return xf diff --git a/contrib/python/Twisted/py2/twisted/web/proxy.py b/contrib/python/Twisted/py2/twisted/web/proxy.py new file mode 100644 index 0000000000..4ec677cc03 --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/proxy.py @@ -0,0 +1,303 @@ +# -*- test-case-name: twisted.web.test.test_proxy -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +Simplistic HTTP proxy support. + +This comes in two main variants - the Proxy and the ReverseProxy. + +When a Proxy is in use, a browser trying to connect to a server (say, +www.yahoo.com) will be intercepted by the Proxy, and the proxy will covertly +connect to the server, and return the result. + +When a ReverseProxy is in use, the client connects directly to the ReverseProxy +(say, www.yahoo.com) which farms off the request to one of a pool of servers, +and returns the result. + +Normally, a Proxy is used on the client end of an Internet connection, while a +ReverseProxy is used on the server end. 
+""" +from __future__ import absolute_import, division + +from twisted.python.compat import urllib_parse, urlquote +from twisted.internet import reactor +from twisted.internet.protocol import ClientFactory +from twisted.web.resource import Resource +from twisted.web.server import NOT_DONE_YET +from twisted.web.http import HTTPClient, Request, HTTPChannel, _QUEUED_SENTINEL + + + +class ProxyClient(HTTPClient): + """ + Used by ProxyClientFactory to implement a simple web proxy. + + @ivar _finished: A flag which indicates whether or not the original request + has been finished yet. + """ + _finished = False + + def __init__(self, command, rest, version, headers, data, father): + self.father = father + self.command = command + self.rest = rest + if b"proxy-connection" in headers: + del headers[b"proxy-connection"] + headers[b"connection"] = b"close" + headers.pop(b'keep-alive', None) + self.headers = headers + self.data = data + + + def connectionMade(self): + self.sendCommand(self.command, self.rest) + for header, value in self.headers.items(): + self.sendHeader(header, value) + self.endHeaders() + self.transport.write(self.data) + + + def handleStatus(self, version, code, message): + self.father.setResponseCode(int(code), message) + + + def handleHeader(self, key, value): + # t.web.server.Request sets default values for these headers in its + # 'process' method. When these headers are received from the remote + # server, they ought to override the defaults, rather than append to + # them. + if key.lower() in [b'server', b'date', b'content-type']: + self.father.responseHeaders.setRawHeaders(key, [value]) + else: + self.father.responseHeaders.addRawHeader(key, value) + + + def handleResponsePart(self, buffer): + self.father.write(buffer) + + + def handleResponseEnd(self): + """ + Finish the original request, indicating that the response has been + completely written to it, and disconnect the outgoing transport. + """ + if not self._finished: + self._finished = True + self.father.finish() + self.transport.loseConnection() + + + +class ProxyClientFactory(ClientFactory): + """ + Used by ProxyRequest to implement a simple web proxy. + """ + + protocol = ProxyClient + + + def __init__(self, command, rest, version, headers, data, father): + self.father = father + self.command = command + self.rest = rest + self.headers = headers + self.data = data + self.version = version + + + def buildProtocol(self, addr): + return self.protocol(self.command, self.rest, self.version, + self.headers, self.data, self.father) + + + def clientConnectionFailed(self, connector, reason): + """ + Report a connection failure in a response to the incoming request as + an error. + """ + self.father.setResponseCode(501, b"Gateway error") + self.father.responseHeaders.addRawHeader(b"Content-Type", b"text/html") + self.father.write(b"<H1>Could not connect</H1>") + self.father.finish() + + + +class ProxyRequest(Request): + """ + Used by Proxy to implement a simple web proxy. + + @ivar reactor: the reactor used to create connections. 
+ @type reactor: object providing L{twisted.internet.interfaces.IReactorTCP} + """ + + protocols = {b'http': ProxyClientFactory} + ports = {b'http': 80} + + def __init__(self, channel, queued=_QUEUED_SENTINEL, reactor=reactor): + Request.__init__(self, channel, queued) + self.reactor = reactor + + + def process(self): + parsed = urllib_parse.urlparse(self.uri) + protocol = parsed[0] + host = parsed[1].decode('ascii') + port = self.ports[protocol] + if ':' in host: + host, port = host.split(':') + port = int(port) + rest = urllib_parse.urlunparse((b'', b'') + parsed[2:]) + if not rest: + rest = rest + b'/' + class_ = self.protocols[protocol] + headers = self.getAllHeaders().copy() + if b'host' not in headers: + headers[b'host'] = host.encode('ascii') + self.content.seek(0, 0) + s = self.content.read() + clientFactory = class_(self.method, rest, self.clientproto, headers, + s, self) + self.reactor.connectTCP(host, port, clientFactory) + + + +class Proxy(HTTPChannel): + """ + This class implements a simple web proxy. + + Since it inherits from L{twisted.web.http.HTTPChannel}, to use it you + should do something like this:: + + from twisted.web import http + f = http.HTTPFactory() + f.protocol = Proxy + + Make the HTTPFactory a listener on a port as per usual, and you have + a fully-functioning web proxy! + """ + + requestFactory = ProxyRequest + + + +class ReverseProxyRequest(Request): + """ + Used by ReverseProxy to implement a simple reverse proxy. + + @ivar proxyClientFactoryClass: a proxy client factory class, used to create + new connections. + @type proxyClientFactoryClass: L{ClientFactory} + + @ivar reactor: the reactor used to create connections. + @type reactor: object providing L{twisted.internet.interfaces.IReactorTCP} + """ + + proxyClientFactoryClass = ProxyClientFactory + + def __init__(self, channel, queued=_QUEUED_SENTINEL, reactor=reactor): + Request.__init__(self, channel, queued) + self.reactor = reactor + + + def process(self): + """ + Handle this request by connecting to the proxied server and forwarding + it there, then forwarding the response back as the response to this + request. + """ + self.requestHeaders.setRawHeaders(b"host", + [self.factory.host.encode('ascii')]) + clientFactory = self.proxyClientFactoryClass( + self.method, self.uri, self.clientproto, self.getAllHeaders(), + self.content.read(), self) + self.reactor.connectTCP(self.factory.host, self.factory.port, + clientFactory) + + + +class ReverseProxy(HTTPChannel): + """ + Implements a simple reverse proxy. + + For details of usage, see the file examples/reverse-proxy.py. + """ + + requestFactory = ReverseProxyRequest + + + +class ReverseProxyResource(Resource): + """ + Resource that renders the results gotten from another server + + Put this resource in the tree to cause everything below it to be relayed + to a different server. + + @ivar proxyClientFactoryClass: a proxy client factory class, used to create + new connections. + @type proxyClientFactoryClass: L{ClientFactory} + + @ivar reactor: the reactor used to create connections. + @type reactor: object providing L{twisted.internet.interfaces.IReactorTCP} + """ + + proxyClientFactoryClass = ProxyClientFactory + + + def __init__(self, host, port, path, reactor=reactor): + """ + @param host: the host of the web server to proxy. + @type host: C{str} + + @param port: the port of the web server to proxy. + @type port: C{port} + + @param path: the base path to fetch data from. 
Note that you shouldn't + put any trailing slashes in it, it will be added automatically in + request. For example, if you put B{/foo}, a request on B{/bar} will + be proxied to B{/foo/bar}. Any required encoding of special + characters (such as " " or "/") should have been done already. + + @type path: C{bytes} + """ + Resource.__init__(self) + self.host = host + self.port = port + self.path = path + self.reactor = reactor + + + def getChild(self, path, request): + """ + Create and return a proxy resource with the same proxy configuration + as this one, except that its path also contains the segment given by + C{path} at the end. + """ + return ReverseProxyResource( + self.host, self.port, self.path + b'/' + urlquote(path, safe=b"").encode('utf-8'), + self.reactor) + + + def render(self, request): + """ + Render a request by forwarding it to the proxied server. + """ + # RFC 2616 tells us that we can omit the port if it's the default port, + # but we have to provide it otherwise + if self.port == 80: + host = self.host + else: + host = u"%s:%d" % (self.host, self.port) + request.requestHeaders.setRawHeaders(b"host", [host.encode('ascii')]) + request.content.seek(0, 0) + qs = urllib_parse.urlparse(request.uri)[4] + if qs: + rest = self.path + b'?' + qs + else: + rest = self.path + clientFactory = self.proxyClientFactoryClass( + request.method, rest, request.clientproto, + request.getAllHeaders(), request.content.read(), request) + self.reactor.connectTCP(self.host, self.port, clientFactory) + return NOT_DONE_YET diff --git a/contrib/python/Twisted/py2/twisted/web/resource.py b/contrib/python/Twisted/py2/twisted/web/resource.py new file mode 100644 index 0000000000..147b110ef2 --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/resource.py @@ -0,0 +1,422 @@ +# -*- test-case-name: twisted.web.test.test_web -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +Implementation of the lowest-level Resource class. +""" + +from __future__ import division, absolute_import + +__all__ = [ + 'IResource', 'getChildForRequest', + 'Resource', 'ErrorPage', 'NoResource', 'ForbiddenResource', + 'EncodingResourceWrapper'] + +import warnings + +from zope.interface import Attribute, Interface, implementer + +from twisted.python.compat import nativeString, unicode +from twisted.python.reflect import prefixedMethodNames +from twisted.python.components import proxyForInterface + +from twisted.web._responses import FORBIDDEN, NOT_FOUND +from twisted.web.error import UnsupportedMethod + + + +class IResource(Interface): + """ + A web resource. + """ + + isLeaf = Attribute( + """ + Signal if this IResource implementor is a "leaf node" or not. If True, + getChildWithDefault will not be called on this Resource. + """) + + + def getChildWithDefault(name, request): + """ + Return a child with the given name for the given request. + This is the external interface used by the Resource publishing + machinery. If implementing IResource without subclassing + Resource, it must be provided. However, if subclassing Resource, + getChild overridden instead. + + @param name: A single path component from a requested URL. For example, + a request for I{http://example.com/foo/bar} will result in calls to + this method with C{b"foo"} and C{b"bar"} as values for this + argument. + @type name: C{bytes} + + @param request: A representation of all of the information about the + request that is being made for this child. 
+ @type request: L{twisted.web.server.Request} + """ + + + def putChild(path, child): + """ + Put a child IResource implementor at the given path. + + @param path: A single path component, to be interpreted relative to the + path this resource is found at, at which to put the given child. + For example, if resource A can be found at I{http://example.com/foo} + then a call like C{A.putChild(b"bar", B)} will make resource B + available at I{http://example.com/foo/bar}. + @type path: C{bytes} + """ + + + def render(request): + """ + Render a request. This is called on the leaf resource for a request. + + @return: Either C{server.NOT_DONE_YET} to indicate an asynchronous or a + C{bytes} instance to write as the response to the request. If + C{NOT_DONE_YET} is returned, at some point later (for example, in a + Deferred callback) call C{request.write(b"<html>")} to write data to + the request, and C{request.finish()} to send the data to the + browser. + + @raise twisted.web.error.UnsupportedMethod: If the HTTP verb + requested is not supported by this resource. + """ + + + +def getChildForRequest(resource, request): + """ + Traverse resource tree to find who will handle the request. + """ + while request.postpath and not resource.isLeaf: + pathElement = request.postpath.pop(0) + request.prepath.append(pathElement) + resource = resource.getChildWithDefault(pathElement, request) + return resource + + + +@implementer(IResource) +class Resource: + """ + Define a web-accessible resource. + + This serves 2 main purposes; one is to provide a standard representation + for what HTTP specification calls an 'entity', and the other is to provide + an abstract directory structure for URL retrieval. + """ + entityType = IResource + + server = None + + def __init__(self): + """ + Initialize. + """ + self.children = {} + + isLeaf = 0 + + ### Abstract Collection Interface + + def listStaticNames(self): + return list(self.children.keys()) + + def listStaticEntities(self): + return list(self.children.items()) + + def listNames(self): + return list(self.listStaticNames()) + self.listDynamicNames() + + def listEntities(self): + return list(self.listStaticEntities()) + self.listDynamicEntities() + + def listDynamicNames(self): + return [] + + def listDynamicEntities(self, request=None): + return [] + + def getStaticEntity(self, name): + return self.children.get(name) + + def getDynamicEntity(self, name, request): + if name not in self.children: + return self.getChild(name, request) + else: + return None + + def delEntity(self, name): + del self.children[name] + + def reallyPutEntity(self, name, entity): + self.children[name] = entity + + # Concrete HTTP interface + + def getChild(self, path, request): + """ + Retrieve a 'child' resource from me. + + Implement this to create dynamic resource generation -- resources which + are always available may be registered with self.putChild(). + + This will not be called if the class-level variable 'isLeaf' is set in + your subclass; instead, the 'postpath' attribute of the request will be + left as a list of the remaining path elements. + + For example, the URL /foo/bar/baz will normally be:: + + | site.resource.getChild('foo').getChild('bar').getChild('baz'). + + However, if the resource returned by 'bar' has isLeaf set to true, then + the getChild call will never be made on it. + + Parameters and return value have the same meaning and requirements as + those defined by L{IResource.getChildWithDefault}. 
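        Dynamic lookups are normally provided by overriding this method; a
        minimal sketch (C{GreetingPage} is an illustrative name, not part of
        this module) might look like::

            def getChild(self, path, request):
                return GreetingPage(path)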
+ """ + return NoResource("No such child resource.") + + + def getChildWithDefault(self, path, request): + """ + Retrieve a static or dynamically generated child resource from me. + + First checks if a resource was added manually by putChild, and then + call getChild to check for dynamic resources. Only override if you want + to affect behaviour of all child lookups, rather than just dynamic + ones. + + This will check to see if I have a pre-registered child resource of the + given name, and call getChild if I do not. + + @see: L{IResource.getChildWithDefault} + """ + if path in self.children: + return self.children[path] + return self.getChild(path, request) + + + def getChildForRequest(self, request): + warnings.warn("Please use module level getChildForRequest.", DeprecationWarning, 2) + return getChildForRequest(self, request) + + + def putChild(self, path, child): + """ + Register a static child. + + You almost certainly don't want '/' in your path. If you + intended to have the root of a folder, e.g. /foo/, you want + path to be ''. + + @param path: A single path component. + @type path: L{bytes} + + @param child: The child resource to register. + @type child: L{IResource} + + @see: L{IResource.putChild} + """ + if not isinstance(path, bytes): + warnings.warn( + 'Path segment must be bytes; ' + 'passing {0} has never worked, and ' + 'will raise an exception in the future.' + .format(type(path)), + category=DeprecationWarning, + stacklevel=2) + + self.children[path] = child + child.server = self.server + + + def render(self, request): + """ + Render a given resource. See L{IResource}'s render method. + + I delegate to methods of self with the form 'render_METHOD' + where METHOD is the HTTP that was used to make the + request. Examples: render_GET, render_HEAD, render_POST, and + so on. Generally you should implement those methods instead of + overriding this one. + + render_METHOD methods are expected to return a byte string which will be + the rendered page, unless the return value is C{server.NOT_DONE_YET}, in + which case it is this class's responsibility to write the results using + C{request.write(data)} and then call C{request.finish()}. + + Old code that overrides render() directly is likewise expected + to return a byte string or NOT_DONE_YET. + + @see: L{IResource.render} + """ + m = getattr(self, 'render_' + nativeString(request.method), None) + if not m: + try: + allowedMethods = self.allowedMethods + except AttributeError: + allowedMethods = _computeAllowedMethods(self) + raise UnsupportedMethod(allowedMethods) + return m(request) + + + def render_HEAD(self, request): + """ + Default handling of HEAD method. + + I just return self.render_GET(request). When method is HEAD, + the framework will handle this correctly. + """ + return self.render_GET(request) + + + +def _computeAllowedMethods(resource): + """ + Compute the allowed methods on a C{Resource} based on defined render_FOO + methods. Used when raising C{UnsupportedMethod} but C{Resource} does + not define C{allowedMethods} attribute. + """ + allowedMethods = [] + for name in prefixedMethodNames(resource.__class__, "render_"): + # Potentially there should be an API for encode('ascii') in this + # situation - an API for taking a Python native string (bytes on Python + # 2, text on Python 3) and returning a socket-compatible string type. 
+ allowedMethods.append(name.encode('ascii')) + return allowedMethods + + + +class ErrorPage(Resource): + """ + L{ErrorPage} is a resource which responds with a particular + (parameterized) status and a body consisting of HTML containing some + descriptive text. This is useful for rendering simple error pages. + + @ivar template: A native string which will have a dictionary interpolated + into it to generate the response body. The dictionary has the following + keys: + + - C{"code"}: The status code passed to L{ErrorPage.__init__}. + - C{"brief"}: The brief description passed to L{ErrorPage.__init__}. + - C{"detail"}: The detailed description passed to + L{ErrorPage.__init__}. + + @ivar code: An integer status code which will be used for the response. + @type code: C{int} + + @ivar brief: A short string which will be included in the response body as + the page title. + @type brief: C{str} + + @ivar detail: A longer string which will be included in the response body. + @type detail: C{str} + """ + + template = """ +<html> + <head><title>%(code)s - %(brief)s</title></head> + <body> + <h1>%(brief)s</h1> + <p>%(detail)s</p> + </body> +</html> +""" + + def __init__(self, status, brief, detail): + Resource.__init__(self) + self.code = status + self.brief = brief + self.detail = detail + + + def render(self, request): + request.setResponseCode(self.code) + request.setHeader(b"content-type", b"text/html; charset=utf-8") + interpolated = self.template % dict( + code=self.code, brief=self.brief, detail=self.detail) + if isinstance(interpolated, unicode): + return interpolated.encode('utf-8') + return interpolated + + + def getChild(self, chnam, request): + return self + + + +class NoResource(ErrorPage): + """ + L{NoResource} is a specialization of L{ErrorPage} which returns the HTTP + response code I{NOT FOUND}. + """ + def __init__(self, message="Sorry. No luck finding that resource."): + ErrorPage.__init__(self, NOT_FOUND, "No Such Resource", message) + + + +class ForbiddenResource(ErrorPage): + """ + L{ForbiddenResource} is a specialization of L{ErrorPage} which returns the + I{FORBIDDEN} HTTP response code. + """ + def __init__(self, message="Sorry, resource is forbidden."): + ErrorPage.__init__(self, FORBIDDEN, "Forbidden Resource", message) + + + +class _IEncodingResource(Interface): + """ + A resource which knows about L{_IRequestEncoderFactory}. + + @since: 12.3 + """ + + def getEncoder(request): + """ + Parse the request and return an encoder if applicable, using + L{_IRequestEncoderFactory.encoderForRequest}. + + @return: A L{_IRequestEncoder}, or L{None}. + """ + + + +@implementer(_IEncodingResource) +class EncodingResourceWrapper(proxyForInterface(IResource)): + """ + Wrap a L{IResource}, potentially applying an encoding to the response body + generated. + + Note that the returned children resources won't be wrapped, so you have to + explicitly wrap them if you want the encoding to be applied. + + @ivar encoders: A list of + L{_IRequestEncoderFactory<twisted.web.iweb._IRequestEncoderFactory>} + returning L{_IRequestEncoder<twisted.web.iweb._IRequestEncoder>} that + may transform the data passed to C{Request.write}. The list must be + sorted in order of priority: the first encoder factory handling the + request will prevent the others from doing the same. + @type encoders: C{list}. 
+ + @since: 12.3 + """ + + def __init__(self, original, encoders): + super(EncodingResourceWrapper, self).__init__(original) + self._encoders = encoders + + + def getEncoder(self, request): + """ + Browser the list of encoders looking for one applicable encoder. + """ + for encoderFactory in self._encoders: + encoder = encoderFactory.encoderForRequest(request) + if encoder is not None: + return encoder diff --git a/contrib/python/Twisted/py2/twisted/web/rewrite.py b/contrib/python/Twisted/py2/twisted/web/rewrite.py new file mode 100644 index 0000000000..b5366b4eb7 --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/rewrite.py @@ -0,0 +1,52 @@ +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +# +from twisted.web import resource + +class RewriterResource(resource.Resource): + + def __init__(self, orig, *rewriteRules): + resource.Resource.__init__(self) + self.resource = orig + self.rewriteRules = list(rewriteRules) + + def _rewrite(self, request): + for rewriteRule in self.rewriteRules: + rewriteRule(request) + + def getChild(self, path, request): + request.postpath.insert(0, path) + request.prepath.pop() + self._rewrite(request) + path = request.postpath.pop(0) + request.prepath.append(path) + return self.resource.getChildWithDefault(path, request) + + def render(self, request): + self._rewrite(request) + return self.resource.render(request) + + +def tildeToUsers(request): + if request.postpath and request.postpath[0][:1]=='~': + request.postpath[:1] = ['users', request.postpath[0][1:]] + request.path = '/'+'/'.join(request.prepath+request.postpath) + +def alias(aliasPath, sourcePath): + """ + I am not a very good aliaser. But I'm the best I can be. If I'm + aliasing to a Resource that generates links, and it uses any parts + of request.prepath to do so, the links will not be relative to the + aliased path, but rather to the aliased-to path. That I can't + alias static.File directory listings that nicely. However, I can + still be useful, as many resources will play nice. + """ + sourcePath = sourcePath.split('/') + aliasPath = aliasPath.split('/') + def rewriter(request): + if request.postpath[:len(aliasPath)] == aliasPath: + after = request.postpath[len(aliasPath):] + request.postpath = sourcePath + after + request.path = '/'+'/'.join(request.prepath+request.postpath) + return rewriter diff --git a/contrib/python/Twisted/py2/twisted/web/script.py b/contrib/python/Twisted/py2/twisted/web/script.py new file mode 100644 index 0000000000..aa030368a4 --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/script.py @@ -0,0 +1,182 @@ +# -*- test-case-name: twisted.web.test.test_script -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +I contain PythonScript, which is a very simple python script resource. +""" + +from __future__ import division, absolute_import + +import os, traceback + +from twisted import copyright +from twisted.python.filepath import _coerceToFilesystemEncoding +from twisted.python.compat import execfile, networkString, NativeStringIO, _PY3 +from twisted.web import http, server, static, resource, util + + +rpyNoResource = """<p>You forgot to assign to the variable "resource" in your script. For example:</p> +<pre> +# MyCoolWebApp.rpy + +import mygreatresource + +resource = mygreatresource.MyGreatResource() +</pre> +""" + +class AlreadyCached(Exception): + """ + This exception is raised when a path has already been cached. 
+ """ + +class CacheScanner: + def __init__(self, path, registry): + self.path = path + self.registry = registry + self.doCache = 0 + + def cache(self): + c = self.registry.getCachedPath(self.path) + if c is not None: + raise AlreadyCached(c) + self.recache() + + def recache(self): + self.doCache = 1 + +noRsrc = resource.ErrorPage(500, "Whoops! Internal Error", rpyNoResource) + +def ResourceScript(path, registry): + """ + I am a normal py file which must define a 'resource' global, which should + be an instance of (a subclass of) web.resource.Resource; it will be + renderred. + """ + cs = CacheScanner(path, registry) + glob = {'__file__': _coerceToFilesystemEncoding("", path), + 'resource': noRsrc, + 'registry': registry, + 'cache': cs.cache, + 'recache': cs.recache} + try: + execfile(path, glob, glob) + except AlreadyCached as ac: + return ac.args[0] + rsrc = glob['resource'] + if cs.doCache and rsrc is not noRsrc: + registry.cachePath(path, rsrc) + return rsrc + + + +def ResourceTemplate(path, registry): + from quixote import ptl_compile + + glob = {'__file__': _coerceToFilesystemEncoding("", path), + 'resource': resource.ErrorPage(500, "Whoops! Internal Error", + rpyNoResource), + 'registry': registry} + + with open(path) as f: # Not closed by quixote as of 2.9.1 + e = ptl_compile.compile_template(f, path) + code = compile(e, "<source>", "exec") + eval(code, glob, glob) + return glob['resource'] + + + +class ResourceScriptWrapper(resource.Resource): + + def __init__(self, path, registry=None): + resource.Resource.__init__(self) + self.path = path + self.registry = registry or static.Registry() + + def render(self, request): + res = ResourceScript(self.path, self.registry) + return res.render(request) + + def getChildWithDefault(self, path, request): + res = ResourceScript(self.path, self.registry) + return res.getChildWithDefault(path, request) + + + +class ResourceScriptDirectory(resource.Resource): + """ + L{ResourceScriptDirectory} is a resource which serves scripts from a + filesystem directory. File children of a L{ResourceScriptDirectory} will + be served using L{ResourceScript}. Directory children will be served using + another L{ResourceScriptDirectory}. + + @ivar path: A C{str} giving the filesystem path in which children will be + looked up. + + @ivar registry: A L{static.Registry} instance which will be used to decide + how to interpret scripts found as children of this resource. + """ + def __init__(self, pathname, registry=None): + resource.Resource.__init__(self) + self.path = pathname + self.registry = registry or static.Registry() + + def getChild(self, path, request): + fn = os.path.join(self.path, path) + + if os.path.isdir(fn): + return ResourceScriptDirectory(fn, self.registry) + if os.path.exists(fn): + return ResourceScript(fn, self.registry) + return resource.NoResource() + + def render(self, request): + return resource.NoResource().render(request) + + + +class PythonScript(resource.Resource): + """ + I am an extremely simple dynamic resource; an embedded python script. + + This will execute a file (usually of the extension '.epy') as Python code, + internal to the webserver. + """ + isLeaf = True + + def __init__(self, filename, registry): + """ + Initialize me with a script name. + """ + self.filename = filename + self.registry = registry + + def render(self, request): + """ + Render me to a web client. + + Load my file, execute it in a special namespace (with 'request' and + '__file__' global vars) and finish the request. 
Output to the web-page + will NOT be handled with print - standard output goes to the log - but + with request.write. + """ + request.setHeader(b"x-powered-by", networkString("Twisted/%s" % copyright.version)) + namespace = {'request': request, + '__file__': _coerceToFilesystemEncoding("", self.filename), + 'registry': self.registry} + try: + execfile(self.filename, namespace, namespace) + except IOError as e: + if e.errno == 2: #file not found + request.setResponseCode(http.NOT_FOUND) + request.write(resource.NoResource("File not found.").render(request)) + except: + io = NativeStringIO() + traceback.print_exc(file=io) + output = util._PRE(io.getvalue()) + if _PY3: + output = output.encode("utf8") + request.write(output) + request.finish() + return server.NOT_DONE_YET diff --git a/contrib/python/Twisted/py2/twisted/web/server.py b/contrib/python/Twisted/py2/twisted/web/server.py new file mode 100644 index 0000000000..6fa488afb1 --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/server.py @@ -0,0 +1,911 @@ +# -*- test-case-name: twisted.web.test.test_web -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +This is a web server which integrates with the twisted.internet infrastructure. + +@var NOT_DONE_YET: A token value which L{twisted.web.resource.IResource.render} + implementations can return to indicate that the application will later call + C{.write} and C{.finish} to complete the request, and that the HTTP + connection should be left open. +@type NOT_DONE_YET: Opaque; do not depend on any particular type for this + value. +""" + +from __future__ import division, absolute_import + +import copy +import os +import re +try: + from urllib import quote +except ImportError: + from urllib.parse import quote as _quote + + def quote(string, *args, **kwargs): + return _quote( + string.decode('charmap'), *args, **kwargs).encode('charmap') + +import zlib +from binascii import hexlify + +from zope.interface import implementer + +from twisted.python.compat import networkString, nativeString, intToBytes +from twisted.spread.pb import Copyable, ViewPoint +from twisted.internet import address, interfaces +from twisted.internet.error import AlreadyCalled, AlreadyCancelled +from twisted.web import iweb, http, util +from twisted.web.http import unquote +from twisted.python import reflect, failure, components +from twisted import copyright +from twisted.web import resource +from twisted.web.error import UnsupportedMethod + +from incremental import Version +from twisted.python.deprecate import deprecatedModuleAttribute +from twisted.python.compat import escape +from twisted.logger import Logger + +NOT_DONE_YET = 1 + +__all__ = [ + 'supportedMethods', + 'Request', + 'Session', + 'Site', + 'version', + 'NOT_DONE_YET', + 'GzipEncoderFactory' +] + + +# backwards compatibility +deprecatedModuleAttribute( + Version("Twisted", 12, 1, 0), + "Please use twisted.web.http.datetimeToString instead", + "twisted.web.server", + "date_time_string") +deprecatedModuleAttribute( + Version("Twisted", 12, 1, 0), + "Please use twisted.web.http.stringToDatetime instead", + "twisted.web.server", + "string_date_time") +date_time_string = http.datetimeToString +string_date_time = http.stringToDatetime + +# Support for other methods may be implemented on a per-resource basis. 
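# Editorial sketch (illustrative, not part of the upstream module): a resource
# accepts an additional verb simply by defining the matching render_* method,
# for example:
#
#     class UploadResource(resource.Resource):
#         isLeaf = True
#
#         def render_PUT(self, request):
#             request.setResponseCode(201)
#             return b""
#
# Verbs without a render_* method make Resource.render raise UnsupportedMethod,
# which Request.render below turns into a 405 or 501 response.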
+supportedMethods = (b'GET', b'HEAD', b'POST') + + +def _addressToTuple(addr): + if isinstance(addr, address.IPv4Address): + return ('INET', addr.host, addr.port) + elif isinstance(addr, address.UNIXAddress): + return ('UNIX', addr.name) + else: + return tuple(addr) + + + +@implementer(iweb.IRequest) +class Request(Copyable, http.Request, components.Componentized): + """ + An HTTP request. + + @ivar defaultContentType: A L{bytes} giving the default I{Content-Type} + value to send in responses if no other value is set. L{None} disables + the default. + + @ivar _insecureSession: The L{Session} object representing state that will + be transmitted over plain-text HTTP. + + @ivar _secureSession: The L{Session} object representing the state that + will be transmitted only over HTTPS. + """ + + defaultContentType = b"text/html" + + site = None + appRootURL = None + prepath = postpath = None + __pychecker__ = 'unusednames=issuer' + _inFakeHead = False + _encoder = None + _log = Logger() + + def __init__(self, *args, **kw): + http.Request.__init__(self, *args, **kw) + components.Componentized.__init__(self) + + + def getStateToCopyFor(self, issuer): + x = self.__dict__.copy() + del x['transport'] + # XXX refactor this attribute out; it's from protocol + # del x['server'] + del x['channel'] + del x['content'] + del x['site'] + self.content.seek(0, 0) + x['content_data'] = self.content.read() + x['remote'] = ViewPoint(issuer, self) + + # Address objects aren't jellyable + x['host'] = _addressToTuple(x['host']) + x['client'] = _addressToTuple(x['client']) + + # Header objects also aren't jellyable. + x['requestHeaders'] = list(x['requestHeaders'].getAllRawHeaders()) + + return x + + # HTML generation helpers + + + def sibLink(self, name): + """ + Return the text that links to a sibling of the requested resource. + + @param name: The sibling resource + @type name: C{bytes} + + @return: A relative URL. + @rtype: C{bytes} + """ + if self.postpath: + return (len(self.postpath)*b"../") + name + else: + return name + + + def childLink(self, name): + """ + Return the text that links to a child of the requested resource. + + @param name: The child resource + @type name: C{bytes} + + @return: A relative URL. + @rtype: C{bytes} + """ + lpp = len(self.postpath) + if lpp > 1: + return ((lpp-1)*b"../") + name + elif lpp == 1: + return name + else: # lpp == 0 + if len(self.prepath) and self.prepath[-1]: + return self.prepath[-1] + b'/' + name + else: + return name + + + def gotLength(self, length): + """ + Called when HTTP channel got length of content in this request. + + This method is not intended for users. + + @param length: The length of the request body, as indicated by the + request headers. L{None} if the request headers do not indicate a + length. + """ + try: + getContentFile = self.channel.site.getContentFile + except AttributeError: + http.Request.gotLength(self, length) + else: + self.content = getContentFile(length) + + + def process(self): + """ + Process a request. + + Find the addressed resource in this request's L{Site}, + and call L{self.render()<Request.render()>} with it. + + @see: L{Site.getResourceFor()} + """ + + # get site from channel + self.site = self.channel.site + + # set various default headers + self.setHeader(b'server', version) + self.setHeader(b'date', http.datetimeToString()) + + # Resource Identification + self.prepath = [] + self.postpath = list(map(unquote, self.path[1:].split(b'/'))) + + # Short-circuit for requests whose path is '*'. 
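        # (That is, an asterisk-form request target such as OPTIONS * HTTP/1.1;
        # see _handleStar below.)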
+ if self.path == b'*': + self._handleStar() + return + + try: + resrc = self.site.getResourceFor(self) + if resource._IEncodingResource.providedBy(resrc): + encoder = resrc.getEncoder(self) + if encoder is not None: + self._encoder = encoder + self.render(resrc) + except: + self.processingFailed(failure.Failure()) + + + def write(self, data): + """ + Write data to the transport (if not responding to a HEAD request). + + @param data: A string to write to the response. + @type data: L{bytes} + """ + if not self.startedWriting: + # Before doing the first write, check to see if a default + # Content-Type header should be supplied. We omit it on + # NOT_MODIFIED and NO_CONTENT responses. We also omit it if there + # is a Content-Length header set to 0, as empty bodies don't need + # a content-type. + needsCT = self.code not in (http.NOT_MODIFIED, http.NO_CONTENT) + contentType = self.responseHeaders.getRawHeaders(b'content-type') + contentLength = self.responseHeaders.getRawHeaders( + b'content-length' + ) + contentLengthZero = contentLength and (contentLength[0] == b'0') + + if (needsCT and contentType is None and + self.defaultContentType is not None and + not contentLengthZero + ): + self.responseHeaders.setRawHeaders( + b'content-type', [self.defaultContentType]) + + # Only let the write happen if we're not generating a HEAD response by + # faking out the request method. Note, if we are doing that, + # startedWriting will never be true, and the above logic may run + # multiple times. It will only actually change the responseHeaders + # once though, so it's still okay. + if not self._inFakeHead: + if self._encoder: + data = self._encoder.encode(data) + http.Request.write(self, data) + + + def finish(self): + """ + Override C{http.Request.finish} for possible encoding. + """ + if self._encoder: + data = self._encoder.finish() + if data: + http.Request.write(self, data) + return http.Request.finish(self) + + + def render(self, resrc): + """ + Ask a resource to render itself. + + If the resource does not support the requested method, + generate a C{NOT IMPLEMENTED} or C{NOT ALLOWED} response. + + @param resrc: The resource to render. + @type resrc: L{twisted.web.resource.IResource} + + @see: L{IResource.render()<twisted.web.resource.IResource.render()>} + """ + try: + body = resrc.render(self) + except UnsupportedMethod as e: + allowedMethods = e.allowedMethods + if (self.method == b"HEAD") and (b"GET" in allowedMethods): + # We must support HEAD (RFC 2616, 5.1.1). If the + # resource doesn't, fake it by giving the resource + # a 'GET' request and then return only the headers, + # not the body. + self._log.info( + "Using GET to fake a HEAD request for {resrc}", + resrc=resrc + ) + self.method = b"GET" + self._inFakeHead = True + body = resrc.render(self) + + if body is NOT_DONE_YET: + self._log.info( + "Tried to fake a HEAD request for {resrc}, but " + "it got away from me.", resrc=resrc + ) + # Oh well, I guess we won't include the content length. + else: + self.setHeader(b'content-length', intToBytes(len(body))) + + self._inFakeHead = False + self.method = b"HEAD" + self.write(b'') + self.finish() + return + + if self.method in (supportedMethods): + # We MUST include an Allow header + # (RFC 2616, 10.4.6 and 14.7) + self.setHeader(b'Allow', b', '.join(allowedMethods)) + s = ('''Your browser approached me (at %(URI)s) with''' + ''' the method "%(method)s". 
I only allow''' + ''' the method%(plural)s %(allowed)s here.''' % { + 'URI': escape(nativeString(self.uri)), + 'method': nativeString(self.method), + 'plural': ((len(allowedMethods) > 1) and 's') or '', + 'allowed': ', '.join( + [nativeString(x) for x in allowedMethods]) + }) + epage = resource.ErrorPage(http.NOT_ALLOWED, + "Method Not Allowed", s) + body = epage.render(self) + else: + epage = resource.ErrorPage( + http.NOT_IMPLEMENTED, "Huh?", + "I don't know how to treat a %s request." % + (escape(self.method.decode("charmap")),)) + body = epage.render(self) + # end except UnsupportedMethod + + if body is NOT_DONE_YET: + return + if not isinstance(body, bytes): + body = resource.ErrorPage( + http.INTERNAL_SERVER_ERROR, + "Request did not return bytes", + "Request: " + util._PRE(reflect.safe_repr(self)) + "<br />" + + "Resource: " + util._PRE(reflect.safe_repr(resrc)) + "<br />" + + "Value: " + util._PRE(reflect.safe_repr(body))).render(self) + + if self.method == b"HEAD": + if len(body) > 0: + # This is a Bad Thing (RFC 2616, 9.4) + self._log.info( + "Warning: HEAD request {slf} for resource {resrc} is" + " returning a message body. I think I'll eat it.", + slf=self, + resrc=resrc + ) + self.setHeader(b'content-length', + intToBytes(len(body))) + self.write(b'') + else: + self.setHeader(b'content-length', + intToBytes(len(body))) + self.write(body) + self.finish() + + + def processingFailed(self, reason): + """ + Finish this request with an indication that processing failed and + possibly display a traceback. + + @param reason: Reason this request has failed. + @type reason: L{twisted.python.failure.Failure} + + @return: The reason passed to this method. + @rtype: L{twisted.python.failure.Failure} + """ + self._log.failure('', failure=reason) + if self.site.displayTracebacks: + body = (b"<html><head><title>web.Server Traceback" + b" (most recent call last)</title></head>" + b"<body><b>web.Server Traceback" + b" (most recent call last):</b>\n\n" + + util.formatFailure(reason) + + b"\n\n</body></html>\n") + else: + body = (b"<html><head><title>Processing Failed" + b"</title></head><body>" + b"<b>Processing Failed</b></body></html>") + + self.setResponseCode(http.INTERNAL_SERVER_ERROR) + self.setHeader(b'content-type', b"text/html") + self.setHeader(b'content-length', intToBytes(len(body))) + self.write(body) + self.finish() + return reason + + + def view_write(self, issuer, data): + """Remote version of write; same interface. + """ + self.write(data) + + + def view_finish(self, issuer): + """Remote version of finish; same interface. + """ + self.finish() + + + def view_addCookie(self, issuer, k, v, **kwargs): + """Remote version of addCookie; same interface. + """ + self.addCookie(k, v, **kwargs) + + + def view_setHeader(self, issuer, k, v): + """Remote version of setHeader; same interface. + """ + self.setHeader(k, v) + + + def view_setLastModified(self, issuer, when): + """Remote version of setLastModified; same interface. + """ + self.setLastModified(when) + + + def view_setETag(self, issuer, tag): + """Remote version of setETag; same interface. + """ + self.setETag(tag) + + + def view_setResponseCode(self, issuer, code, message=None): + """ + Remote version of setResponseCode; same interface. + """ + self.setResponseCode(code, message) + + + def view_registerProducer(self, issuer, producer, streaming): + """Remote version of registerProducer; same interface. + (requires a remote producer.) 
+ """ + self.registerProducer(_RemoteProducerWrapper(producer), streaming) + + + def view_unregisterProducer(self, issuer): + self.unregisterProducer() + + ### these calls remain local + + _secureSession = None + _insecureSession = None + + @property + def session(self): + """ + If a session has already been created or looked up with + L{Request.getSession}, this will return that object. (This will always + be the session that matches the security of the request; so if + C{forceNotSecure} is used on a secure request, this will not return + that session.) + + @return: the session attribute + @rtype: L{Session} or L{None} + """ + if self.isSecure(): + return self._secureSession + else: + return self._insecureSession + + + def getSession(self, sessionInterface=None, forceNotSecure=False): + """ + Check if there is a session cookie, and if not, create it. + + By default, the cookie with be secure for HTTPS requests and not secure + for HTTP requests. If for some reason you need access to the insecure + cookie from a secure request you can set C{forceNotSecure = True}. + + @param forceNotSecure: Should we retrieve a session that will be + transmitted over HTTP, even if this L{Request} was delivered over + HTTPS? + @type forceNotSecure: L{bool} + """ + # Make sure we aren't creating a secure session on a non-secure page + secure = self.isSecure() and not forceNotSecure + + if not secure: + cookieString = b"TWISTED_SESSION" + sessionAttribute = "_insecureSession" + else: + cookieString = b"TWISTED_SECURE_SESSION" + sessionAttribute = "_secureSession" + + session = getattr(self, sessionAttribute) + + if session is not None: + # We have a previously created session. + try: + # Refresh the session, to keep it alive. + session.touch() + except (AlreadyCalled, AlreadyCancelled): + # Session has already expired. + session = None + + if session is None: + # No session was created yet for this request. + cookiename = b"_".join([cookieString] + self.sitepath) + sessionCookie = self.getCookie(cookiename) + if sessionCookie: + try: + session = self.site.getSession(sessionCookie) + except KeyError: + pass + # if it still hasn't been set, fix it up. + if not session: + session = self.site.makeSession() + self.addCookie(cookiename, session.uid, path=b"/", + secure=secure) + + setattr(self, sessionAttribute, session) + + if sessionInterface: + return session.getComponent(sessionInterface) + + return session + + + def _prePathURL(self, prepath): + port = self.getHost().port + if self.isSecure(): + default = 443 + else: + default = 80 + if port == default: + hostport = '' + else: + hostport = ':%d' % port + prefix = networkString('http%s://%s%s/' % ( + self.isSecure() and 's' or '', + nativeString(self.getRequestHostname()), + hostport)) + path = b'/'.join([quote(segment, safe=b'') for segment in prepath]) + return prefix + path + + + def prePathURL(self): + return self._prePathURL(self.prepath) + + + def URLPath(self): + from twisted.python import urlpath + return urlpath.URLPath.fromRequest(self) + + + def rememberRootURL(self): + """ + Remember the currently-processed part of the URL for later + recalling. + """ + url = self._prePathURL(self.prepath[:-1]) + self.appRootURL = url + + + def getRootURL(self): + """ + Get a previously-remembered URL. + + @return: An absolute URL. + @rtype: L{bytes} + """ + return self.appRootURL + + + def _handleStar(self): + """ + Handle receiving a request whose path is '*'. 
+ + RFC 7231 defines an OPTIONS * request as being something that a client + can send as a low-effort way to probe server capabilities or readiness. + Rather than bother the user with this, we simply fast-path it back to + an empty 200 OK. Any non-OPTIONS verb gets a 405 Method Not Allowed + telling the client they can only use OPTIONS. + """ + if self.method == b'OPTIONS': + self.setResponseCode(http.OK) + else: + self.setResponseCode(http.NOT_ALLOWED) + self.setHeader(b'Allow', b'OPTIONS') + + # RFC 7231 says we MUST set content-length 0 when responding to this + # with no body. + self.setHeader(b'Content-Length', b'0') + self.finish() + + +@implementer(iweb._IRequestEncoderFactory) +class GzipEncoderFactory(object): + """ + @cvar compressLevel: The compression level used by the compressor, default + to 9 (highest). + + @since: 12.3 + """ + _gzipCheckRegex = re.compile(br'(:?^|[\s,])gzip(:?$|[\s,])') + compressLevel = 9 + + def encoderForRequest(self, request): + """ + Check the headers if the client accepts gzip encoding, and encodes the + request if so. + """ + acceptHeaders = b','.join( + request.requestHeaders.getRawHeaders(b'accept-encoding', [])) + if self._gzipCheckRegex.search(acceptHeaders): + encoding = request.responseHeaders.getRawHeaders( + b'content-encoding') + if encoding: + encoding = b','.join(encoding + [b'gzip']) + else: + encoding = b'gzip' + + request.responseHeaders.setRawHeaders(b'content-encoding', + [encoding]) + return _GzipEncoder(self.compressLevel, request) + + + +@implementer(iweb._IRequestEncoder) +class _GzipEncoder(object): + """ + An encoder which supports gzip. + + @ivar _zlibCompressor: The zlib compressor instance used to compress the + stream. + + @ivar _request: A reference to the originating request. + + @since: 12.3 + """ + + _zlibCompressor = None + + def __init__(self, compressLevel, request): + self._zlibCompressor = zlib.compressobj( + compressLevel, zlib.DEFLATED, 16 + zlib.MAX_WBITS) + self._request = request + + + def encode(self, data): + """ + Write to the request, automatically compressing data on the fly. + """ + if not self._request.startedWriting: + # Remove the content-length header, we can't honor it + # because we compress on the fly. + self._request.responseHeaders.removeHeader(b'content-length') + return self._zlibCompressor.compress(data) + + + def finish(self): + """ + Finish handling the request request, flushing any data from the zlib + buffer. + """ + remain = self._zlibCompressor.flush() + self._zlibCompressor = None + return remain + + + +class _RemoteProducerWrapper: + def __init__(self, remote): + self.resumeProducing = remote.remoteMethod("resumeProducing") + self.pauseProducing = remote.remoteMethod("pauseProducing") + self.stopProducing = remote.remoteMethod("stopProducing") + + + +class Session(components.Componentized): + """ + A user's session with a system. + + This utility class contains no functionality, but is used to + represent a session. + + @ivar uid: A unique identifier for the session. + @type uid: L{bytes} + + @ivar _reactor: An object providing L{IReactorTime} to use for scheduling + expiration. + @ivar sessionTimeout: timeout of a session, in seconds. + """ + sessionTimeout = 900 + + _expireCall = None + + def __init__(self, site, uid, reactor=None): + """ + Initialize a session with a unique ID for that session. 
+ """ + components.Componentized.__init__(self) + + if reactor is None: + from twisted.internet import reactor + self._reactor = reactor + + self.site = site + self.uid = uid + self.expireCallbacks = [] + self.touch() + self.sessionNamespaces = {} + + + def startCheckingExpiration(self): + """ + Start expiration tracking. + + @return: L{None} + """ + self._expireCall = self._reactor.callLater( + self.sessionTimeout, self.expire) + + + def notifyOnExpire(self, callback): + """ + Call this callback when the session expires or logs out. + """ + self.expireCallbacks.append(callback) + + + def expire(self): + """ + Expire/logout of the session. + """ + del self.site.sessions[self.uid] + for c in self.expireCallbacks: + c() + self.expireCallbacks = [] + if self._expireCall and self._expireCall.active(): + self._expireCall.cancel() + # Break reference cycle. + self._expireCall = None + + + def touch(self): + """ + Notify session modification. + """ + self.lastModified = self._reactor.seconds() + if self._expireCall is not None: + self._expireCall.reset(self.sessionTimeout) + + +version = networkString("TwistedWeb/%s" % (copyright.version,)) + + + +@implementer(interfaces.IProtocolNegotiationFactory) +class Site(http.HTTPFactory): + """ + A web site: manage log, sessions, and resources. + + @ivar counter: increment value used for generating unique sessions ID. + @ivar requestFactory: A factory which is called with (channel) + and creates L{Request} instances. Default to L{Request}. + @ivar displayTracebacks: If set, unhandled exceptions raised during + rendering are returned to the client as HTML. Default to C{False}. + @ivar sessionFactory: factory for sessions objects. Default to L{Session}. + @ivar sessionCheckTime: Deprecated. See L{Session.sessionTimeout} instead. + """ + counter = 0 + requestFactory = Request + displayTracebacks = False + sessionFactory = Session + sessionCheckTime = 1800 + _entropy = os.urandom + + def __init__(self, resource, requestFactory=None, *args, **kwargs): + """ + @param resource: The root of the resource hierarchy. All request + traversal for requests received by this factory will begin at this + resource. + @type resource: L{IResource} provider + @param requestFactory: Overwrite for default requestFactory. + @type requestFactory: C{callable} or C{class}. + + @see: L{twisted.web.http.HTTPFactory.__init__} + """ + http.HTTPFactory.__init__(self, *args, **kwargs) + self.sessions = {} + self.resource = resource + if requestFactory is not None: + self.requestFactory = requestFactory + + + def _openLogFile(self, path): + from twisted.python import logfile + return logfile.LogFile(os.path.basename(path), os.path.dirname(path)) + + + def __getstate__(self): + d = self.__dict__.copy() + d['sessions'] = {} + return d + + + def _mkuid(self): + """ + (internal) Generate an opaque, unique ID for a user's session. + """ + self.counter = self.counter + 1 + return hexlify(self._entropy(32)) + + + def makeSession(self): + """ + Generate a new Session instance, and store it for future reference. + """ + uid = self._mkuid() + session = self.sessions[uid] = self.sessionFactory(self, uid) + session.startCheckingExpiration() + return session + + + def getSession(self, uid): + """ + Get a previously generated session. + + @param uid: Unique ID of the session. + @type uid: L{bytes}. + + @raise: L{KeyError} if the session is not found. + """ + return self.sessions[uid] + + + def buildProtocol(self, addr): + """ + Generate a channel attached to this site. 
+ """ + channel = http.HTTPFactory.buildProtocol(self, addr) + channel.requestFactory = self.requestFactory + channel.site = self + return channel + + isLeaf = 0 + + def render(self, request): + """ + Redirect because a Site is always a directory. + """ + request.redirect(request.prePathURL() + b'/') + request.finish() + + + def getChildWithDefault(self, pathEl, request): + """ + Emulate a resource's getChild method. + """ + request.site = self + return self.resource.getChildWithDefault(pathEl, request) + + + def getResourceFor(self, request): + """ + Get a resource for a request. + + This iterates through the resource hierarchy, calling + getChildWithDefault on each resource it finds for a path element, + stopping when it hits an element where isLeaf is true. + """ + request.site = self + # Sitepath is used to determine cookie names between distributed + # servers and disconnected sites. + request.sitepath = copy.copy(request.prepath) + return resource.getChildForRequest(self.resource, request) + + # IProtocolNegotiationFactory + def acceptableProtocols(self): + """ + Protocols this server can speak. + """ + baseProtocols = [b'http/1.1'] + + if http.H2_ENABLED: + baseProtocols.insert(0, b'h2') + + return baseProtocols diff --git a/contrib/python/Twisted/py2/twisted/web/soap.py b/contrib/python/Twisted/py2/twisted/web/soap.py new file mode 100644 index 0000000000..fc15e038fa --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/soap.py @@ -0,0 +1,154 @@ +# -*- test-case-name: twisted.web.test.test_soap -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + + +""" +SOAP support for twisted.web. + +Requires SOAPpy 0.10.1 or later. + +Maintainer: Itamar Shtull-Trauring + +Future plans: +SOAPContext support of some kind. +Pluggable method lookup policies. +""" + +# SOAPpy +import SOAPpy + +# twisted imports +from twisted.web import server, resource, client +from twisted.internet import defer + + +class SOAPPublisher(resource.Resource): + """Publish SOAP methods. + + By default, publish methods beginning with 'soap_'. If the method + has an attribute 'useKeywords', it well get the arguments passed + as keyword args. + """ + + isLeaf = 1 + + # override to change the encoding used for responses + encoding = "UTF-8" + + def lookupFunction(self, functionName): + """Lookup published SOAP function. + + Override in subclasses. Default behaviour - publish methods + starting with soap_. + + @return: callable or None if not found. 
+ """ + return getattr(self, "soap_%s" % functionName, None) + + def render(self, request): + """Handle a SOAP command.""" + data = request.content.read() + + p, header, body, attrs = SOAPpy.parseSOAPRPC(data, 1, 1, 1) + + methodName, args, kwargs = p._name, p._aslist, p._asdict + + # deal with changes in SOAPpy 0.11 + if callable(args): + args = args() + if callable(kwargs): + kwargs = kwargs() + + function = self.lookupFunction(methodName) + + if not function: + self._methodNotFound(request, methodName) + return server.NOT_DONE_YET + else: + if hasattr(function, "useKeywords"): + keywords = {} + for k, v in kwargs.items(): + keywords[str(k)] = v + d = defer.maybeDeferred(function, **keywords) + else: + d = defer.maybeDeferred(function, *args) + + d.addCallback(self._gotResult, request, methodName) + d.addErrback(self._gotError, request, methodName) + return server.NOT_DONE_YET + + def _methodNotFound(self, request, methodName): + response = SOAPpy.buildSOAP(SOAPpy.faultType("%s:Client" % + SOAPpy.NS.ENV_T, "Method %s not found" % methodName), + encoding=self.encoding) + self._sendResponse(request, response, status=500) + + def _gotResult(self, result, request, methodName): + if not isinstance(result, SOAPpy.voidType): + result = {"Result": result} + response = SOAPpy.buildSOAP(kw={'%sResponse' % methodName: result}, + encoding=self.encoding) + self._sendResponse(request, response) + + def _gotError(self, failure, request, methodName): + e = failure.value + if isinstance(e, SOAPpy.faultType): + fault = e + else: + fault = SOAPpy.faultType("%s:Server" % SOAPpy.NS.ENV_T, + "Method %s failed." % methodName) + response = SOAPpy.buildSOAP(fault, encoding=self.encoding) + self._sendResponse(request, response, status=500) + + def _sendResponse(self, request, response, status=200): + request.setResponseCode(status) + + if self.encoding is not None: + mimeType = 'text/xml; charset="%s"' % self.encoding + else: + mimeType = "text/xml" + request.setHeader("Content-type", mimeType) + request.setHeader("Content-length", str(len(response))) + request.write(response) + request.finish() + + +class Proxy: + """A Proxy for making remote SOAP calls. + + Pass the URL of the remote SOAP server to the constructor. + + Use proxy.callRemote('foobar', 1, 2) to call remote method + 'foobar' with args 1 and 2, proxy.callRemote('foobar', x=1) + will call foobar with named argument 'x'. + """ + + # at some point this should have encoding etc. kwargs + def __init__(self, url, namespace=None, header=None): + self.url = url + self.namespace = namespace + self.header = header + + def _cbGotResult(self, result): + result = SOAPpy.parseSOAPRPC(result) + if hasattr(result, 'Result'): + return result.Result + elif len(result) == 1: + ## SOAPpy 0.11.6 wraps the return results in a containing structure. + ## This check added to make Proxy behaviour emulate SOAPProxy, which + ## flattens the structure by default. + ## This behaviour is OK because even singleton lists are wrapped in + ## another singleton structType, which is almost always useless. 
+ return result[0] + else: + return result + + def callRemote(self, method, *args, **kwargs): + payload = SOAPpy.buildSOAP(args=args, kw=kwargs, method=method, + header=self.header, namespace=self.namespace) + return client.getPage(self.url, postdata=payload, method="POST", + headers={'content-type': 'text/xml', + 'SOAPAction': method} + ).addCallback(self._cbGotResult) + diff --git a/contrib/python/Twisted/py2/twisted/web/static.py b/contrib/python/Twisted/py2/twisted/web/static.py new file mode 100644 index 0000000000..c603b6bbdd --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/static.py @@ -0,0 +1,1103 @@ +# -*- test-case-name: twisted.web.test.test_static -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +Static resources for L{twisted.web}. +""" + +from __future__ import division, absolute_import + +import errno +import itertools +import mimetypes +import os +import time +import warnings + +from zope.interface import implementer + +from twisted.web import server +from twisted.web import resource +from twisted.web import http +from twisted.web.util import redirectTo + +from twisted.python.compat import (_PY3, intToBytes, nativeString, + networkString) +from twisted.python.compat import escape + +from twisted.python import components, filepath, log +from twisted.internet import abstract, interfaces +from twisted.python.util import InsensitiveDict +from twisted.python.runtime import platformType +from twisted.python.url import URL +from incremental import Version +from twisted.python.deprecate import deprecated + +if _PY3: + from urllib.parse import quote, unquote +else: + from urllib import quote, unquote + +dangerousPathError = resource.NoResource("Invalid request URL.") + +def isDangerous(path): + return path == b'..' or b'/' in path or networkString(os.sep) in path + + +class Data(resource.Resource): + """ + This is a static, in-memory resource. + """ + + def __init__(self, data, type): + """ + @param data: The bytes that make up this data resource. + @type data: L{bytes} + + @param type: A native string giving the Internet media type for this + content. + @type type: L{str} + """ + resource.Resource.__init__(self) + self.data = data + self.type = type + + + def render_GET(self, request): + request.setHeader(b"content-type", networkString(self.type)) + request.setHeader(b"content-length", intToBytes(len(self.data))) + if request.method == b"HEAD": + return b'' + return self.data + render_HEAD = render_GET + + + +@deprecated(Version("Twisted", 16, 0, 0)) +def addSlash(request): + """ + Add a trailing slash to C{request}'s URI. Deprecated, do not use. + """ + return _addSlash(request) + + + +def _addSlash(request): + """ + Add a trailing slash to C{request}'s URI. + + @param request: The incoming request to add the ending slash to. + @type request: An object conforming to L{twisted.web.iweb.IRequest} + + @return: A URI with a trailing slash, with query and fragment preserved. 
+ @rtype: L{bytes} + """ + url = URL.fromText(request.uri.decode('ascii')) + # Add an empty path segment at the end, so that it adds a trailing slash + url = url.replace(path=list(url.path) + [u""]) + return url.asText().encode('ascii') + + + +class Redirect(resource.Resource): + def __init__(self, request): + resource.Resource.__init__(self) + self.url = _addSlash(request) + + def render(self, request): + return redirectTo(self.url, request) + + +class Registry(components.Componentized): + """ + I am a Componentized object that will be made available to internal Twisted + file-based dynamic web content such as .rpy and .epy scripts. + """ + + def __init__(self): + components.Componentized.__init__(self) + self._pathCache = {} + + def cachePath(self, path, rsrc): + self._pathCache[path] = rsrc + + def getCachedPath(self, path): + return self._pathCache.get(path) + + +def loadMimeTypes(mimetype_locations=None, init=mimetypes.init): + """ + Produces a mapping of extensions (with leading dot) to MIME types. + + It does this by calling the C{init} function of the L{mimetypes} module. + This will have the side effect of modifying the global MIME types cache + in that module. + + Multiple file locations containing mime-types can be passed as a list. + The files will be sourced in that order, overriding mime-types from the + files sourced beforehand, but only if a new entry explicitly overrides + the current entry. + + @param mimetype_locations: Optional. List of paths to C{mime.types} style + files that should be used. + @type mimetype_locations: iterable of paths or L{None} + @param init: The init function to call. Defaults to the global C{init} + function of the C{mimetypes} module. For internal use (testing) only. + @type init: callable + """ + init(mimetype_locations) + mimetypes.types_map.update( + { + '.conf': 'text/plain', + '.diff': 'text/plain', + '.flac': 'audio/x-flac', + '.java': 'text/plain', + '.oz': 'text/x-oz', + '.swf': 'application/x-shockwave-flash', + '.wml': 'text/vnd.wap.wml', + '.xul': 'application/vnd.mozilla.xul+xml', + '.patch': 'text/plain' + } + ) + return mimetypes.types_map + + +def getTypeAndEncoding(filename, types, encodings, defaultType): + p, ext = filepath.FilePath(filename).splitext() + ext = filepath._coerceToFilesystemEncoding('', ext.lower()) + if ext in encodings: + enc = encodings[ext] + ext = os.path.splitext(p)[1].lower() + else: + enc = None + type = types.get(ext, defaultType) + return type, enc + + + +class File(resource.Resource, filepath.FilePath): + """ + File is a resource that represents a plain non-interpreted file + (although it can look for an extension like .rpy or .cgi and hand the + file to a processor for interpretation if you wish). Its constructor + takes a file path. + + Alternatively, you can give a directory path to the constructor. In this + case the resource will represent that directory, and its children will + be files underneath that directory. This provides access to an entire + filesystem tree with a single Resource. + + If you map the URL 'http://server/FILE' to a resource created as + File('/tmp'), then http://server/FILE/ will return an HTML-formatted + listing of the /tmp/ directory, and http://server/FILE/foo/bar.html will + return the contents of /tmp/foo/bar.html . + + @cvar childNotFound: L{Resource} used to render 404 Not Found error pages. + @cvar forbidden: L{Resource} used to render 403 Forbidden error pages. 
+ + @ivar contentTypes: a mapping of extensions to MIME types used to set the + default value for the Content-Type header. + It is initialized with the values returned by L{loadMimeTypes}. + @type contentTypes: C{dict} + + @ivar contentEncodings: a mapping of extensions to encoding types used to + set default value for the Content-Encoding header. + @type contentEncodings: C{dict} + """ + + contentTypes = loadMimeTypes() + + contentEncodings = { + ".gz" : "gzip", + ".bz2": "bzip2" + } + + processors = {} + + indexNames = ["index", "index.html", "index.htm", "index.rpy"] + + type = None + + def __init__(self, path, defaultType="text/html", ignoredExts=(), registry=None, allowExt=0): + """ + Create a file with the given path. + + @param path: The filename of the file from which this L{File} will + serve data. + @type path: C{str} + + @param defaultType: A I{major/minor}-style MIME type specifier + indicating the I{Content-Type} with which this L{File}'s data + will be served if a MIME type cannot be determined based on + C{path}'s extension. + @type defaultType: C{str} + + @param ignoredExts: A sequence giving the extensions of paths in the + filesystem which will be ignored for the purposes of child + lookup. For example, if C{ignoredExts} is C{(".bar",)} and + C{path} is a directory containing a file named C{"foo.bar"}, a + request for the C{"foo"} child of this resource will succeed + with a L{File} pointing to C{"foo.bar"}. + + @param registry: The registry object being used to handle this + request. If L{None}, one will be created. + @type registry: L{Registry} + + @param allowExt: Ignored parameter, only present for backwards + compatibility. Do not pass a value for this parameter. + """ + resource.Resource.__init__(self) + filepath.FilePath.__init__(self, path) + self.defaultType = defaultType + if ignoredExts in (0, 1) or allowExt: + warnings.warn("ignoredExts should receive a list, not a boolean") + if ignoredExts or allowExt: + self.ignoredExts = ['*'] + else: + self.ignoredExts = [] + else: + self.ignoredExts = list(ignoredExts) + self.registry = registry or Registry() + + + def ignoreExt(self, ext): + """Ignore the given extension. + + Serve file.ext if file is requested + """ + self.ignoredExts.append(ext) + + childNotFound = resource.NoResource("File not found.") + forbidden = resource.ForbiddenResource() + + + def directoryListing(self): + """ + Return a resource that generates an HTML listing of the + directory this path represents. + + @return: A resource that renders the directory to HTML. + @rtype: L{DirectoryLister} + """ + if _PY3: + path = self.path + names = self.listNames() + else: + # DirectoryLister works in terms of native strings, so on + # Python 2, ensure we have a bytes paths for this + # directory and its contents. We use the asBytesMode + # method inherited from FilePath to ensure consistent + # encoding of the actual path. This returns a FilePath + # instance even when called on subclasses, however, so we + # have to create a new File instance. + nativeStringPath = self.createSimilarFile(self.asBytesMode().path) + path = nativeStringPath.path + names = nativeStringPath.listNames() + return DirectoryLister(path, + names, + self.contentTypes, + self.contentEncodings, + self.defaultType) + + + def getChild(self, path, request): + """ + If this L{File}"s path refers to a directory, return a L{File} + referring to the file named C{path} in that directory. + + If C{path} is the empty string, return a L{DirectoryLister} + instead. 
+ + @param path: The current path segment. + @type path: L{bytes} + + @param request: The incoming request. + @type request: An that provides L{twisted.web.iweb.IRequest}. + + @return: A resource representing the requested file or + directory, or L{NoResource} if the path cannot be + accessed. + @rtype: An object that provides L{resource.IResource}. + """ + if isinstance(path, bytes): + try: + # Request calls urllib.unquote on each path segment, + # leaving us with raw bytes. + path = path.decode('utf-8') + except UnicodeDecodeError: + log.err(None, + "Could not decode path segment as utf-8: %r" % (path,)) + return self.childNotFound + + self.restat(reraise=False) + + if not self.isdir(): + return self.childNotFound + + if path: + try: + fpath = self.child(path) + except filepath.InsecurePath: + return self.childNotFound + else: + fpath = self.childSearchPreauth(*self.indexNames) + if fpath is None: + return self.directoryListing() + + if not fpath.exists(): + fpath = fpath.siblingExtensionSearch(*self.ignoredExts) + if fpath is None: + return self.childNotFound + + extension = fpath.splitext()[1] + if platformType == "win32": + # don't want .RPY to be different than .rpy, since that would allow + # source disclosure. + processor = InsensitiveDict(self.processors).get(extension) + else: + processor = self.processors.get(extension) + if processor: + return resource.IResource(processor(fpath.path, self.registry)) + return self.createSimilarFile(fpath.path) + + + # methods to allow subclasses to e.g. decrypt files on the fly: + def openForReading(self): + """Open a file and return it.""" + return self.open() + + + def getFileSize(self): + """Return file size.""" + return self.getsize() + + + def _parseRangeHeader(self, range): + """ + Parse the value of a Range header into (start, stop) pairs. + + In a given pair, either of start or stop can be None, signifying that + no value was provided, but not both. + + @return: A list C{[(start, stop)]} of pairs of length at least one. + + @raise ValueError: if the header is syntactically invalid or if the + Bytes-Unit is anything other than "bytes'. + """ + try: + kind, value = range.split(b'=', 1) + except ValueError: + raise ValueError("Missing '=' separator") + kind = kind.strip() + if kind != b'bytes': + raise ValueError("Unsupported Bytes-Unit: %r" % (kind,)) + unparsedRanges = list(filter(None, map(bytes.strip, value.split(b',')))) + parsedRanges = [] + for byteRange in unparsedRanges: + try: + start, end = byteRange.split(b'-', 1) + except ValueError: + raise ValueError("Invalid Byte-Range: %r" % (byteRange,)) + if start: + try: + start = int(start) + except ValueError: + raise ValueError("Invalid Byte-Range: %r" % (byteRange,)) + else: + start = None + if end: + try: + end = int(end) + except ValueError: + raise ValueError("Invalid Byte-Range: %r" % (byteRange,)) + else: + end = None + if start is not None: + if end is not None and start > end: + # Start must be less than or equal to end or it is invalid. + raise ValueError("Invalid Byte-Range: %r" % (byteRange,)) + elif end is None: + # One or both of start and end must be specified. Omitting + # both is invalid. + raise ValueError("Invalid Byte-Range: %r" % (byteRange,)) + parsedRanges.append((start, end)) + return parsedRanges + + + def _rangeToOffsetAndSize(self, start, end): + """ + Convert a start and end from a Range header to an offset and size. 
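        For example, assuming a 1000-byte resource: C{(0, 499)} maps to
        C{(0, 500)}, the suffix range C{(None, 500)} maps to C{(500, 500)},
        and C{(950, None)} maps to C{(950, 50)}.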
+ + This method checks that the resulting range overlaps with the resource + being served (and so has the value of C{getFileSize()} as an indirect + input). + + Either but not both of start or end can be L{None}: + + - Omitted start means that the end value is actually a start value + relative to the end of the resource. + + - Omitted end means the end of the resource should be the end of + the range. + + End is interpreted as inclusive, as per RFC 2616. + + If this range doesn't overlap with any of this resource, C{(0, 0)} is + returned, which is not otherwise a value return value. + + @param start: The start value from the header, or L{None} if one was + not present. + @param end: The end value from the header, or L{None} if one was not + present. + @return: C{(offset, size)} where offset is how far into this resource + this resource the range begins and size is how long the range is, + or C{(0, 0)} if the range does not overlap this resource. + """ + size = self.getFileSize() + if start is None: + start = size - end + end = size + elif end is None: + end = size + elif end < size: + end += 1 + elif end > size: + end = size + if start >= size: + start = end = 0 + return start, (end - start) + + + def _contentRange(self, offset, size): + """ + Return a string suitable for the value of a Content-Range header for a + range with the given offset and size. + + The offset and size are not sanity checked in any way. + + @param offset: How far into this resource the range begins. + @param size: How long the range is. + @return: The value as appropriate for the value of a Content-Range + header. + """ + return networkString('bytes %d-%d/%d' % ( + offset, offset + size - 1, self.getFileSize())) + + + def _doSingleRangeRequest(self, request, startAndEnd): + """ + Set up the response for Range headers that specify a single range. + + This method checks if the request is satisfiable and sets the response + code and Content-Range header appropriately. The return value + indicates which part of the resource to return. + + @param request: The Request object. + @param startAndEnd: A 2-tuple of start of the byte range as specified by + the header and the end of the byte range as specified by the header. + At most one of the start and end may be L{None}. + @return: A 2-tuple of the offset and size of the range to return. + offset == size == 0 indicates that the request is not satisfiable. + """ + start, end = startAndEnd + offset, size = self._rangeToOffsetAndSize(start, end) + if offset == size == 0: + # This range doesn't overlap with any of this resource, so the + # request is unsatisfiable. + request.setResponseCode(http.REQUESTED_RANGE_NOT_SATISFIABLE) + request.setHeader( + b'content-range', networkString('bytes */%d' % (self.getFileSize(),))) + else: + request.setResponseCode(http.PARTIAL_CONTENT) + request.setHeader( + b'content-range', self._contentRange(offset, size)) + return offset, size + + + def _doMultipleRangeRequest(self, request, byteRanges): + """ + Set up the response for Range headers that specify a single range. + + This method checks if the request is satisfiable and sets the response + code and Content-Type and Content-Length headers appropriately. The + return value, which is a little complicated, indicates which parts of + the resource to return and the boundaries that should separate the + parts. + + In detail, the return value is a tuple rangeInfo C{rangeInfo} is a + list of 3-tuples C{(partSeparator, partOffset, partSize)}. 
The + response to this request should be, for each element of C{rangeInfo}, + C{partSeparator} followed by C{partSize} bytes of the resource + starting at C{partOffset}. Each C{partSeparator} includes the + MIME-style boundary and the part-specific Content-type and + Content-range headers. It is convenient to return the separator as a + concrete string from this method, because this method needs to compute + the number of bytes that will make up the response to be able to set + the Content-Length header of the response accurately. + + @param request: The Request object. + @param byteRanges: A list of C{(start, end)} values as specified by + the header. For each range, at most one of C{start} and C{end} + may be L{None}. + @return: See above. + """ + matchingRangeFound = False + rangeInfo = [] + contentLength = 0 + boundary = networkString("%x%x" % (int(time.time()*1000000), os.getpid())) + if self.type: + contentType = self.type + else: + contentType = b'bytes' # It's what Apache does... + for start, end in byteRanges: + partOffset, partSize = self._rangeToOffsetAndSize(start, end) + if partOffset == partSize == 0: + continue + contentLength += partSize + matchingRangeFound = True + partContentRange = self._contentRange(partOffset, partSize) + partSeparator = networkString(( + "\r\n" + "--%s\r\n" + "Content-type: %s\r\n" + "Content-range: %s\r\n" + "\r\n") % (nativeString(boundary), nativeString(contentType), nativeString(partContentRange))) + contentLength += len(partSeparator) + rangeInfo.append((partSeparator, partOffset, partSize)) + if not matchingRangeFound: + request.setResponseCode(http.REQUESTED_RANGE_NOT_SATISFIABLE) + request.setHeader( + b'content-length', b'0') + request.setHeader( + b'content-range', networkString('bytes */%d' % (self.getFileSize(),))) + return [], b'' + finalBoundary = b"\r\n--" + boundary + b"--\r\n" + rangeInfo.append((finalBoundary, 0, 0)) + request.setResponseCode(http.PARTIAL_CONTENT) + request.setHeader( + b'content-type', networkString('multipart/byteranges; boundary="%s"' % (nativeString(boundary),))) + request.setHeader( + b'content-length', intToBytes(contentLength + len(finalBoundary))) + return rangeInfo + + + def _setContentHeaders(self, request, size=None): + """ + Set the Content-length and Content-type headers for this request. + + This method is not appropriate for requests for multiple byte ranges; + L{_doMultipleRangeRequest} will set these headers in that case. + + @param request: The L{twisted.web.http.Request} object. + @param size: The size of the response. If not specified, default to + C{self.getFileSize()}. + """ + if size is None: + size = self.getFileSize() + request.setHeader(b'content-length', intToBytes(size)) + if self.type: + request.setHeader(b'content-type', networkString(self.type)) + if self.encoding: + request.setHeader(b'content-encoding', networkString(self.encoding)) + + + def makeProducer(self, request, fileForReading): + """ + Make a L{StaticProducer} that will produce the body of this response. + + This method will also set the response code and Content-* headers. + + @param request: The L{twisted.web.http.Request} object. + @param fileForReading: The file object containing the resource. + @return: A L{StaticProducer}. Calling C{.start()} on this will begin + producing the response. 
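For reference, _doMultipleRangeRequest above assembles a multipart/byteranges body: each (partSeparator, partOffset, partSize) entry contributes a boundary block plus that slice of the file, and the final boundary closes the message. A schematic of the body for "Range: bytes=0-3,8-11" against a 12-byte text/plain resource containing "abcdefghijkl" (the boundary value is illustrative; the real one is derived from the current time and PID as shown above):

    body = (
        b"\r\n--3d6b6a416f9b5\r\n"
        b"Content-type: text/plain\r\n"
        b"Content-range: bytes 0-3/12\r\n"
        b"\r\n"
        b"abcd"
        b"\r\n--3d6b6a416f9b5\r\n"
        b"Content-type: text/plain\r\n"
        b"Content-range: bytes 8-11/12\r\n"
        b"\r\n"
        b"ijkl"
        b"\r\n--3d6b6a416f9b5--\r\n"
    )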
+ """ + byteRange = request.getHeader(b'range') + if byteRange is None: + self._setContentHeaders(request) + request.setResponseCode(http.OK) + return NoRangeStaticProducer(request, fileForReading) + try: + parsedRanges = self._parseRangeHeader(byteRange) + except ValueError: + log.msg("Ignoring malformed Range header %r" % (byteRange.decode(),)) + self._setContentHeaders(request) + request.setResponseCode(http.OK) + return NoRangeStaticProducer(request, fileForReading) + + if len(parsedRanges) == 1: + offset, size = self._doSingleRangeRequest( + request, parsedRanges[0]) + self._setContentHeaders(request, size) + return SingleRangeStaticProducer( + request, fileForReading, offset, size) + else: + rangeInfo = self._doMultipleRangeRequest(request, parsedRanges) + return MultipleRangeStaticProducer( + request, fileForReading, rangeInfo) + + + def render_GET(self, request): + """ + Begin sending the contents of this L{File} (or a subset of the + contents, based on the 'range' header) to the given request. + """ + self.restat(False) + + if self.type is None: + self.type, self.encoding = getTypeAndEncoding(self.basename(), + self.contentTypes, + self.contentEncodings, + self.defaultType) + + if not self.exists(): + return self.childNotFound.render(request) + + if self.isdir(): + return self.redirect(request) + + request.setHeader(b'accept-ranges', b'bytes') + + try: + fileForReading = self.openForReading() + except IOError as e: + if e.errno == errno.EACCES: + return self.forbidden.render(request) + else: + raise + + if request.setLastModified(self.getModificationTime()) is http.CACHED: + # `setLastModified` also sets the response code for us, so if the + # request is cached, we close the file now that we've made sure that + # the request would otherwise succeed and return an empty body. + fileForReading.close() + return b'' + + if request.method == b'HEAD': + # Set the content headers here, rather than making a producer. + self._setContentHeaders(request) + # We've opened the file to make sure it's accessible, so close it + # now that we don't need it. + fileForReading.close() + return b'' + + producer = self.makeProducer(request, fileForReading) + producer.start() + + # and make sure the connection doesn't get closed + return server.NOT_DONE_YET + render_HEAD = render_GET + + + def redirect(self, request): + return redirectTo(_addSlash(request), request) + + + def listNames(self): + if not self.isdir(): + return [] + directory = self.listdir() + directory.sort() + return directory + + def listEntities(self): + return list(map(lambda fileName, self=self: self.createSimilarFile(os.path.join(self.path, fileName)), self.listNames())) + + + def createSimilarFile(self, path): + f = self.__class__(path, self.defaultType, self.ignoredExts, self.registry) + # refactoring by steps, here - constructor should almost certainly take these + f.processors = self.processors + f.indexNames = self.indexNames[:] + f.childNotFound = self.childNotFound + return f + + + +@implementer(interfaces.IPullProducer) +class StaticProducer(object): + """ + Superclass for classes that implement the business of producing. + + @ivar request: The L{IRequest} to write the contents of the file to. + @ivar fileObject: The file the contents of which to write to the request. + """ + + bufferSize = abstract.FileDescriptor.bufferSize + + + def __init__(self, request, fileObject): + """ + Initialize the instance. 
+ """ + self.request = request + self.fileObject = fileObject + + + def start(self): + raise NotImplementedError(self.start) + + + def resumeProducing(self): + raise NotImplementedError(self.resumeProducing) + + + def stopProducing(self): + """ + Stop producing data. + + L{twisted.internet.interfaces.IProducer.stopProducing} + is called when our consumer has died, and subclasses also call this + method when they are done producing data. + """ + self.fileObject.close() + self.request = None + + + +class NoRangeStaticProducer(StaticProducer): + """ + A L{StaticProducer} that writes the entire file to the request. + """ + + def start(self): + self.request.registerProducer(self, False) + + + def resumeProducing(self): + if not self.request: + return + data = self.fileObject.read(self.bufferSize) + if data: + # this .write will spin the reactor, calling .doWrite and then + # .resumeProducing again, so be prepared for a re-entrant call + self.request.write(data) + else: + self.request.unregisterProducer() + self.request.finish() + self.stopProducing() + + + +class SingleRangeStaticProducer(StaticProducer): + """ + A L{StaticProducer} that writes a single chunk of a file to the request. + """ + + def __init__(self, request, fileObject, offset, size): + """ + Initialize the instance. + + @param request: See L{StaticProducer}. + @param fileObject: See L{StaticProducer}. + @param offset: The offset into the file of the chunk to be written. + @param size: The size of the chunk to write. + """ + StaticProducer.__init__(self, request, fileObject) + self.offset = offset + self.size = size + + + def start(self): + self.fileObject.seek(self.offset) + self.bytesWritten = 0 + self.request.registerProducer(self, 0) + + + def resumeProducing(self): + if not self.request: + return + data = self.fileObject.read( + min(self.bufferSize, self.size - self.bytesWritten)) + if data: + self.bytesWritten += len(data) + # this .write will spin the reactor, calling .doWrite and then + # .resumeProducing again, so be prepared for a re-entrant call + self.request.write(data) + if self.request and self.bytesWritten == self.size: + self.request.unregisterProducer() + self.request.finish() + self.stopProducing() + + + +class MultipleRangeStaticProducer(StaticProducer): + """ + A L{StaticProducer} that writes several chunks of a file to the request. + """ + + def __init__(self, request, fileObject, rangeInfo): + """ + Initialize the instance. + + @param request: See L{StaticProducer}. + @param fileObject: See L{StaticProducer}. + @param rangeInfo: A list of tuples C{[(boundary, offset, size)]} + where: + - C{boundary} will be written to the request first. + - C{offset} the offset into the file of chunk to write. + - C{size} the size of the chunk to write. 
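The producers above are driven by File.render_GET; the usual way to exercise them is to put a File at the root of a Site. A minimal sketch with an illustrative directory and port:

    from twisted.internet import reactor
    from twisted.web import server, static

    root = static.File("./public")            # serves files, honours Range headers
    root.indexNames = ["index.html"]          # directory index candidates
    reactor.listenTCP(8080, server.Site(root))
    reactor.run()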
+ """ + StaticProducer.__init__(self, request, fileObject) + self.rangeInfo = rangeInfo + + + def start(self): + self.rangeIter = iter(self.rangeInfo) + self._nextRange() + self.request.registerProducer(self, 0) + + + def _nextRange(self): + self.partBoundary, partOffset, self._partSize = next(self.rangeIter) + self._partBytesWritten = 0 + self.fileObject.seek(partOffset) + + + def resumeProducing(self): + if not self.request: + return + data = [] + dataLength = 0 + done = False + while dataLength < self.bufferSize: + if self.partBoundary: + dataLength += len(self.partBoundary) + data.append(self.partBoundary) + self.partBoundary = None + p = self.fileObject.read( + min(self.bufferSize - dataLength, + self._partSize - self._partBytesWritten)) + self._partBytesWritten += len(p) + dataLength += len(p) + data.append(p) + if self.request and self._partBytesWritten == self._partSize: + try: + self._nextRange() + except StopIteration: + done = True + break + self.request.write(b''.join(data)) + if done: + self.request.unregisterProducer() + self.request.finish() + self.stopProducing() + + + +class ASISProcessor(resource.Resource): + """ + Serve files exactly as responses without generating a status-line or any + headers. Inspired by Apache's mod_asis. + """ + + def __init__(self, path, registry=None): + resource.Resource.__init__(self) + self.path = path + self.registry = registry or Registry() + + + def render(self, request): + request.startedWriting = 1 + res = File(self.path, registry=self.registry) + return res.render(request) + + + +def formatFileSize(size): + """ + Format the given file size in bytes to human readable format. + """ + if size < 1024: + return '%iB' % size + elif size < (1024 ** 2): + return '%iK' % (size / 1024) + elif size < (1024 ** 3): + return '%iM' % (size / (1024 ** 2)) + else: + return '%iG' % (size / (1024 ** 3)) + + + +class DirectoryLister(resource.Resource): + """ + Print the content of a directory. + + @ivar template: page template used to render the content of the directory. + It must contain the format keys B{header} and B{tableContent}. + @type template: C{str} + + @ivar linePattern: template used to render one line in the listing table. + It must contain the format keys B{class}, B{href}, B{text}, B{size}, + B{type} and B{encoding}. + @type linePattern: C{str} + + @ivar contentTypes: a mapping of extensions to MIME types used to populate + the information of a member of this directory. + It is initialized with the value L{File.contentTypes}. + @type contentTypes: C{dict} + + @ivar contentEncodings: a mapping of extensions to encoding types. + It is initialized with the value L{File.contentEncodings}. + @type contentEncodings: C{dict} + + @ivar defaultType: default type used when no mimetype is detected. + @type defaultType: C{str} + + @ivar dirs: filtered content of C{path}, if the whole content should not be + displayed (default to L{None}, which means the actual content of + C{path} is printed). + @type dirs: L{None} or C{list} + + @ivar path: directory which content should be listed. 
+ @type path: C{str} + """ + + template = """<html> +<head> +<title>%(header)s</title> +<style> +.even-dir { background-color: #efe0ef } +.even { background-color: #eee } +.odd-dir {background-color: #f0d0ef } +.odd { background-color: #dedede } +.icon { text-align: center } +.listing { + margin-left: auto; + margin-right: auto; + width: 50%%; + padding: 0.1em; + } + +body { border: 0; padding: 0; margin: 0; background-color: #efefef; } +h1 {padding: 0.1em; background-color: #777; color: white; border-bottom: thin white dashed;} + +</style> +</head> + +<body> +<h1>%(header)s</h1> + +<table> + <thead> + <tr> + <th>Filename</th> + <th>Size</th> + <th>Content type</th> + <th>Content encoding</th> + </tr> + </thead> + <tbody> +%(tableContent)s + </tbody> +</table> + +</body> +</html> +""" + + linePattern = """<tr class="%(class)s"> + <td><a href="%(href)s">%(text)s</a></td> + <td>%(size)s</td> + <td>%(type)s</td> + <td>%(encoding)s</td> +</tr> +""" + + def __init__(self, pathname, dirs=None, + contentTypes=File.contentTypes, + contentEncodings=File.contentEncodings, + defaultType='text/html'): + resource.Resource.__init__(self) + self.contentTypes = contentTypes + self.contentEncodings = contentEncodings + self.defaultType = defaultType + # dirs allows usage of the File to specify what gets listed + self.dirs = dirs + self.path = pathname + + + def _getFilesAndDirectories(self, directory): + """ + Helper returning files and directories in given directory listing, with + attributes to be used to build a table content with + C{self.linePattern}. + + @return: tuple of (directories, files) + @rtype: C{tuple} of C{list} + """ + files = [] + dirs = [] + + for path in directory: + if _PY3: + if isinstance(path, bytes): + path = path.decode("utf8") + + url = quote(path, "/") + escapedPath = escape(path) + childPath = filepath.FilePath(self.path).child(path) + + if childPath.isdir(): + dirs.append({'text': escapedPath + "/", 'href': url + "/", + 'size': '', 'type': '[Directory]', + 'encoding': ''}) + else: + mimetype, encoding = getTypeAndEncoding(path, self.contentTypes, + self.contentEncodings, + self.defaultType) + try: + size = childPath.getsize() + except OSError: + continue + files.append({ + 'text': escapedPath, "href": url, + 'type': '[%s]' % mimetype, + 'encoding': (encoding and '[%s]' % encoding or ''), + 'size': formatFileSize(size)}) + return dirs, files + + + def _buildTableContent(self, elements): + """ + Build a table content using C{self.linePattern} and giving elements odd + and even classes. + """ + tableContent = [] + rowClasses = itertools.cycle(['odd', 'even']) + for element, rowClass in zip(elements, rowClasses): + element["class"] = rowClass + tableContent.append(self.linePattern % element) + return tableContent + + + def render(self, request): + """ + Render a listing of the content of C{self.path}. 
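DirectoryLister is what File.directoryListing() returns for directories without an index file; the Size column it renders comes from formatFileSize above. Worked examples of that helper (values truncate toward the displayed unit):

    from twisted.web.static import formatFileSize

    formatFileSize(512)            # '512B'
    formatFileSize(2048)           # '2K'
    formatFileSize(5 * 1024 ** 2)  # '5M'
    formatFileSize(3 * 1024 ** 3)  # '3G'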
+ """ + request.setHeader(b"content-type", b"text/html; charset=utf-8") + if self.dirs is None: + directory = os.listdir(self.path) + directory.sort() + else: + directory = self.dirs + + dirs, files = self._getFilesAndDirectories(directory) + + tableContent = "".join(self._buildTableContent(dirs + files)) + + header = "Directory listing for %s" % ( + escape(unquote(nativeString(request.uri))),) + + done = self.template % {"header": header, "tableContent": tableContent} + if _PY3: + done = done.encode("utf8") + + return done + + + def __repr__(self): + return '<DirectoryLister of %r>' % self.path + + __str__ = __repr__ diff --git a/contrib/python/Twisted/py2/twisted/web/sux.py b/contrib/python/Twisted/py2/twisted/web/sux.py new file mode 100644 index 0000000000..6d248d3aa1 --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/sux.py @@ -0,0 +1,637 @@ +# -*- test-case-name: twisted.web.test.test_xml -*- +# +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + + +""" +*S*mall, *U*ncomplicated *X*ML. + +This is a very simple implementation of XML/HTML as a network +protocol. It is not at all clever. Its main features are that it +does not: + + - support namespaces + - mung mnemonic entity references + - validate + - perform *any* external actions (such as fetching URLs or writing files) + under *any* circumstances + - has lots and lots of horrible hacks for supporting broken HTML (as an + option, they're not on by default). +""" + +from __future__ import print_function + +from twisted.internet.protocol import Protocol +from twisted.python.compat import unicode +from twisted.python.reflect import prefixedMethodNames + + + +# Elements of the three-tuples in the state table. +BEGIN_HANDLER = 0 +DO_HANDLER = 1 +END_HANDLER = 2 + +identChars = '.-_:' +lenientIdentChars = identChars + ';+#/%~' + +def nop(*args, **kw): + "Do nothing." + + +def unionlist(*args): + l = [] + for x in args: + l.extend(x) + d = dict([(x, 1) for x in l]) + return d.keys() + + +def zipfndict(*args, **kw): + default = kw.get('default', nop) + d = {} + for key in unionlist(*[fndict.keys() for fndict in args]): + d[key] = tuple([x.get(key, default) for x in args]) + return d + + +def prefixedMethodClassDict(clazz, prefix): + return dict([(name, getattr(clazz, prefix + name)) for name in prefixedMethodNames(clazz, prefix)]) + + +def prefixedMethodObjDict(obj, prefix): + return dict([(name, getattr(obj, prefix + name)) for name in prefixedMethodNames(obj.__class__, prefix)]) + + +class ParseError(Exception): + + def __init__(self, filename, line, col, message): + self.filename = filename + self.line = line + self.col = col + self.message = message + + def __str__(self): + return "%s:%s:%s: %s" % (self.filename, self.line, self.col, + self.message) + +class XMLParser(Protocol): + + state = None + encodings = None + filename = "<xml />" + beExtremelyLenient = 0 + _prepend = None + + # _leadingBodyData will sometimes be set before switching to the + # 'bodydata' state, when we "accidentally" read a byte of bodydata + # in a different state. 
+ _leadingBodyData = None + + def connectionMade(self): + self.lineno = 1 + self.colno = 0 + self.encodings = [] + + def saveMark(self): + '''Get the line number and column of the last character parsed''' + # This gets replaced during dataReceived, restored afterwards + return (self.lineno, self.colno) + + def _parseError(self, message): + raise ParseError(*((self.filename,)+self.saveMark()+(message,))) + + def _buildStateTable(self): + '''Return a dictionary of begin, do, end state function tuples''' + # _buildStateTable leaves something to be desired but it does what it + # does.. probably slowly, so I'm doing some evil caching so it doesn't + # get called more than once per class. + stateTable = getattr(self.__class__, '__stateTable', None) + if stateTable is None: + stateTable = self.__class__.__stateTable = zipfndict( + *[prefixedMethodObjDict(self, prefix) + for prefix in ('begin_', 'do_', 'end_')]) + return stateTable + + def _decode(self, data): + if 'UTF-16' in self.encodings or 'UCS-2' in self.encodings: + assert not len(data) & 1, 'UTF-16 must come in pairs for now' + if self._prepend: + data = self._prepend + data + for encoding in self.encodings: + data = unicode(data, encoding) + return data + + def maybeBodyData(self): + if self.endtag: + return 'bodydata' + + # Get ready for fun! We're going to allow + # <script>if (foo < bar)</script> to work! + # We do this by making everything between <script> and + # </script> a Text + # BUT <script src="foo"> will be special-cased to do regular, + # lenient behavior, because those may not have </script> + # -radix + + if (self.tagName == 'script' and 'src' not in self.tagAttributes): + # we do this ourselves rather than having begin_waitforendscript + # because that can get called multiple times and we don't want + # bodydata to get reset other than the first time. + self.begin_bodydata(None) + return 'waitforendscript' + return 'bodydata' + + + + def dataReceived(self, data): + stateTable = self._buildStateTable() + if not self.state: + # all UTF-16 starts with this string + if data.startswith((b'\xff\xfe', b'\xfe\xff')): + self._prepend = data[0:2] + self.encodings.append('UTF-16') + data = data[2:] + self.state = 'begin' + if self.encodings: + data = self._decode(data) + else: + data = data.decode("utf-8") + # bring state, lineno, colno into local scope + lineno, colno = self.lineno, self.colno + curState = self.state + # replace saveMark with a nested scope function + _saveMark = self.saveMark + def saveMark(): + return (lineno, colno) + self.saveMark = saveMark + # fetch functions from the stateTable + beginFn, doFn, endFn = stateTable[curState] + try: + for byte in data: + # do newline stuff + if byte == u'\n': + lineno += 1 + colno = 0 + else: + colno += 1 + newState = doFn(byte) + if newState is not None and newState != curState: + # this is the endFn from the previous state + endFn() + curState = newState + beginFn, doFn, endFn = stateTable[curState] + beginFn(byte) + finally: + self.saveMark = _saveMark + self.lineno, self.colno = lineno, colno + # state doesn't make sense if there's an exception.. + self.state = curState + + + def connectionLost(self, reason): + """ + End the last state we were in. 
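XMLParser is a Protocol, so it can be driven by hand the way twisted.web.microdom does: call connectionMade, feed data to dataReceived, and collect results from the SAX-ish callbacks (gotTagStart and friends, defined near the end of the class). A minimal sketch with an illustrative subclass:

    from twisted.web.sux import XMLParser

    class TagCollector(XMLParser):
        def connectionMade(self):
            XMLParser.connectionMade(self)
            self.collected = []

        def gotTagStart(self, name, attributes):
            self.collected.append(name)

        def gotText(self, data):
            pass  # ignore character data in this sketch

    p = TagCollector()
    p.connectionMade()
    p.dataReceived(b"<a href='x'><b>hi</b></a>")
    # p.collected == ['a', 'b']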
+ """ + stateTable = self._buildStateTable() + stateTable[self.state][END_HANDLER]() + + + # state methods + + def do_begin(self, byte): + if byte.isspace(): + return + if byte != '<': + if self.beExtremelyLenient: + self._leadingBodyData = byte + return 'bodydata' + self._parseError("First char of document [%r] wasn't <" % (byte,)) + return 'tagstart' + + def begin_comment(self, byte): + self.commentbuf = '' + + def do_comment(self, byte): + self.commentbuf += byte + if self.commentbuf.endswith('-->'): + self.gotComment(self.commentbuf[:-3]) + return 'bodydata' + + def begin_tagstart(self, byte): + self.tagName = '' # name of the tag + self.tagAttributes = {} # attributes of the tag + self.termtag = 0 # is the tag self-terminating + self.endtag = 0 + + def do_tagstart(self, byte): + if byte.isalnum() or byte in identChars: + self.tagName += byte + if self.tagName == '!--': + return 'comment' + elif byte.isspace(): + if self.tagName: + if self.endtag: + # properly strict thing to do here is probably to only + # accept whitespace + return 'waitforgt' + return 'attrs' + else: + self._parseError("Whitespace before tag-name") + elif byte == '>': + if self.endtag: + self.gotTagEnd(self.tagName) + return 'bodydata' + else: + self.gotTagStart(self.tagName, {}) + return (not self.beExtremelyLenient) and 'bodydata' or self.maybeBodyData() + elif byte == '/': + if self.tagName: + return 'afterslash' + else: + self.endtag = 1 + elif byte in '!?': + if self.tagName: + if not self.beExtremelyLenient: + self._parseError("Invalid character in tag-name") + else: + self.tagName += byte + self.termtag = 1 + elif byte == '[': + if self.tagName == '!': + return 'expectcdata' + else: + self._parseError("Invalid '[' in tag-name") + else: + if self.beExtremelyLenient: + self.bodydata = '<' + return 'unentity' + self._parseError('Invalid tag character: %r'% byte) + + def begin_unentity(self, byte): + self.bodydata += byte + + def do_unentity(self, byte): + self.bodydata += byte + return 'bodydata' + + def end_unentity(self): + self.gotText(self.bodydata) + + def begin_expectcdata(self, byte): + self.cdatabuf = byte + + def do_expectcdata(self, byte): + self.cdatabuf += byte + cdb = self.cdatabuf + cd = '[CDATA[' + if len(cd) > len(cdb): + if cd.startswith(cdb): + return + elif self.beExtremelyLenient: + ## WHAT THE CRAP!? MSWord9 generates HTML that includes these + ## bizarre <![if !foo]> <![endif]> chunks, so I've gotta ignore + ## 'em as best I can. this should really be a separate parse + ## state but I don't even have any idea what these _are_. + return 'waitforgt' + else: + self._parseError("Mal-formed CDATA header") + if cd == cdb: + self.cdatabuf = '' + return 'cdata' + self._parseError("Mal-formed CDATA header") + + def do_cdata(self, byte): + self.cdatabuf += byte + if self.cdatabuf.endswith("]]>"): + self.cdatabuf = self.cdatabuf[:-3] + return 'bodydata' + + def end_cdata(self): + self.gotCData(self.cdatabuf) + self.cdatabuf = '' + + def do_attrs(self, byte): + if byte.isalnum() or byte in identChars: + # XXX FIXME really handle !DOCTYPE at some point + if self.tagName == '!DOCTYPE': + return 'doctype' + if self.tagName[0] in '!?': + return 'waitforgt' + return 'attrname' + elif byte.isspace(): + return + elif byte == '>': + self.gotTagStart(self.tagName, self.tagAttributes) + return (not self.beExtremelyLenient) and 'bodydata' or self.maybeBodyData() + elif byte == '/': + return 'afterslash' + elif self.beExtremelyLenient: + # discard and move on? 
Only case I've seen of this so far was: + # <foo bar="baz""> + return + self._parseError("Unexpected character: %r" % byte) + + def begin_doctype(self, byte): + self.doctype = byte + + def do_doctype(self, byte): + if byte == '>': + return 'bodydata' + self.doctype += byte + + def end_doctype(self): + self.gotDoctype(self.doctype) + self.doctype = None + + def do_waitforgt(self, byte): + if byte == '>': + if self.endtag or not self.beExtremelyLenient: + return 'bodydata' + return self.maybeBodyData() + + def begin_attrname(self, byte): + self.attrname = byte + self._attrname_termtag = 0 + + def do_attrname(self, byte): + if byte.isalnum() or byte in identChars: + self.attrname += byte + return + elif byte == '=': + return 'beforeattrval' + elif byte.isspace(): + return 'beforeeq' + elif self.beExtremelyLenient: + if byte in '"\'': + return 'attrval' + if byte in lenientIdentChars or byte.isalnum(): + self.attrname += byte + return + if byte == '/': + self._attrname_termtag = 1 + return + if byte == '>': + self.attrval = 'True' + self.tagAttributes[self.attrname] = self.attrval + self.gotTagStart(self.tagName, self.tagAttributes) + if self._attrname_termtag: + self.gotTagEnd(self.tagName) + return 'bodydata' + return self.maybeBodyData() + # something is really broken. let's leave this attribute where it + # is and move on to the next thing + return + self._parseError("Invalid attribute name: %r %r" % (self.attrname, byte)) + + def do_beforeattrval(self, byte): + if byte in '"\'': + return 'attrval' + elif byte.isspace(): + return + elif self.beExtremelyLenient: + if byte in lenientIdentChars or byte.isalnum(): + return 'messyattr' + if byte == '>': + self.attrval = 'True' + self.tagAttributes[self.attrname] = self.attrval + self.gotTagStart(self.tagName, self.tagAttributes) + return self.maybeBodyData() + if byte == '\\': + # I saw this in actual HTML once: + # <font size=\"3\"><sup>SM</sup></font> + return + self._parseError("Invalid initial attribute value: %r; Attribute values must be quoted." 
% byte) + + attrname = '' + attrval = '' + + def begin_beforeeq(self,byte): + self._beforeeq_termtag = 0 + + def do_beforeeq(self, byte): + if byte == '=': + return 'beforeattrval' + elif byte.isspace(): + return + elif self.beExtremelyLenient: + if byte.isalnum() or byte in identChars: + self.attrval = 'True' + self.tagAttributes[self.attrname] = self.attrval + return 'attrname' + elif byte == '>': + self.attrval = 'True' + self.tagAttributes[self.attrname] = self.attrval + self.gotTagStart(self.tagName, self.tagAttributes) + if self._beforeeq_termtag: + self.gotTagEnd(self.tagName) + return 'bodydata' + return self.maybeBodyData() + elif byte == '/': + self._beforeeq_termtag = 1 + return + self._parseError("Invalid attribute") + + def begin_attrval(self, byte): + self.quotetype = byte + self.attrval = '' + + def do_attrval(self, byte): + if byte == self.quotetype: + return 'attrs' + self.attrval += byte + + def end_attrval(self): + self.tagAttributes[self.attrname] = self.attrval + self.attrname = self.attrval = '' + + def begin_messyattr(self, byte): + self.attrval = byte + + def do_messyattr(self, byte): + if byte.isspace(): + return 'attrs' + elif byte == '>': + endTag = 0 + if self.attrval.endswith('/'): + endTag = 1 + self.attrval = self.attrval[:-1] + self.tagAttributes[self.attrname] = self.attrval + self.gotTagStart(self.tagName, self.tagAttributes) + if endTag: + self.gotTagEnd(self.tagName) + return 'bodydata' + return self.maybeBodyData() + else: + self.attrval += byte + + def end_messyattr(self): + if self.attrval: + self.tagAttributes[self.attrname] = self.attrval + + def begin_afterslash(self, byte): + self._after_slash_closed = 0 + + def do_afterslash(self, byte): + # this state is only after a self-terminating slash, e.g. <foo/> + if self._after_slash_closed: + self._parseError("Mal-formed")#XXX When does this happen?? + if byte != '>': + if self.beExtremelyLenient: + return + else: + self._parseError("No data allowed after '/'") + self._after_slash_closed = 1 + self.gotTagStart(self.tagName, self.tagAttributes) + self.gotTagEnd(self.tagName) + # don't need maybeBodyData here because there better not be + # any javascript code after a <script/>... we'll see :( + return 'bodydata' + + def begin_bodydata(self, byte): + if self._leadingBodyData: + self.bodydata = self._leadingBodyData + del self._leadingBodyData + else: + self.bodydata = '' + + def do_bodydata(self, byte): + if byte == '<': + return 'tagstart' + if byte == '&': + return 'entityref' + self.bodydata += byte + + def end_bodydata(self): + self.gotText(self.bodydata) + self.bodydata = '' + + def do_waitforendscript(self, byte): + if byte == '<': + return 'waitscriptendtag' + self.bodydata += byte + + def begin_waitscriptendtag(self, byte): + self.temptagdata = '' + self.tagName = '' + self.endtag = 0 + + def do_waitscriptendtag(self, byte): + # 1 enforce / as first byte read + # 2 enforce following bytes to be subset of "script" until + # tagName == "script" + # 2a when that happens, gotText(self.bodydata) and gotTagEnd(self.tagName) + # 3 spaces can happen anywhere, they're ignored + # e.g. 
< / script > + # 4 anything else causes all data I've read to be moved to the + # bodydata, and switch back to waitforendscript state + + # If it turns out this _isn't_ a </script>, we need to + # remember all the data we've been through so we can append it + # to bodydata + self.temptagdata += byte + + # 1 + if byte == '/': + self.endtag = True + elif not self.endtag: + self.bodydata += "<" + self.temptagdata + return 'waitforendscript' + # 2 + elif byte.isalnum() or byte in identChars: + self.tagName += byte + if not 'script'.startswith(self.tagName): + self.bodydata += "<" + self.temptagdata + return 'waitforendscript' + elif self.tagName == 'script': + self.gotText(self.bodydata) + self.gotTagEnd(self.tagName) + return 'waitforgt' + # 3 + elif byte.isspace(): + return 'waitscriptendtag' + # 4 + else: + self.bodydata += "<" + self.temptagdata + return 'waitforendscript' + + + def begin_entityref(self, byte): + self.erefbuf = '' + self.erefextra = '' # extra bit for lenient mode + + def do_entityref(self, byte): + if byte.isspace() or byte == "<": + if self.beExtremelyLenient: + # '&foo' probably was '&foo' + if self.erefbuf and self.erefbuf != "amp": + self.erefextra = self.erefbuf + self.erefbuf = "amp" + if byte == "<": + return "tagstart" + else: + self.erefextra += byte + return 'spacebodydata' + self._parseError("Bad entity reference") + elif byte != ';': + self.erefbuf += byte + else: + return 'bodydata' + + def end_entityref(self): + self.gotEntityReference(self.erefbuf) + + # hacky support for space after & in entityref in beExtremelyLenient + # state should only happen in that case + def begin_spacebodydata(self, byte): + self.bodydata = self.erefextra + self.erefextra = None + do_spacebodydata = do_bodydata + end_spacebodydata = end_bodydata + + # Sorta SAX-ish API + + def gotTagStart(self, name, attributes): + '''Encountered an opening tag. + + Default behaviour is to print.''' + print('begin', name, attributes) + + def gotText(self, data): + '''Encountered text + + Default behaviour is to print.''' + print('text:', repr(data)) + + def gotEntityReference(self, entityRef): + '''Encountered mnemonic entity reference + + Default behaviour is to print.''' + print('entityRef: &%s;' % entityRef) + + def gotComment(self, comment): + '''Encountered comment. + + Default behaviour is to ignore.''' + pass + + def gotCData(self, cdata): + '''Encountered CDATA + + Default behaviour is to call the gotText method''' + self.gotText(cdata) + + def gotDoctype(self, doctype): + """Encountered DOCTYPE + + This is really grotty: it basically just gives you everything between + '<!DOCTYPE' and '>' as an argument. + """ + print('!DOCTYPE', repr(doctype)) + + def gotTagEnd(self, name): + '''Encountered closing tag + + Default behaviour is to print.''' + print('end', name) diff --git a/contrib/python/Twisted/py2/twisted/web/tap.py b/contrib/python/Twisted/py2/twisted/web/tap.py new file mode 100644 index 0000000000..23df64a4f4 --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/tap.py @@ -0,0 +1,316 @@ +# -*- test-case-name: twisted.web.test.test_tap -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +Support for creating a service which runs a web server. 
+""" + +from __future__ import absolute_import, division + +import os +import warnings + +import incremental + +from twisted.application import service, strports +from twisted.internet import interfaces, reactor +from twisted.python import usage, reflect, threadpool, deprecate +from twisted.spread import pb +from twisted.web import distrib +from twisted.web import resource, server, static, script, demo, wsgi +from twisted.web import twcgi + +class Options(usage.Options): + """ + Define the options accepted by the I{twistd web} plugin. + """ + synopsis = "[web options]" + + optParameters = [["logfile", "l", None, + "Path to web CLF (Combined Log Format) log file."], + ["certificate", "c", "server.pem", + "(DEPRECATED: use --listen) " + "SSL certificate to use for HTTPS. "], + ["privkey", "k", "server.pem", + "(DEPRECATED: use --listen) " + "SSL certificate to use for HTTPS."], + ] + + optFlags = [ + ["notracebacks", "n", ( + "(DEPRECATED: Tracebacks are disabled by default. " + "See --enable-tracebacks to turn them on.")], + ["display-tracebacks", "", ( + "Show uncaught exceptions during rendering tracebacks to " + "the client. WARNING: This may be a security risk and " + "expose private data!")], + ] + + optFlags.append([ + "personal", "", + "Instead of generating a webserver, generate a " + "ResourcePublisher which listens on the port given by " + "--listen, or ~/%s " % (distrib.UserDirectory.userSocketName,) + + "if --listen is not specified."]) + + compData = usage.Completions( + optActions={"logfile" : usage.CompleteFiles("*.log"), + "certificate" : usage.CompleteFiles("*.pem"), + "privkey" : usage.CompleteFiles("*.pem")} + ) + + longdesc = """\ +This starts a webserver. If you specify no arguments, it will be a +demo webserver that has the Test class from twisted.web.demo in it.""" + + def __init__(self): + usage.Options.__init__(self) + self['indexes'] = [] + self['root'] = None + self['extraHeaders'] = [] + self['ports'] = [] + self['port'] = self['https'] = None + + + def opt_port(self, port): + """ + (DEPRECATED: use --listen) + Strports description of port to start the server on + """ + msg = deprecate.getDeprecationWarningString( + self.opt_port, incremental.Version('Twisted', 18, 4, 0)) + warnings.warn(msg, category=DeprecationWarning, stacklevel=2) + self['port'] = port + + opt_p = opt_port + + def opt_https(self, port): + """ + (DEPRECATED: use --listen) + Port to listen on for Secure HTTP. + """ + msg = deprecate.getDeprecationWarningString( + self.opt_https, incremental.Version('Twisted', 18, 4, 0)) + warnings.warn(msg, category=DeprecationWarning, stacklevel=2) + self['https'] = port + + + def opt_listen(self, port): + """ + Add an strports description of port to start the server on. + [default: tcp:8080] + """ + self['ports'].append(port) + + + def opt_index(self, indexName): + """ + Add the name of a file used to check for directory indexes. + [default: index, index.html] + """ + self['indexes'].append(indexName) + + opt_i = opt_index + + + def opt_user(self): + """ + Makes a server with ~/public_html and ~/.twistd-web-pb support for + users. + """ + self['root'] = distrib.UserDirectory() + + opt_u = opt_user + + + def opt_path(self, path): + """ + <path> is either a specific file or a directory to be set as the root + of the web server. Use this if you have a directory full of HTML, cgi, + epy, or rpy files or any other files that you want to be served up raw. 
+ """ + self['root'] = static.File(os.path.abspath(path)) + self['root'].processors = { + '.epy': script.PythonScript, + '.rpy': script.ResourceScript, + } + self['root'].processors['.cgi'] = twcgi.CGIScript + + + def opt_processor(self, proc): + """ + `ext=class' where `class' is added as a Processor for files ending + with `ext'. + """ + if not isinstance(self['root'], static.File): + raise usage.UsageError( + "You can only use --processor after --path.") + ext, klass = proc.split('=', 1) + self['root'].processors[ext] = reflect.namedClass(klass) + + + def opt_class(self, className): + """ + Create a Resource subclass with a zero-argument constructor. + """ + classObj = reflect.namedClass(className) + self['root'] = classObj() + + + def opt_resource_script(self, name): + """ + An .rpy file to be used as the root resource of the webserver. + """ + self['root'] = script.ResourceScriptWrapper(name) + + + def opt_wsgi(self, name): + """ + The FQPN of a WSGI application object to serve as the root resource of + the webserver. + """ + try: + application = reflect.namedAny(name) + except (AttributeError, ValueError): + raise usage.UsageError("No such WSGI application: %r" % (name,)) + pool = threadpool.ThreadPool() + reactor.callWhenRunning(pool.start) + reactor.addSystemEventTrigger('after', 'shutdown', pool.stop) + self['root'] = wsgi.WSGIResource(reactor, pool, application) + + + def opt_mime_type(self, defaultType): + """ + Specify the default mime-type for static files. + """ + if not isinstance(self['root'], static.File): + raise usage.UsageError( + "You can only use --mime_type after --path.") + self['root'].defaultType = defaultType + opt_m = opt_mime_type + + + def opt_allow_ignore_ext(self): + """ + Specify whether or not a request for 'foo' should return 'foo.ext' + """ + if not isinstance(self['root'], static.File): + raise usage.UsageError("You can only use --allow_ignore_ext " + "after --path.") + self['root'].ignoreExt('*') + + + def opt_ignore_ext(self, ext): + """ + Specify an extension to ignore. These will be processed in order. + """ + if not isinstance(self['root'], static.File): + raise usage.UsageError("You can only use --ignore_ext " + "after --path.") + self['root'].ignoreExt(ext) + + + def opt_add_header(self, header): + """ + Specify an additional header to be included in all responses. Specified + as "HeaderName: HeaderValue". + """ + name, value = header.split(':', 1) + self['extraHeaders'].append((name.strip(), value.strip())) + + + def postOptions(self): + """ + Set up conditional defaults and check for dependencies. + + If SSL is not available but an HTTPS server was configured, raise a + L{UsageError} indicating that this is not possible. + + If no server port was supplied, select a default appropriate for the + other options supplied. 
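A sketch of how these options are consumed programmatically by the twistd plugin machinery, using makeService defined just below (the port and path are illustrative); the equivalent command line is: twistd web --listen tcp:8080 --path /srv/www.

    from twisted.web.tap import Options, makeService

    config = Options()
    config.parseOptions(["--listen", "tcp:8080", "--path", "/srv/www"])
    svc = makeService(config)   # a MultiService wrapping one strports service
    # svc can then be attached to the Application built by twistd.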
+ """ + if self['port'] is not None: + self['ports'].append(self['port']) + if self['https'] is not None: + try: + reflect.namedModule('OpenSSL.SSL') + except ImportError: + raise usage.UsageError("SSL support not installed") + sslStrport = 'ssl:port={}:privateKey={}:certKey={}'.format( + self['https'], + self['privkey'], + self['certificate'], + ) + self['ports'].append(sslStrport) + if len(self['ports']) == 0: + if self['personal']: + path = os.path.expanduser( + os.path.join('~', distrib.UserDirectory.userSocketName)) + self['ports'].append('unix:' + path) + else: + self['ports'].append('tcp:8080') + + + +def makePersonalServerFactory(site): + """ + Create and return a factory which will respond to I{distrib} requests + against the given site. + + @type site: L{twisted.web.server.Site} + @rtype: L{twisted.internet.protocol.Factory} + """ + return pb.PBServerFactory(distrib.ResourcePublisher(site)) + + + +class _AddHeadersResource(resource.Resource): + def __init__(self, originalResource, headers): + self._originalResource = originalResource + self._headers = headers + + + def getChildWithDefault(self, name, request): + for k, v in self._headers: + request.responseHeaders.addRawHeader(k, v) + return self._originalResource.getChildWithDefault(name, request) + + + +def makeService(config): + s = service.MultiService() + if config['root']: + root = config['root'] + if config['indexes']: + config['root'].indexNames = config['indexes'] + else: + # This really ought to be web.Admin or something + root = demo.Test() + + if isinstance(root, static.File): + root.registry.setComponent(interfaces.IServiceCollection, s) + + if config['extraHeaders']: + root = _AddHeadersResource(root, config['extraHeaders']) + + if config['logfile']: + site = server.Site(root, logPath=config['logfile']) + else: + site = server.Site(root) + + if config["display-tracebacks"]: + site.displayTracebacks = True + + # Deprecate --notracebacks/-n + if config["notracebacks"]: + msg = deprecate._getDeprecationWarningString( + "--notracebacks", incremental.Version('Twisted', 19, 7, 0)) + warnings.warn(msg, category=DeprecationWarning, stacklevel=2) + + if config['personal']: + site = makePersonalServerFactory(site) + for port in config['ports']: + svc = strports.service(port, site) + svc.setServiceParent(s) + return s diff --git a/contrib/python/Twisted/py2/twisted/web/template.py b/contrib/python/Twisted/py2/twisted/web/template.py new file mode 100644 index 0000000000..1c7c915564 --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/template.py @@ -0,0 +1,575 @@ +# -*- test-case-name: twisted.web.test.test_template -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +HTML rendering for twisted.web. + +@var VALID_HTML_TAG_NAMES: A list of recognized HTML tag names, used by the + L{tag} object. + +@var TEMPLATE_NAMESPACE: The XML namespace used to identify attributes and + elements used by the templating system, which should be removed from the + final output document. + +@var tags: A convenience object which can produce L{Tag} objects on demand via + attribute access. For example: C{tags.div} is equivalent to C{Tag("div")}. + Tags not specified in L{VALID_HTML_TAG_NAMES} will result in an + L{AttributeError}. 
+""" + +from __future__ import division, absolute_import + +__all__ = [ + 'TEMPLATE_NAMESPACE', 'VALID_HTML_TAG_NAMES', 'Element', 'TagLoader', + 'XMLString', 'XMLFile', 'renderer', 'flatten', 'flattenString', 'tags', + 'Comment', 'CDATA', 'Tag', 'slot', 'CharRef', 'renderElement' + ] + +import warnings + +from collections import OrderedDict + +from zope.interface import implementer + +from xml.sax import make_parser, handler + +from twisted.python.compat import NativeStringIO, items +from twisted.python.filepath import FilePath +from twisted.web._stan import Tag, slot, Comment, CDATA, CharRef +from twisted.web.iweb import ITemplateLoader +from twisted.logger import Logger + +TEMPLATE_NAMESPACE = 'http://twistedmatrix.com/ns/twisted.web.template/0.1' + +# Go read the definition of NOT_DONE_YET. For lulz. This is totally +# equivalent. And this turns out to be necessary, because trying to import +# NOT_DONE_YET in this module causes a circular import which we cannot escape +# from. From which we cannot escape. Etc. glyph is okay with this solution for +# now, and so am I, as long as this comment stays to explain to future +# maintainers what it means. ~ C. +# +# See http://twistedmatrix.com/trac/ticket/5557 for progress on fixing this. +NOT_DONE_YET = 1 +_moduleLog = Logger() + + +class _NSContext(object): + """ + A mapping from XML namespaces onto their prefixes in the document. + """ + + def __init__(self, parent=None): + """ + Pull out the parent's namespaces, if there's no parent then default to + XML. + """ + self.parent = parent + if parent is not None: + self.nss = OrderedDict(parent.nss) + else: + self.nss = {'http://www.w3.org/XML/1998/namespace':'xml'} + + + def get(self, k, d=None): + """ + Get a prefix for a namespace. + + @param d: The default prefix value. + """ + return self.nss.get(k, d) + + + def __setitem__(self, k, v): + """ + Proxy through to setting the prefix for the namespace. + """ + self.nss.__setitem__(k, v) + + + def __getitem__(self, k): + """ + Proxy through to getting the prefix for the namespace. + """ + return self.nss.__getitem__(k) + + + +class _ToStan(handler.ContentHandler, handler.EntityResolver): + """ + A SAX parser which converts an XML document to the Twisted STAN + Document Object Model. + """ + + def __init__(self, sourceFilename): + """ + @param sourceFilename: the filename to load the XML out of. + """ + self.sourceFilename = sourceFilename + self.prefixMap = _NSContext() + self.inCDATA = False + + + def setDocumentLocator(self, locator): + """ + Set the document locator, which knows about line and character numbers. + """ + self.locator = locator + + + def startDocument(self): + """ + Initialise the document. + """ + self.document = [] + self.current = self.document + self.stack = [] + self.xmlnsAttrs = [] + + + def endDocument(self): + """ + Document ended. + """ + + + def processingInstruction(self, target, data): + """ + Processing instructions are ignored. + """ + + + def startPrefixMapping(self, prefix, uri): + """ + Set up the prefix mapping, which maps fully qualified namespace URIs + onto namespace prefixes. + + This gets called before startElementNS whenever an C{xmlns} attribute + is seen. + """ + + self.prefixMap = _NSContext(self.prefixMap) + self.prefixMap[uri] = prefix + + # Ignore the template namespace; we'll replace those during parsing. + if uri == TEMPLATE_NAMESPACE: + return + + # Add to a list that will be applied once we have the element. 
+ if prefix is None: + self.xmlnsAttrs.append(('xmlns',uri)) + else: + self.xmlnsAttrs.append(('xmlns:%s'%prefix,uri)) + + + def endPrefixMapping(self, prefix): + """ + "Pops the stack" on the prefix mapping. + + Gets called after endElementNS. + """ + self.prefixMap = self.prefixMap.parent + + + def startElementNS(self, namespaceAndName, qname, attrs): + """ + Gets called when we encounter a new xmlns attribute. + + @param namespaceAndName: a (namespace, name) tuple, where name + determines which type of action to take, if the namespace matches + L{TEMPLATE_NAMESPACE}. + @param qname: ignored. + @param attrs: attributes on the element being started. + """ + + filename = self.sourceFilename + lineNumber = self.locator.getLineNumber() + columnNumber = self.locator.getColumnNumber() + + ns, name = namespaceAndName + if ns == TEMPLATE_NAMESPACE: + if name == 'transparent': + name = '' + elif name == 'slot': + try: + # Try to get the default value for the slot + default = attrs[(None, 'default')] + except KeyError: + # If there wasn't one, then use None to indicate no + # default. + default = None + el = slot( + attrs[(None, 'name')], default=default, + filename=filename, lineNumber=lineNumber, + columnNumber=columnNumber) + self.stack.append(el) + self.current.append(el) + self.current = el.children + return + + render = None + + attrs = OrderedDict(attrs) + for k, v in items(attrs): + attrNS, justTheName = k + if attrNS != TEMPLATE_NAMESPACE: + continue + if justTheName == 'render': + render = v + del attrs[k] + + # nonTemplateAttrs is a dictionary mapping attributes that are *not* in + # TEMPLATE_NAMESPACE to their values. Those in TEMPLATE_NAMESPACE were + # just removed from 'attrs' in the loop immediately above. The key in + # nonTemplateAttrs is either simply the attribute name (if it was not + # specified as having a namespace in the template) or prefix:name, + # preserving the xml namespace prefix given in the document. + + nonTemplateAttrs = OrderedDict() + for (attrNs, attrName), v in items(attrs): + nsPrefix = self.prefixMap.get(attrNs) + if nsPrefix is None: + attrKey = attrName + else: + attrKey = '%s:%s' % (nsPrefix, attrName) + nonTemplateAttrs[attrKey] = v + + if ns == TEMPLATE_NAMESPACE and name == 'attr': + if not self.stack: + # TODO: define a better exception for this? + raise AssertionError( + '<{%s}attr> as top-level element' % (TEMPLATE_NAMESPACE,)) + if 'name' not in nonTemplateAttrs: + # TODO: same here + raise AssertionError( + '<{%s}attr> requires a name attribute' % (TEMPLATE_NAMESPACE,)) + el = Tag('', render=render, filename=filename, + lineNumber=lineNumber, columnNumber=columnNumber) + self.stack[-1].attributes[nonTemplateAttrs['name']] = el + self.stack.append(el) + self.current = el.children + return + + # Apply any xmlns attributes + if self.xmlnsAttrs: + nonTemplateAttrs.update(OrderedDict(self.xmlnsAttrs)) + self.xmlnsAttrs = [] + + # Add the prefix that was used in the parsed template for non-template + # namespaces (which will not be consumed anyway). + if ns != TEMPLATE_NAMESPACE and ns is not None: + prefix = self.prefixMap[ns] + if prefix is not None: + name = '%s:%s' % (self.prefixMap[ns],name) + el = Tag( + name, attributes=OrderedDict(nonTemplateAttrs), render=render, + filename=filename, lineNumber=lineNumber, + columnNumber=columnNumber) + self.stack.append(el) + self.current.append(el) + self.current = el.children + + + def characters(self, ch): + """ + Called when we receive some characters. CDATA characters get passed + through as is. 
+ + @type ch: C{string} + """ + if self.inCDATA: + self.stack[-1].append(ch) + return + self.current.append(ch) + + + def endElementNS(self, name, qname): + """ + A namespace tag is closed. Pop the stack, if there's anything left in + it, otherwise return to the document's namespace. + """ + self.stack.pop() + if self.stack: + self.current = self.stack[-1].children + else: + self.current = self.document + + + def startDTD(self, name, publicId, systemId): + """ + DTDs are ignored. + """ + + + def endDTD(self, *args): + """ + DTDs are ignored. + """ + + + def startCDATA(self): + """ + We're starting to be in a CDATA element, make a note of this. + """ + self.inCDATA = True + self.stack.append([]) + + + def endCDATA(self): + """ + We're no longer in a CDATA element. Collect up the characters we've + parsed and put them in a new CDATA object. + """ + self.inCDATA = False + comment = ''.join(self.stack.pop()) + self.current.append(CDATA(comment)) + + + def comment(self, content): + """ + Add an XML comment which we've encountered. + """ + self.current.append(Comment(content)) + + + +def _flatsaxParse(fl): + """ + Perform a SAX parse of an XML document with the _ToStan class. + + @param fl: The XML document to be parsed. + @type fl: A file object or filename. + + @return: a C{list} of Stan objects. + """ + parser = make_parser() + parser.setFeature(handler.feature_validation, 0) + parser.setFeature(handler.feature_namespaces, 1) + parser.setFeature(handler.feature_external_ges, 0) + parser.setFeature(handler.feature_external_pes, 0) + + s = _ToStan(getattr(fl, "name", None)) + parser.setContentHandler(s) + parser.setEntityResolver(s) + parser.setProperty(handler.property_lexical_handler, s) + + parser.parse(fl) + + return s.document + + +@implementer(ITemplateLoader) +class TagLoader(object): + """ + An L{ITemplateLoader} that loads existing L{IRenderable} providers. + + @ivar tag: The object which will be loaded. + @type tag: An L{IRenderable} provider. + """ + + def __init__(self, tag): + """ + @param tag: The object which will be loaded. + @type tag: An L{IRenderable} provider. + """ + self.tag = tag + + + def load(self): + return [self.tag] + + + +@implementer(ITemplateLoader) +class XMLString(object): + """ + An L{ITemplateLoader} that loads and parses XML from a string. + + @ivar _loadedTemplate: The loaded document. + @type _loadedTemplate: a C{list} of Stan objects. + """ + + def __init__(self, s): + """ + Run the parser on a L{NativeStringIO} copy of the string. + + @param s: The string from which to load the XML. + @type s: C{str}, or a UTF-8 encoded L{bytes}. + """ + if not isinstance(s, str): + s = s.decode('utf8') + + self._loadedTemplate = _flatsaxParse(NativeStringIO(s)) + + + def load(self): + """ + Return the document. + + @return: the loaded document. + @rtype: a C{list} of Stan objects. + """ + return self._loadedTemplate + + + +@implementer(ITemplateLoader) +class XMLFile(object): + """ + An L{ITemplateLoader} that loads and parses XML from a file. + + @ivar _loadedTemplate: The loaded document, or L{None}, if not loaded. + @type _loadedTemplate: a C{list} of Stan objects, or L{None}. + + @ivar _path: The L{FilePath}, file object, or filename that is being + loaded from. + """ + + def __init__(self, path): + """ + Run the parser on a file. + + @param path: The file from which to load the XML. + @type path: L{FilePath} + """ + if not isinstance(path, FilePath): + warnings.warn( + "Passing filenames or file objects to XMLFile is deprecated " + "since Twisted 12.1. 
Pass a FilePath instead.", + category=DeprecationWarning, stacklevel=2) + self._loadedTemplate = None + self._path = path + + + def _loadDoc(self): + """ + Read and parse the XML. + + @return: the loaded document. + @rtype: a C{list} of Stan objects. + """ + if not isinstance(self._path, FilePath): + return _flatsaxParse(self._path) + else: + with self._path.open('r') as f: + return _flatsaxParse(f) + + + def __repr__(self): + return '<XMLFile of %r>' % (self._path,) + + + def load(self): + """ + Return the document, first loading it if necessary. + + @return: the loaded document. + @rtype: a C{list} of Stan objects. + """ + if self._loadedTemplate is None: + self._loadedTemplate = self._loadDoc() + return self._loadedTemplate + + + +# Last updated October 2011, using W3Schools as a reference. Link: +# http://www.w3schools.com/html5/html5_reference.asp +# Note that <xmp> is explicitly omitted; its semantics do not work with +# t.w.template and it is officially deprecated. +VALID_HTML_TAG_NAMES = set([ + 'a', 'abbr', 'acronym', 'address', 'applet', 'area', 'article', 'aside', + 'audio', 'b', 'base', 'basefont', 'bdi', 'bdo', 'big', 'blockquote', + 'body', 'br', 'button', 'canvas', 'caption', 'center', 'cite', 'code', + 'col', 'colgroup', 'command', 'datalist', 'dd', 'del', 'details', 'dfn', + 'dir', 'div', 'dl', 'dt', 'em', 'embed', 'fieldset', 'figcaption', + 'figure', 'font', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', + 'h4', 'h5', 'h6', 'head', 'header', 'hgroup', 'hr', 'html', 'i', 'iframe', + 'img', 'input', 'ins', 'isindex', 'keygen', 'kbd', 'label', 'legend', + 'li', 'link', 'map', 'mark', 'menu', 'meta', 'meter', 'nav', 'noframes', + 'noscript', 'object', 'ol', 'optgroup', 'option', 'output', 'p', 'param', + 'pre', 'progress', 'q', 'rp', 'rt', 'ruby', 's', 'samp', 'script', + 'section', 'select', 'small', 'source', 'span', 'strike', 'strong', + 'style', 'sub', 'summary', 'sup', 'table', 'tbody', 'td', 'textarea', + 'tfoot', 'th', 'thead', 'time', 'title', 'tr', 'tt', 'u', 'ul', 'var', + 'video', 'wbr', +]) + + + +class _TagFactory(object): + """ + A factory for L{Tag} objects; the implementation of the L{tags} object. + + This allows for the syntactic convenience of C{from twisted.web.html import + tags; tags.a(href="linked-page.html")}, where 'a' can be basically any HTML + tag. + + The class is not exposed publicly because you only ever need one of these, + and we already made it for you. + + @see: L{tags} + """ + def __getattr__(self, tagName): + if tagName == 'transparent': + return Tag('') + # allow for E.del as E.del_ + tagName = tagName.rstrip('_') + if tagName not in VALID_HTML_TAG_NAMES: + raise AttributeError('unknown tag %r' % (tagName,)) + return Tag(tagName) + + + +tags = _TagFactory() + + + +def renderElement(request, element, + doctype=b'<!DOCTYPE html>', _failElement=None): + """ + Render an element or other C{IRenderable}. + + @param request: The C{Request} being rendered to. + @param element: An C{IRenderable} which will be rendered. + @param doctype: A C{bytes} which will be written as the first line of + the request, or L{None} to disable writing of a doctype. The C{string} + should not include a trailing newline and will default to the HTML5 + doctype C{'<!DOCTYPE html>'}. 
+ + @returns: NOT_DONE_YET + + @since: 12.1 + """ + if doctype is not None: + request.write(doctype) + request.write(b'\n') + + if _failElement is None: + _failElement = twisted.web.util.FailureElement + + d = flatten(request, element, request.write) + + def eb(failure): + _moduleLog.failure( + "An error occurred while rendering the response.", + failure=failure + ) + if request.site.displayTracebacks: + return flatten(request, _failElement(failure), + request.write).encode('utf8') + else: + request.write( + (b'<div style="font-size:800%;' + b'background-color:#FFF;' + b'color:#F00' + b'">An error occurred while rendering the response.</div>')) + + d.addErrback(eb) + d.addBoth(lambda _: request.finish()) + return NOT_DONE_YET + + + +from twisted.web._element import Element, renderer +from twisted.web._flatten import flatten, flattenString +import twisted.web.util diff --git a/contrib/python/Twisted/py2/twisted/web/test/requesthelper.py b/contrib/python/Twisted/py2/twisted/web/test/requesthelper.py new file mode 100644 index 0000000000..7e16477ce3 --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/test/requesthelper.py @@ -0,0 +1,486 @@ +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +Helpers related to HTTP requests, used by tests. +""" + +from __future__ import division, absolute_import + +__all__ = ['DummyChannel', 'DummyRequest'] + +from io import BytesIO + +from zope.interface import implementer, verify + +from twisted.python.compat import intToBytes +from twisted.python.deprecate import deprecated +from incremental import Version +from twisted.internet.defer import Deferred +from twisted.internet.address import IPv4Address, IPv6Address +from twisted.internet.interfaces import ISSLTransport, IAddress + +from twisted.trial import unittest + +from twisted.web.http_headers import Headers +from twisted.web.resource import Resource +from twisted.web.server import NOT_DONE_YET, Session, Site +from twisted.web._responses import FOUND + + + +textLinearWhitespaceComponents = [ + u"Foo%sbar" % (lw,) for lw in + [u'\r', u'\n', u'\r\n'] +] + +sanitizedText = "Foo bar" +bytesLinearWhitespaceComponents = [ + component.encode('ascii') for component in + textLinearWhitespaceComponents +] +sanitizedBytes = sanitizedText.encode('ascii') + + + +@implementer(IAddress) +class NullAddress(object): + """ + A null implementation of L{IAddress}. 
+ """ + + + +class DummyChannel: + class TCP: + port = 80 + disconnected = False + + def __init__(self, peer=None): + if peer is None: + peer = IPv4Address("TCP", '192.168.1.1', 12344) + self._peer = peer + self.written = BytesIO() + self.producers = [] + + def getPeer(self): + return self._peer + + def write(self, data): + if not isinstance(data, bytes): + raise TypeError("Can only write bytes to a transport, not %r" % (data,)) + self.written.write(data) + + def writeSequence(self, iovec): + for data in iovec: + self.write(data) + + def getHost(self): + return IPv4Address("TCP", '10.0.0.1', self.port) + + def registerProducer(self, producer, streaming): + self.producers.append((producer, streaming)) + + def unregisterProducer(self): + pass + + def loseConnection(self): + self.disconnected = True + + + @implementer(ISSLTransport) + class SSL(TCP): + pass + + site = Site(Resource()) + + def __init__(self, peer=None): + self.transport = self.TCP(peer) + + + def requestDone(self, request): + pass + + + def writeHeaders(self, version, code, reason, headers): + response_line = version + b" " + code + b" " + reason + b"\r\n" + headerSequence = [response_line] + headerSequence.extend( + name + b': ' + value + b"\r\n" for name, value in headers + ) + headerSequence.append(b"\r\n") + self.transport.writeSequence(headerSequence) + + + def getPeer(self): + return self.transport.getPeer() + + + def getHost(self): + return self.transport.getHost() + + + def registerProducer(self, producer, streaming): + self.transport.registerProducer(producer, streaming) + + + def unregisterProducer(self): + self.transport.unregisterProducer() + + + def write(self, data): + self.transport.write(data) + + + def writeSequence(self, iovec): + self.transport.writeSequence(iovec) + + + def loseConnection(self): + self.transport.loseConnection() + + + def endRequest(self): + pass + + + def isSecure(self): + return isinstance(self.transport, self.SSL) + + + +class DummyRequest(object): + """ + Represents a dummy or fake request. See L{twisted.web.server.Request}. + + @ivar _finishedDeferreds: L{None} or a C{list} of L{Deferreds} which will + be called back with L{None} when C{finish} is called or which will be + errbacked if C{processingFailed} is called. + + @type requestheaders: C{Headers} + @ivar requestheaders: A Headers instance that stores values for all request + headers. + + @type responseHeaders: C{Headers} + @ivar responseHeaders: A Headers instance that stores values for all + response headers. + + @type responseCode: C{int} + @ivar responseCode: The response code which was passed to + C{setResponseCode}. + + @type written: C{list} of C{bytes} + @ivar written: The bytes which have been written to the request. + """ + uri = b'http://dummy/' + method = b'GET' + client = None + + + def registerProducer(self, prod, s): + """ + Call an L{IPullProducer}'s C{resumeProducing} method in a + loop until it unregisters itself. + + @param prod: The producer. + @type prod: L{IPullProducer} + + @param s: Whether or not the producer is streaming. 
+ """ + # XXX: Handle IPushProducers + self.go = 1 + while self.go: + prod.resumeProducing() + + + def unregisterProducer(self): + self.go = 0 + + + def __init__(self, postpath, session=None, client=None): + self.sitepath = [] + self.written = [] + self.finished = 0 + self.postpath = postpath + self.prepath = [] + self.session = None + self.protoSession = session or Session(0, self) + self.args = {} + self.requestHeaders = Headers() + self.responseHeaders = Headers() + self.responseCode = None + self._finishedDeferreds = [] + self._serverName = b"dummy" + self.clientproto = b"HTTP/1.0" + + + def getAllHeaders(self): + """ + Return dictionary mapping the names of all received headers to the last + value received for each. + + Since this method does not return all header information, + C{self.requestHeaders.getAllRawHeaders()} may be preferred. + + NOTE: This function is a direct copy of + C{twisted.web.http.Request.getAllRawHeaders}. + """ + headers = {} + for k, v in self.requestHeaders.getAllRawHeaders(): + headers[k.lower()] = v[-1] + return headers + + + def getHeader(self, name): + """ + Retrieve the value of a request header. + + @type name: C{bytes} + @param name: The name of the request header for which to retrieve the + value. Header names are compared case-insensitively. + + @rtype: C{bytes} or L{None} + @return: The value of the specified request header. + """ + return self.requestHeaders.getRawHeaders(name.lower(), [None])[0] + + + def setHeader(self, name, value): + """TODO: make this assert on write() if the header is content-length + """ + self.responseHeaders.addRawHeader(name, value) + + + def getSession(self): + if self.session: + return self.session + assert not self.written, "Session cannot be requested after data has been written." + self.session = self.protoSession + return self.session + + + def render(self, resource): + """ + Render the given resource as a response to this request. + + This implementation only handles a few of the most common behaviors of + resources. It can handle a render method that returns a string or + C{NOT_DONE_YET}. It doesn't know anything about the semantics of + request methods (eg HEAD) nor how to set any particular headers. + Basically, it's largely broken, but sufficient for some tests at least. + It should B{not} be expanded to do all the same stuff L{Request} does. + Instead, L{DummyRequest} should be phased out and L{Request} (or some + other real code factored in a different way) used. + """ + result = resource.render(self) + if result is NOT_DONE_YET: + return + self.write(result) + self.finish() + + + def write(self, data): + if not isinstance(data, bytes): + raise TypeError("write() only accepts bytes") + self.written.append(data) + + + def notifyFinish(self): + """ + Return a L{Deferred} which is called back with L{None} when the request + is finished. This will probably only work if you haven't called + C{finish} yet. + """ + finished = Deferred() + self._finishedDeferreds.append(finished) + return finished + + + def finish(self): + """ + Record that the request is finished and callback and L{Deferred}s + waiting for notification of this. + """ + self.finished = self.finished + 1 + if self._finishedDeferreds is not None: + observers = self._finishedDeferreds + self._finishedDeferreds = None + for obs in observers: + obs.callback(None) + + + def processingFailed(self, reason): + """ + Errback and L{Deferreds} waiting for finish notification. 
+ """ + if self._finishedDeferreds is not None: + observers = self._finishedDeferreds + self._finishedDeferreds = None + for obs in observers: + obs.errback(reason) + + + def addArg(self, name, value): + self.args[name] = [value] + + + def setResponseCode(self, code, message=None): + """ + Set the HTTP status response code, but takes care that this is called + before any data is written. + """ + assert not self.written, "Response code cannot be set after data has been written: %s." % "@@@@".join(self.written) + self.responseCode = code + self.responseMessage = message + + + def setLastModified(self, when): + assert not self.written, "Last-Modified cannot be set after data has been written: %s." % "@@@@".join(self.written) + + + def setETag(self, tag): + assert not self.written, "ETag cannot be set after data has been written: %s." % "@@@@".join(self.written) + + + def getClientIP(self): + """ + Return the IPv4 address of the client which made this request, if there + is one, otherwise L{None}. + """ + if isinstance(self.client, (IPv4Address, IPv6Address)): + return self.client.host + return None + + + def getClientAddress(self): + """ + Return the L{IAddress} of the client that made this request. + + @return: an address. + @rtype: an L{IAddress} provider. + """ + if self.client is None: + return NullAddress() + return self.client + + + def getRequestHostname(self): + """ + Get a dummy hostname associated to the HTTP request. + + @rtype: C{bytes} + @returns: a dummy hostname + """ + return self._serverName + + + def getHost(self): + """ + Get a dummy transport's host. + + @rtype: C{IPv4Address} + @returns: a dummy transport's host + """ + return IPv4Address('TCP', '127.0.0.1', 80) + + + def setHost(self, host, port, ssl=0): + """ + Change the host and port the request thinks it's using. + + @type host: C{bytes} + @param host: The value to which to change the host header. + + @type ssl: C{bool} + @param ssl: A flag which, if C{True}, indicates that the request is + considered secure (if C{True}, L{isSecure} will return C{True}). + """ + self._forceSSL = ssl # set first so isSecure will work + if self.isSecure(): + default = 443 + else: + default = 80 + if port == default: + hostHeader = host + else: + hostHeader = host + b":" + intToBytes(port) + self.requestHeaders.addRawHeader(b"host", hostHeader) + + + def redirect(self, url): + """ + Utility function that does a redirect. + + The request should have finish() called after this. + """ + self.setResponseCode(FOUND) + self.setHeader(b"location", url) + + + +DummyRequest.getClientIP = deprecated( + Version('Twisted', 18, 4, 0), + replacement="getClientAddress", +)(DummyRequest.getClientIP) + + + +class DummyRequestTests(unittest.SynchronousTestCase): + """ + Tests for L{DummyRequest}. 
+ """ + + def test_getClientIPDeprecated(self): + """ + L{DummyRequest.getClientIP} is deprecated in favor of + L{DummyRequest.getClientAddress} + """ + + request = DummyRequest([]) + request.getClientIP() + + warnings = self.flushWarnings( + offendingFunctions=[self.test_getClientIPDeprecated]) + + self.assertEqual(1, len(warnings)) + [warning] = warnings + self.assertEqual(warning.get("category"), DeprecationWarning) + self.assertEqual( + warning.get("message"), + ("twisted.web.test.requesthelper.DummyRequest.getClientIP " + "was deprecated in Twisted 18.4.0; " + "please use getClientAddress instead"), + ) + + + def test_getClientIPSupportsIPv6(self): + """ + L{DummyRequest.getClientIP} supports IPv6 addresses, just like + L{twisted.web.http.Request.getClientIP}. + """ + request = DummyRequest([]) + client = IPv6Address("TCP", "::1", 12345) + request.client = client + + self.assertEqual("::1", request.getClientIP()) + + + def test_getClientAddressWithoutClient(self): + """ + L{DummyRequest.getClientAddress} returns an L{IAddress} + provider no C{client} has been set. + """ + request = DummyRequest([]) + null = request.getClientAddress() + verify.verifyObject(IAddress, null) + + + def test_getClientAddress(self): + """ + L{DummyRequest.getClientAddress} returns the C{client}. + """ + request = DummyRequest([]) + client = IPv4Address("TCP", "127.0.0.1", 12345) + request.client = client + address = request.getClientAddress() + self.assertIs(address, client) diff --git a/contrib/python/Twisted/py2/twisted/web/twcgi.py b/contrib/python/Twisted/py2/twisted/web/twcgi.py new file mode 100644 index 0000000000..1c92960cfc --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/twcgi.py @@ -0,0 +1,321 @@ +# -*- test-case-name: twisted.web.test.test_cgi -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + + +""" +I hold resource classes and helper classes that deal with CGI scripts. +""" + +# System Imports +import os +import urllib + +# Twisted Imports +from twisted.internet import protocol +from twisted.logger import Logger +from twisted.python import filepath +from twisted.spread import pb +from twisted.web import http, resource, server, static + + +class CGIDirectory(resource.Resource, filepath.FilePath): + def __init__(self, pathname): + resource.Resource.__init__(self) + filepath.FilePath.__init__(self, pathname) + + + def getChild(self, path, request): + fnp = self.child(path) + if not fnp.exists(): + return static.File.childNotFound + elif fnp.isdir(): + return CGIDirectory(fnp.path) + else: + return CGIScript(fnp.path) + return resource.NoResource() + + + def render(self, request): + notFound = resource.NoResource( + "CGI directories do not support directory listing.") + return notFound.render(request) + + + +class CGIScript(resource.Resource): + """ + L{CGIScript} is a resource which runs child processes according to the CGI + specification. + + The implementation is complex due to the fact that it requires asynchronous + IPC with an external process with an unpleasant protocol. + """ + isLeaf = 1 + def __init__(self, filename, registry=None, reactor=None): + """ + Initialize, with the name of a CGI script file. + """ + self.filename = filename + if reactor is None: + # This installs a default reactor, if None was installed before. + # We do a late import here, so that importing the current module + # won't directly trigger installing a default reactor. 
+ from twisted.internet import reactor + self._reactor = reactor + + + def render(self, request): + """ + Do various things to conform to the CGI specification. + + I will set up the usual slew of environment variables, then spin off a + process. + + @type request: L{twisted.web.http.Request} + @param request: An HTTP request. + """ + scriptName = b"/" + b"/".join(request.prepath) + serverName = request.getRequestHostname().split(b':')[0] + env = {"SERVER_SOFTWARE": server.version, + "SERVER_NAME": serverName, + "GATEWAY_INTERFACE": "CGI/1.1", + "SERVER_PROTOCOL": request.clientproto, + "SERVER_PORT": str(request.getHost().port), + "REQUEST_METHOD": request.method, + "SCRIPT_NAME": scriptName, + "SCRIPT_FILENAME": self.filename, + "REQUEST_URI": request.uri} + + ip = request.getClientAddress().host + if ip is not None: + env['REMOTE_ADDR'] = ip + pp = request.postpath + if pp: + env["PATH_INFO"] = "/" + "/".join(pp) + + if hasattr(request, "content"): + # 'request.content' is either a StringIO or a TemporaryFile, and + # the file pointer is sitting at the beginning (seek(0,0)) + request.content.seek(0, 2) + length = request.content.tell() + request.content.seek(0, 0) + env['CONTENT_LENGTH'] = str(length) + + try: + qindex = request.uri.index(b'?') + except ValueError: + env['QUERY_STRING'] = '' + qargs = [] + else: + qs = env['QUERY_STRING'] = request.uri[qindex+1:] + if '=' in qs: + qargs = [] + else: + qargs = [urllib.unquote(x) for x in qs.split('+')] + + # Propagate HTTP headers + for title, header in request.getAllHeaders().items(): + envname = title.replace(b'-', b'_').upper() + if title not in (b'content-type', b'content-length', b'proxy'): + envname = b"HTTP_" + envname + env[envname] = header + # Propagate our environment + for key, value in os.environ.items(): + if key not in env: + env[key] = value + # And they're off! + self.runProcess(env, request, qargs) + return server.NOT_DONE_YET + + + def runProcess(self, env, request, qargs=[]): + """ + Run the cgi script. + + @type env: A L{dict} of L{str}, or L{None} + @param env: The environment variables to pass to the process that will + get spawned. See + L{twisted.internet.interfaces.IReactorProcess.spawnProcess} for + more information about environments and process creation. + + @type request: L{twisted.web.http.Request} + @param request: An HTTP request. + + @type qargs: A L{list} of L{str} + @param qargs: The command line arguments to pass to the process that + will get spawned. + """ + p = CGIProcessProtocol(request) + self._reactor.spawnProcess(p, self.filename, [self.filename] + qargs, + env, os.path.dirname(self.filename)) + + + +class FilteredScript(CGIScript): + """ + I am a special version of a CGI script, that uses a specific executable. + + This is useful for interfacing with other scripting languages that adhere + to the CGI standard. My C{filter} attribute specifies what executable to + run, and my C{filename} init parameter describes which script to pass to + the first argument of that script. + + To customize me for a particular location of a CGI interpreter, override + C{filter}. + + @type filter: L{str} + @ivar filter: The absolute path to the executable. + """ + + filter = '/usr/bin/cat' + + + def runProcess(self, env, request, qargs=[]): + """ + Run a script through the C{filter} executable. + + @type env: A L{dict} of L{str}, or L{None} + @param env: The environment variables to pass to the process that will + get spawned. 
See + L{twisted.internet.interfaces.IReactorProcess.spawnProcess} + for more information about environments and process creation. + + @type request: L{twisted.web.http.Request} + @param request: An HTTP request. + + @type qargs: A L{list} of L{str} + @param qargs: The command line arguments to pass to the process that + will get spawned. + """ + p = CGIProcessProtocol(request) + self._reactor.spawnProcess(p, self.filter, + [self.filter, self.filename] + qargs, env, + os.path.dirname(self.filename)) + + + +class CGIProcessProtocol(protocol.ProcessProtocol, pb.Viewable): + handling_headers = 1 + headers_written = 0 + headertext = b'' + errortext = b'' + _log = Logger() + + # Remotely relay producer interface. + + def view_resumeProducing(self, issuer): + self.resumeProducing() + + + def view_pauseProducing(self, issuer): + self.pauseProducing() + + + def view_stopProducing(self, issuer): + self.stopProducing() + + + def resumeProducing(self): + self.transport.resumeProducing() + + + def pauseProducing(self): + self.transport.pauseProducing() + + + def stopProducing(self): + self.transport.loseConnection() + + + def __init__(self, request): + self.request = request + + + def connectionMade(self): + self.request.registerProducer(self, 1) + self.request.content.seek(0, 0) + content = self.request.content.read() + if content: + self.transport.write(content) + self.transport.closeStdin() + + + def errReceived(self, error): + self.errortext = self.errortext + error + + + def outReceived(self, output): + """ + Handle a chunk of input + """ + # First, make sure that the headers from the script are sorted + # out (we'll want to do some parsing on these later.) + if self.handling_headers: + text = self.headertext + output + headerEnds = [] + for delimiter in b'\n\n', b'\r\n\r\n', b'\r\r', b'\n\r\n': + headerend = text.find(delimiter) + if headerend != -1: + headerEnds.append((headerend, delimiter)) + if headerEnds: + # The script is entirely in control of response headers; + # disable the default Content-Type value normally provided by + # twisted.web.server.Request. + self.request.defaultContentType = None + + headerEnds.sort() + headerend, delimiter = headerEnds[0] + self.headertext = text[:headerend] + # This is a final version of the header text. + linebreak = delimiter[:len(delimiter)//2] + headers = self.headertext.split(linebreak) + for header in headers: + br = header.find(b': ') + if br == -1: + self._log.error( + 'ignoring malformed CGI header: {header!r}', + header=header) + else: + headerName = header[:br].lower() + headerText = header[br+2:] + if headerName == b'location': + self.request.setResponseCode(http.FOUND) + if headerName == b'status': + try: + # "XXX <description>" sometimes happens. + statusNum = int(headerText[:3]) + except: + self._log.error("malformed status header") + else: + self.request.setResponseCode(statusNum) + else: + # Don't allow the application to control + # these required headers. 
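                        # (For instance, a 'Server:' or 'Date:' header emitted
                        # by the CGI script is dropped here; the values that
                        # twisted.web.server sets itself are the ones that
                        # reach the client.)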
+ if headerName.lower() not in (b'server', b'date'): + self.request.responseHeaders.addRawHeader( + headerName, headerText) + output = text[headerend+len(delimiter):] + self.handling_headers = 0 + if self.handling_headers: + self.headertext = text + if not self.handling_headers: + self.request.write(output) + + + def processEnded(self, reason): + if reason.value.exitCode != 0: + self._log.error("CGI {uri} exited with exit code {exitCode}", + uri=self.request.uri, exitCode=reason.value.exitCode) + if self.errortext: + self._log.error("Errors from CGI {uri}: {errorText}", + uri=self.request.uri, errorText=self.errortext) + if self.handling_headers: + self._log.error("Premature end of headers in {uri}: {headerText}", + uri=self.request.uri, headerText=self.headertext) + self.request.write( + resource.ErrorPage(http.INTERNAL_SERVER_ERROR, + "CGI Script Error", + "Premature end of script headers.").render(self.request)) + self.request.unregisterProducer() + self.request.finish() diff --git a/contrib/python/Twisted/py2/twisted/web/util.py b/contrib/python/Twisted/py2/twisted/web/util.py new file mode 100644 index 0000000000..3fffac1eac --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/util.py @@ -0,0 +1,443 @@ +# -*- test-case-name: twisted.web.test.test_util -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +An assortment of web server-related utilities. +""" + +from __future__ import division, absolute_import + +import linecache + +from twisted.python import urlpath +from twisted.python.compat import _PY3, unicode, nativeString, escape +from twisted.python.reflect import fullyQualifiedName + +from twisted.web import resource + +from twisted.web.template import TagLoader, XMLString, Element, renderer +from twisted.web.template import flattenString + + + +def _PRE(text): + """ + Wraps <pre> tags around some text and HTML-escape it. + + This is here since once twisted.web.html was deprecated it was hard to + migrate the html.PRE from current code to twisted.web.template. + + For new code consider using twisted.web.template. + + @return: Escaped text wrapped in <pre> tags. + @rtype: C{str} + """ + return '<pre>%s</pre>' % (escape(text),) + + + +def redirectTo(URL, request): + """ + Generate a redirect to the given location. + + @param URL: A L{bytes} giving the location to which to redirect. + @type URL: L{bytes} + + @param request: The request object to use to generate the redirect. + @type request: L{IRequest<twisted.web.iweb.IRequest>} provider + + @raise TypeError: If the type of C{URL} a L{unicode} instead of L{bytes}. + + @return: A C{bytes} containing HTML which tries to convince the client agent + to visit the new location even if it doesn't respect the I{FOUND} + response code. 
This is intended to be returned from a render method, + eg:: + + def render_GET(self, request): + return redirectTo(b"http://example.com/", request) + """ + if isinstance(URL, unicode) : + raise TypeError("Unicode object not allowed as URL") + request.setHeader(b"Content-Type", b"text/html; charset=utf-8") + request.redirect(URL) + content = """ +<html> + <head> + <meta http-equiv=\"refresh\" content=\"0;URL=%(url)s\"> + </head> + <body bgcolor=\"#FFFFFF\" text=\"#000000\"> + <a href=\"%(url)s\">click here</a> + </body> +</html> +""" % {'url': nativeString(URL)} + if _PY3: + content = content.encode("utf8") + return content + + +class Redirect(resource.Resource): + isLeaf = True + + def __init__(self, url): + resource.Resource.__init__(self) + self.url = url + + def render(self, request): + return redirectTo(self.url, request) + + def getChild(self, name, request): + return self + + +class ChildRedirector(Redirect): + isLeaf = 0 + def __init__(self, url): + # XXX is this enough? + if ((url.find('://') == -1) + and (not url.startswith('..')) + and (not url.startswith('/'))): + raise ValueError("It seems you've given me a redirect (%s) that is a child of myself! That's not good, it'll cause an infinite redirect." % url) + Redirect.__init__(self, url) + + def getChild(self, name, request): + newUrl = self.url + if not newUrl.endswith('/'): + newUrl += '/' + newUrl += name + return ChildRedirector(newUrl) + + +class ParentRedirect(resource.Resource): + """ + I redirect to URLPath.here(). + """ + isLeaf = 1 + def render(self, request): + return redirectTo(urlpath.URLPath.fromRequest(request).here(), request) + + def getChild(self, request): + return self + + +class DeferredResource(resource.Resource): + """ + I wrap up a Deferred that will eventually result in a Resource + object. + """ + isLeaf = 1 + + def __init__(self, d): + resource.Resource.__init__(self) + self.d = d + + def getChild(self, name, request): + return self + + def render(self, request): + self.d.addCallback(self._cbChild, request).addErrback( + self._ebChild,request) + from twisted.web.server import NOT_DONE_YET + return NOT_DONE_YET + + def _cbChild(self, child, request): + request.render(resource.getChildForRequest(child, request)) + + def _ebChild(self, reason, request): + request.processingFailed(reason) + + + +class _SourceLineElement(Element): + """ + L{_SourceLineElement} is an L{IRenderable} which can render a single line of + source code. + + @ivar number: A C{int} giving the line number of the source code to be + rendered. + @ivar source: A C{str} giving the source code to be rendered. + """ + def __init__(self, loader, number, source): + Element.__init__(self, loader) + self.number = number + self.source = source + + + @renderer + def sourceLine(self, request, tag): + """ + Render the line of source as a child of C{tag}. + """ + return tag(self.source.replace(' ', u' \N{NO-BREAK SPACE}')) + + + @renderer + def lineNumber(self, request, tag): + """ + Render the line number as a child of C{tag}. + """ + return tag(str(self.number)) + + + +class _SourceFragmentElement(Element): + """ + L{_SourceFragmentElement} is an L{IRenderable} which can render several lines + of source code near the line number of a particular frame object. + + @ivar frame: A L{Failure<twisted.python.failure.Failure>}-style frame object + for which to load a source line to render. This is really a tuple + holding some information from a frame object. See + L{Failure.frames<twisted.python.failure.Failure>} for specifics. 
+ """ + def __init__(self, loader, frame): + Element.__init__(self, loader) + self.frame = frame + + + def _getSourceLines(self): + """ + Find the source line references by C{self.frame} and yield, in source + line order, it and the previous and following lines. + + @return: A generator which yields two-tuples. Each tuple gives a source + line number and the contents of that source line. + """ + filename = self.frame[1] + lineNumber = self.frame[2] + for snipLineNumber in range(lineNumber - 1, lineNumber + 2): + yield (snipLineNumber, + linecache.getline(filename, snipLineNumber).rstrip()) + + + @renderer + def sourceLines(self, request, tag): + """ + Render the source line indicated by C{self.frame} and several + surrounding lines. The active line will be given a I{class} of + C{"snippetHighlightLine"}. Other lines will be given a I{class} of + C{"snippetLine"}. + """ + for (lineNumber, sourceLine) in self._getSourceLines(): + newTag = tag.clone() + if lineNumber == self.frame[2]: + cssClass = "snippetHighlightLine" + else: + cssClass = "snippetLine" + loader = TagLoader(newTag(**{"class": cssClass})) + yield _SourceLineElement(loader, lineNumber, sourceLine) + + + +class _FrameElement(Element): + """ + L{_FrameElement} is an L{IRenderable} which can render details about one + frame from a L{Failure<twisted.python.failure.Failure>}. + + @ivar frame: A L{Failure<twisted.python.failure.Failure>}-style frame object + for which to load a source line to render. This is really a tuple + holding some information from a frame object. See + L{Failure.frames<twisted.python.failure.Failure>} for specifics. + """ + def __init__(self, loader, frame): + Element.__init__(self, loader) + self.frame = frame + + + @renderer + def filename(self, request, tag): + """ + Render the name of the file this frame references as a child of C{tag}. + """ + return tag(self.frame[1]) + + + @renderer + def lineNumber(self, request, tag): + """ + Render the source line number this frame references as a child of + C{tag}. + """ + return tag(str(self.frame[2])) + + + @renderer + def function(self, request, tag): + """ + Render the function name this frame references as a child of C{tag}. + """ + return tag(self.frame[0]) + + + @renderer + def source(self, request, tag): + """ + Render the source code surrounding the line this frame references, + replacing C{tag}. + """ + return _SourceFragmentElement(TagLoader(tag), self.frame) + + + +class _StackElement(Element): + """ + L{_StackElement} renders an L{IRenderable} which can render a list of frames. + """ + def __init__(self, loader, stackFrames): + Element.__init__(self, loader) + self.stackFrames = stackFrames + + + @renderer + def frames(self, request, tag): + """ + Render the list of frames in this L{_StackElement}, replacing C{tag}. + """ + return [ + _FrameElement(TagLoader(tag.clone()), frame) + for frame + in self.stackFrames] + + + +class FailureElement(Element): + """ + L{FailureElement} is an L{IRenderable} which can render detailed information + about a L{Failure<twisted.python.failure.Failure>}. + + @ivar failure: The L{Failure<twisted.python.failure.Failure>} instance which + will be rendered. 
+ + @since: 12.1 + """ + loader = XMLString(""" +<div xmlns:t="http://twistedmatrix.com/ns/twisted.web.template/0.1"> + <style type="text/css"> + div.error { + color: red; + font-family: Verdana, Arial, helvetica, sans-serif; + font-weight: bold; + } + + div { + font-family: Verdana, Arial, helvetica, sans-serif; + } + + div.stackTrace { + } + + div.frame { + padding: 1em; + background: white; + border-bottom: thin black dashed; + } + + div.frame:first-child { + padding: 1em; + background: white; + border-top: thin black dashed; + border-bottom: thin black dashed; + } + + div.location { + } + + span.function { + font-weight: bold; + font-family: "Courier New", courier, monospace; + } + + div.snippet { + margin-bottom: 0.5em; + margin-left: 1em; + background: #FFFFDD; + } + + div.snippetHighlightLine { + color: red; + } + + span.code { + font-family: "Courier New", courier, monospace; + } + </style> + + <div class="error"> + <span t:render="type" />: <span t:render="value" /> + </div> + <div class="stackTrace" t:render="traceback"> + <div class="frame" t:render="frames"> + <div class="location"> + <span t:render="filename" />:<span t:render="lineNumber" /> in + <span class="function" t:render="function" /> + </div> + <div class="snippet" t:render="source"> + <div t:render="sourceLines"> + <span class="lineno" t:render="lineNumber" /> + <code class="code" t:render="sourceLine" /> + </div> + </div> + </div> + </div> + <div class="error"> + <span t:render="type" />: <span t:render="value" /> + </div> +</div> +""") + + def __init__(self, failure, loader=None): + Element.__init__(self, loader) + self.failure = failure + + + @renderer + def type(self, request, tag): + """ + Render the exception type as a child of C{tag}. + """ + return tag(fullyQualifiedName(self.failure.type)) + + + @renderer + def value(self, request, tag): + """ + Render the exception value as a child of C{tag}. + """ + return tag(unicode(self.failure.value).encode('utf8')) + + + @renderer + def traceback(self, request, tag): + """ + Render all the frames in the wrapped + L{Failure<twisted.python.failure.Failure>}'s traceback stack, replacing + C{tag}. + """ + return _StackElement(TagLoader(tag), self.failure.frames) + + + +def formatFailure(myFailure): + """ + Construct an HTML representation of the given failure. + + Consider using L{FailureElement} instead. + + @type myFailure: L{Failure<twisted.python.failure.Failure>} + + @rtype: C{bytes} + @return: A string containing the HTML representation of the given failure. + """ + result = [] + flattenString(None, FailureElement(myFailure)).addBoth(result.append) + if isinstance(result[0], bytes): + # Ensure the result string is all ASCII, for compatibility with the + # default encoding expected by browsers. + return result[0].decode('utf-8').encode('ascii', 'xmlcharrefreplace') + result[0].raiseException() + + + +__all__ = [ + "redirectTo", "Redirect", "ChildRedirector", "ParentRedirect", + "DeferredResource", "FailureElement", "formatFailure"] diff --git a/contrib/python/Twisted/py2/twisted/web/vhost.py b/contrib/python/Twisted/py2/twisted/web/vhost.py new file mode 100644 index 0000000000..3751b768ae --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/vhost.py @@ -0,0 +1,138 @@ +# -*- test-case-name: twisted.web. +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +I am a virtual hosts implementation. 
+""" + +from __future__ import division, absolute_import + +# Twisted Imports +from twisted.python import roots +from twisted.web import resource + + +class VirtualHostCollection(roots.Homogenous): + """Wrapper for virtual hosts collection. + + This exists for configuration purposes. + """ + entityType = resource.Resource + + def __init__(self, nvh): + self.nvh = nvh + + def listStaticEntities(self): + return self.nvh.hosts.items() + + def getStaticEntity(self, name): + return self.nvh.hosts.get(self) + + def reallyPutEntity(self, name, entity): + self.nvh.addHost(name, entity) + + def delEntity(self, name): + self.nvh.removeHost(name) + + +class NameVirtualHost(resource.Resource): + """I am a resource which represents named virtual hosts. + """ + + default = None + + def __init__(self): + """Initialize. + """ + resource.Resource.__init__(self) + self.hosts = {} + + def listStaticEntities(self): + return resource.Resource.listStaticEntities(self) + [("Virtual Hosts", VirtualHostCollection(self))] + + def getStaticEntity(self, name): + if name == "Virtual Hosts": + return VirtualHostCollection(self) + else: + return resource.Resource.getStaticEntity(self, name) + + def addHost(self, name, resrc): + """Add a host to this virtual host. + + This will take a host named `name', and map it to a resource + `resrc'. For example, a setup for our virtual hosts would be:: + + nvh.addHost('divunal.com', divunalDirectory) + nvh.addHost('www.divunal.com', divunalDirectory) + nvh.addHost('twistedmatrix.com', twistedMatrixDirectory) + nvh.addHost('www.twistedmatrix.com', twistedMatrixDirectory) + """ + self.hosts[name] = resrc + + def removeHost(self, name): + """Remove a host.""" + del self.hosts[name] + + def _getResourceForRequest(self, request): + """(Internal) Get the appropriate resource for the given host. + """ + hostHeader = request.getHeader(b'host') + if hostHeader == None: + return self.default or resource.NoResource() + else: + host = hostHeader.lower().split(b':', 1)[0] + return (self.hosts.get(host, self.default) + or resource.NoResource("host %s not in vhost map" % repr(host))) + + def render(self, request): + """Implementation of resource.Resource's render method. + """ + resrc = self._getResourceForRequest(request) + return resrc.render(request) + + def getChild(self, path, request): + """Implementation of resource.Resource's getChild method. + """ + resrc = self._getResourceForRequest(request) + if resrc.isLeaf: + request.postpath.insert(0,request.prepath.pop(-1)) + return resrc + else: + return resrc.getChildWithDefault(path, request) + +class _HostResource(resource.Resource): + + def getChild(self, path, request): + if b':' in path: + host, port = path.split(b':', 1) + port = int(port) + else: + host, port = path, 80 + request.setHost(host, port) + prefixLen = (3 + request.isSecure() + 4 + len(path) + + len(request.prepath[-3])) + request.path = b'/' + b'/'.join(request.postpath) + request.uri = request.uri[prefixLen:] + del request.prepath[:3] + return request.site.getResourceFor(request) + + +class VHostMonsterResource(resource.Resource): + + """ + Use this to be able to record the hostname and method (http vs. https) + in the URL without disturbing your web site. If you put this resource + in a URL http://foo.com/bar then requests to + http://foo.com/bar/http/baz.com/something will be equivalent to + http://foo.com/something, except that the hostname the request will + appear to be accessing will be "baz.com". 
So if "baz.com" is redirecting + all requests for to foo.com, while foo.com is inaccessible from the outside, + then redirect and url generation will work correctly + """ + def getChild(self, path, request): + if path == b'http': + request.isSecure = lambda: 0 + elif path == b'https': + request.isSecure = lambda: 1 + return _HostResource() diff --git a/contrib/python/Twisted/py2/twisted/web/wsgi.py b/contrib/python/Twisted/py2/twisted/web/wsgi.py new file mode 100644 index 0000000000..311050f233 --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/wsgi.py @@ -0,0 +1,596 @@ +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +An implementation of +U{Python Web Server Gateway Interface v1.0.1<http://www.python.org/dev/peps/pep-3333/>}. +""" + +__metaclass__ = type + +from sys import exc_info +from warnings import warn + +from zope.interface import implementer + +from twisted.internet.threads import blockingCallFromThread +from twisted.python.compat import reraise, Sequence +from twisted.python.failure import Failure +from twisted.web.resource import IResource +from twisted.web.server import NOT_DONE_YET +from twisted.web.http import INTERNAL_SERVER_ERROR +from twisted.logger import Logger + + + +# PEP-3333 -- which has superseded PEP-333 -- states that, in both Python 2 +# and Python 3, text strings MUST be represented using the platform's native +# string type, limited to characters defined in ISO-8859-1. Byte strings are +# used only for values read from wsgi.input, passed to write() or yielded by +# the application. +# +# Put another way: +# +# - In Python 2, all text strings and binary data are of type str/bytes and +# NEVER of type unicode. Whether the strings contain binary data or +# ISO-8859-1 text depends on context. +# +# - In Python 3, all text strings are of type str, and all binary data are of +# type bytes. Text MUST always be limited to that which can be encoded as +# ISO-8859-1, U+0000 to U+00FF inclusive. +# +# The following pair of functions -- _wsgiString() and _wsgiStringToBytes() -- +# are used to make Twisted's WSGI support compliant with the standard. +if str is bytes: + def _wsgiString(string): # Python 2. + """ + Convert C{string} to an ISO-8859-1 byte string, if it is not already. + + @type string: C{str}/C{bytes} or C{unicode} + @rtype: C{str}/C{bytes} + + @raise UnicodeEncodeError: If C{string} contains non-ISO-8859-1 chars. + """ + if isinstance(string, str): + return string + else: + return string.encode('iso-8859-1') + + def _wsgiStringToBytes(string): # Python 2. + """ + Return C{string} as is; a WSGI string is a byte string in Python 2. + + @type string: C{str}/C{bytes} + @rtype: C{str}/C{bytes} + """ + return string + +else: + def _wsgiString(string): # Python 3. + """ + Convert C{string} to a WSGI "bytes-as-unicode" string. + + If it's a byte string, decode as ISO-8859-1. If it's a Unicode string, + round-trip it to bytes and back using ISO-8859-1 as the encoding. + + @type string: C{str} or C{bytes} + @rtype: C{str} + + @raise UnicodeEncodeError: If C{string} contains non-ISO-8859-1 chars. + """ + if isinstance(string, str): + return string.encode("iso-8859-1").decode('iso-8859-1') + else: + return string.decode("iso-8859-1") + + def _wsgiStringToBytes(string): # Python 3. + """ + Convert C{string} from a WSGI "bytes-as-unicode" string to an + ISO-8859-1 byte string. + + @type string: C{str} + @rtype: C{bytes} + + @raise UnicodeEncodeError: If C{string} contains non-ISO-8859-1 chars. 
+ """ + return string.encode("iso-8859-1") + + + +class _ErrorStream: + """ + File-like object instances of which are used as the value for the + C{'wsgi.errors'} key in the C{environ} dictionary passed to the application + object. + + This simply passes writes on to L{logging<twisted.logger>} system as + error events from the C{'wsgi'} system. In the future, it may be desirable + to expose more information in the events it logs, such as the application + object which generated the message. + """ + _log = Logger() + + def write(self, data): + """ + Generate an event for the logging system with the given bytes as the + message. + + This is called in a WSGI application thread, not the I/O thread. + + @type data: str + + @raise TypeError: On Python 3, if C{data} is not a native string. On + Python 2 a warning will be issued. + """ + if not isinstance(data, str): + if str is bytes: + warn("write() argument should be str, not %r (%s)" % ( + data, type(data).__name__), category=UnicodeWarning) + else: + raise TypeError( + "write() argument must be str, not %r (%s)" + % (data, type(data).__name__)) + + # Note that in old style, message was a tuple. logger._legacy + # will overwrite this value if it is not properly formatted here. + self._log.error( + data, + system='wsgi', + isError=True, + message=(data,) + ) + + + def writelines(self, iovec): + """ + Join the given lines and pass them to C{write} to be handled in the + usual way. + + This is called in a WSGI application thread, not the I/O thread. + + @param iovec: A C{list} of C{'\\n'}-terminated C{str} which will be + logged. + + @raise TypeError: On Python 3, if C{iovec} contains any non-native + strings. On Python 2 a warning will be issued. + """ + self.write(''.join(iovec)) + + + def flush(self): + """ + Nothing is buffered, so flushing does nothing. This method is required + to exist by PEP 333, though. + + This is called in a WSGI application thread, not the I/O thread. + """ + + + +class _InputStream: + """ + File-like object instances of which are used as the value for the + C{'wsgi.input'} key in the C{environ} dictionary passed to the application + object. + + This only exists to make the handling of C{readline(-1)} consistent across + different possible underlying file-like object implementations. The other + supported methods pass through directly to the wrapped object. + """ + def __init__(self, input): + """ + Initialize the instance. + + This is called in the I/O thread, not a WSGI application thread. + """ + self._wrapped = input + + + def read(self, size=None): + """ + Pass through to the underlying C{read}. + + This is called in a WSGI application thread, not the I/O thread. + """ + # Avoid passing None because cStringIO and file don't like it. + if size is None: + return self._wrapped.read() + return self._wrapped.read(size) + + + def readline(self, size=None): + """ + Pass through to the underlying C{readline}, with a size of C{-1} replaced + with a size of L{None}. + + This is called in a WSGI application thread, not the I/O thread. + """ + # Check for -1 because StringIO doesn't handle it correctly. Check for + # None because files and tempfiles don't accept that. + if size == -1 or size is None: + return self._wrapped.readline() + return self._wrapped.readline(size) + + + def readlines(self, size=None): + """ + Pass through to the underlying C{readlines}. + + This is called in a WSGI application thread, not the I/O thread. + """ + # Avoid passing None because cStringIO and file don't like it. 
+ if size is None: + return self._wrapped.readlines() + return self._wrapped.readlines(size) + + + def __iter__(self): + """ + Pass through to the underlying C{__iter__}. + + This is called in a WSGI application thread, not the I/O thread. + """ + return iter(self._wrapped) + + + +class _WSGIResponse: + """ + Helper for L{WSGIResource} which drives the WSGI application using a + threadpool and hooks it up to the L{http.Request}. + + @ivar started: A L{bool} indicating whether or not the response status and + headers have been written to the request yet. This may only be read or + written in the WSGI application thread. + + @ivar reactor: An L{IReactorThreads} provider which is used to call methods + on the request in the I/O thread. + + @ivar threadpool: A L{ThreadPool} which is used to call the WSGI + application object in a non-I/O thread. + + @ivar application: The WSGI application object. + + @ivar request: The L{http.Request} upon which the WSGI environment is + based and to which the application's output will be sent. + + @ivar environ: The WSGI environment L{dict}. + + @ivar status: The HTTP response status L{str} supplied to the WSGI + I{start_response} callable by the application. + + @ivar headers: A list of HTTP response headers supplied to the WSGI + I{start_response} callable by the application. + + @ivar _requestFinished: A flag which indicates whether it is possible to + generate more response data or not. This is L{False} until + L{http.Request.notifyFinish} tells us the request is done, + then L{True}. + """ + + _requestFinished = False + _log = Logger() + + def __init__(self, reactor, threadpool, application, request): + self.started = False + self.reactor = reactor + self.threadpool = threadpool + self.application = application + self.request = request + self.request.notifyFinish().addBoth(self._finished) + + if request.prepath: + scriptName = b'/' + b'/'.join(request.prepath) + else: + scriptName = b'' + + if request.postpath: + pathInfo = b'/' + b'/'.join(request.postpath) + else: + pathInfo = b'' + + parts = request.uri.split(b'?', 1) + if len(parts) == 1: + queryString = b'' + else: + queryString = parts[1] + + # All keys and values need to be native strings, i.e. of type str in + # *both* Python 2 and Python 3, so says PEP-3333. + self.environ = { + 'REQUEST_METHOD': _wsgiString(request.method), + 'REMOTE_ADDR': _wsgiString(request.getClientAddress().host), + 'SCRIPT_NAME': _wsgiString(scriptName), + 'PATH_INFO': _wsgiString(pathInfo), + 'QUERY_STRING': _wsgiString(queryString), + 'CONTENT_TYPE': _wsgiString( + request.getHeader(b'content-type') or ''), + 'CONTENT_LENGTH': _wsgiString( + request.getHeader(b'content-length') or ''), + 'SERVER_NAME': _wsgiString(request.getRequestHostname()), + 'SERVER_PORT': _wsgiString(str(request.getHost().port)), + 'SERVER_PROTOCOL': _wsgiString(request.clientproto)} + + # The application object is entirely in control of response headers; + # disable the default Content-Type value normally provided by + # twisted.web.server.Request. + self.request.defaultContentType = None + + for name, values in request.requestHeaders.getAllRawHeaders(): + name = 'HTTP_' + _wsgiString(name).upper().replace('-', '_') + # It might be preferable for http.HTTPChannel to clear out + # newlines. 
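            # For example, a request header 'X-Foo-Bar: baz' is exposed to the
            # application as environ['HTTP_X_FOO_BAR'] = 'baz'; repeated
            # headers are joined with commas, and embedded newlines become
            # spaces.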
+ self.environ[name] = ','.join( + _wsgiString(v) for v in values).replace('\n', ' ') + + self.environ.update({ + 'wsgi.version': (1, 0), + 'wsgi.url_scheme': request.isSecure() and 'https' or 'http', + 'wsgi.run_once': False, + 'wsgi.multithread': True, + 'wsgi.multiprocess': False, + 'wsgi.errors': _ErrorStream(), + # Attend: request.content was owned by the I/O thread up until + # this point. By wrapping it and putting the result into the + # environment dictionary, it is effectively being given to + # another thread. This means that whatever it is, it has to be + # safe to access it from two different threads. The access + # *should* all be serialized (first the I/O thread writes to + # it, then the WSGI thread reads from it, then the I/O thread + # closes it). However, since the request is made available to + # arbitrary application code during resource traversal, it's + # possible that some other code might decide to use it in the + # I/O thread concurrently with its use in the WSGI thread. + # More likely than not, this will break. This seems like an + # unlikely possibility to me, but if it is to be allowed, + # something here needs to change. -exarkun + 'wsgi.input': _InputStream(request.content)}) + + + def _finished(self, ignored): + """ + Record the end of the response generation for the request being + serviced. + """ + self._requestFinished = True + + + def startResponse(self, status, headers, excInfo=None): + """ + The WSGI I{start_response} callable. The given values are saved until + they are needed to generate the response. + + This will be called in a non-I/O thread. + """ + if self.started and excInfo is not None: + reraise(excInfo[1], excInfo[2]) + + # PEP-3333 mandates that status should be a native string. In practice + # this is mandated by Twisted's HTTP implementation too, so we enforce + # on both Python 2 and Python 3. + if not isinstance(status, str): + raise TypeError( + "status must be str, not %r (%s)" + % (status, type(status).__name__)) + + # PEP-3333 mandates that headers should be a plain list, but in + # practice we work with any sequence type and only warn when it's not + # a plain list. + if isinstance(headers, list): + pass # This is okay. + elif isinstance(headers, Sequence): + warn("headers should be a list, not %r (%s)" % ( + headers, type(headers).__name__), category=RuntimeWarning) + else: + raise TypeError( + "headers must be a list, not %r (%s)" + % (headers, type(headers).__name__)) + + # PEP-3333 mandates that each header should be a (str, str) tuple, but + # in practice we work with any sequence type and only warn when it's + # not a plain list. + for header in headers: + if isinstance(header, tuple): + pass # This is okay. + elif isinstance(header, Sequence): + warn("header should be a (str, str) tuple, not %r (%s)" % ( + header, type(header).__name__), category=RuntimeWarning) + else: + raise TypeError( + "header must be a (str, str) tuple, not %r (%s)" + % (header, type(header).__name__)) + + # However, the sequence MUST contain only 2 elements. + if len(header) != 2: + raise TypeError( + "header must be a (str, str) tuple, not %r" + % (header, )) + + # Both elements MUST be native strings. Non-native strings will be + # rejected by the underlying HTTP machinery in any case, but we + # reject them here in order to provide a more informative error. 
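        # (For reference, a conforming call -- a sketch with illustrative
        # values -- is:
        #
        #     start_response('200 OK', [('Content-Type', 'text/plain')])
        #
        # i.e. a native-str status line and a plain list of (str, str) tuples;
        # anything else is warned about or rejected above.)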
+ for elem in header: + if not isinstance(elem, str): + raise TypeError( + "header must be (str, str) tuple, not %r" + % (header, )) + + self.status = status + self.headers = headers + return self.write + + + def write(self, data): + """ + The WSGI I{write} callable returned by the I{start_response} callable. + The given bytes will be written to the response body, possibly flushing + the status and headers first. + + This will be called in a non-I/O thread. + """ + # PEP-3333 states: + # + # The server or gateway must transmit the yielded bytestrings to the + # client in an unbuffered fashion, completing the transmission of + # each bytestring before requesting another one. + # + # This write() method is used for the imperative and (indirectly) for + # the more familiar iterable-of-bytestrings WSGI mechanism. It uses + # C{blockingCallFromThread} to schedule writes. This allows exceptions + # to propagate up from the underlying HTTP implementation. However, + # that underlying implementation does not, as yet, provide any way to + # know if the written data has been transmitted, so this method + # violates the above part of PEP-3333. + # + # PEP-3333 also says that a server may: + # + # Use a different thread to ensure that the block continues to be + # transmitted while the application produces the next block. + # + # Which suggests that this is actually compliant with PEP-3333, + # because writes are done in the reactor thread. + # + # However, providing some back-pressure may nevertheless be a Good + # Thing at some point in the future. + + def wsgiWrite(started): + if not started: + self._sendResponseHeaders() + self.request.write(data) + + try: + return blockingCallFromThread( + self.reactor, wsgiWrite, self.started) + finally: + self.started = True + + + def _sendResponseHeaders(self): + """ + Set the response code and response headers on the request object, but + do not flush them. The caller is responsible for doing a write in + order for anything to actually be written out in response to the + request. + + This must be called in the I/O thread. + """ + code, message = self.status.split(None, 1) + code = int(code) + self.request.setResponseCode(code, _wsgiStringToBytes(message)) + + for name, value in self.headers: + # Don't allow the application to control these required headers. + if name.lower() not in ('server', 'date'): + self.request.responseHeaders.addRawHeader( + _wsgiStringToBytes(name), _wsgiStringToBytes(value)) + + + def start(self): + """ + Start the WSGI application in the threadpool. + + This must be called in the I/O thread. + """ + self.threadpool.callInThread(self.run) + + + def run(self): + """ + Call the WSGI application object, iterate it, and handle its output. + + This must be called in a non-I/O thread (ie, a WSGI application + thread). 
+ """ + try: + appIterator = self.application(self.environ, self.startResponse) + for elem in appIterator: + if elem: + self.write(elem) + if self._requestFinished: + break + close = getattr(appIterator, 'close', None) + if close is not None: + close() + except: + def wsgiError(started, type, value, traceback): + self._log.failure( + "WSGI application error", + failure=Failure(value, type, traceback) + ) + if started: + self.request.loseConnection() + else: + self.request.setResponseCode(INTERNAL_SERVER_ERROR) + self.request.finish() + self.reactor.callFromThread(wsgiError, self.started, *exc_info()) + else: + def wsgiFinish(started): + if not self._requestFinished: + if not started: + self._sendResponseHeaders() + self.request.finish() + self.reactor.callFromThread(wsgiFinish, self.started) + self.started = True + + + +@implementer(IResource) +class WSGIResource: + """ + An L{IResource} implementation which delegates responsibility for all + resources hierarchically inferior to it to a WSGI application. + + @ivar _reactor: An L{IReactorThreads} provider which will be passed on to + L{_WSGIResponse} to schedule calls in the I/O thread. + + @ivar _threadpool: A L{ThreadPool} which will be passed on to + L{_WSGIResponse} to run the WSGI application object. + + @ivar _application: The WSGI application object. + """ + + # Further resource segments are left up to the WSGI application object to + # handle. + isLeaf = True + + def __init__(self, reactor, threadpool, application): + self._reactor = reactor + self._threadpool = threadpool + self._application = application + + + def render(self, request): + """ + Turn the request into the appropriate C{environ} C{dict} suitable to be + passed to the WSGI application object and then pass it on. + + The WSGI application object is given almost complete control of the + rendering process. C{NOT_DONE_YET} will always be returned in order + and response completion will be dictated by the application object, as + will the status, headers, and the response body. + """ + response = _WSGIResponse( + self._reactor, self._threadpool, self._application, request) + response.start() + return NOT_DONE_YET + + + def getChildWithDefault(self, name, request): + """ + Reject attempts to retrieve a child resource. All path segments beyond + the one which refers to this resource are handled by the WSGI + application object. + """ + raise RuntimeError("Cannot get IResource children from WSGIResource") + + + def putChild(self, path, child): + """ + Reject attempts to add a child resource to this resource. The WSGI + application object handles all path segments beneath this resource, so + L{IResource} children can never be found. + """ + raise RuntimeError("Cannot put IResource children under WSGIResource") + + +__all__ = ['WSGIResource'] diff --git a/contrib/python/Twisted/py2/twisted/web/xmlrpc.py b/contrib/python/Twisted/py2/twisted/web/xmlrpc.py new file mode 100644 index 0000000000..4a9f3e0afc --- /dev/null +++ b/contrib/python/Twisted/py2/twisted/web/xmlrpc.py @@ -0,0 +1,591 @@ +# -*- test-case-name: twisted.web.test.test_xmlrpc -*- +# Copyright (c) Twisted Matrix Laboratories. +# See LICENSE for details. + +""" +A generic resource for publishing objects via XML-RPC. 
+
+Maintainer: Itamar Shtull-Trauring
+
+@var Fault: See L{xmlrpclib.Fault}
+@type Fault: L{xmlrpclib.Fault}
+"""
+
+from __future__ import division, absolute_import
+
+from twisted.python.compat import _PY3, intToBytes, nativeString, urllib_parse
+from twisted.python.compat import unicode
+
+# System Imports
+import base64
+if _PY3:
+    import xmlrpc.client as xmlrpclib
+else:
+    import xmlrpclib
+
+# Sibling Imports
+from twisted.web import resource, server, http
+from twisted.internet import defer, protocol, reactor
+from twisted.python import reflect, failure
+from twisted.logger import Logger
+
+# These are deprecated, use the class level definitions
+NOT_FOUND = 8001
+FAILURE = 8002
+
+
+# Useful so people don't need to import xmlrpclib directly
+Fault = xmlrpclib.Fault
+Binary = xmlrpclib.Binary
+Boolean = xmlrpclib.Boolean
+DateTime = xmlrpclib.DateTime
+
+
+def withRequest(f):
+    """
+    Decorator to cause the request to be passed as the first argument
+    to the method.
+
+    If an I{xmlrpc_} method is wrapped with C{withRequest}, the
+    request object is passed as the first argument to that method.
+    For example::
+
+        @withRequest
+        def xmlrpc_echo(self, request, s):
+            return s
+
+    @since: 10.2
+    """
+    f.withRequest = True
+    return f
+
+
+
+class NoSuchFunction(Fault):
+    """
+    There is no function by the given name.
+    """
+
+
+class Handler:
+    """
+    Handle an XML-RPC request and store the state for a request in progress.
+
+    Override the run() method and return the result using self.result,
+    a Deferred.
+
+    We require this class since we're not using threads, so we can't
+    encapsulate state in a running function if we're going to have
+    to wait for results.
+
+    For example, let's say we want to authenticate against twisted.cred,
+    run an LDAP query and then pass its result to a database query, all
+    as a result of a single XML-RPC command. We'd use a Handler instance
+    to store the state of the running command.
+    """
+
+    def __init__(self, resource, *args):
+        self.resource = resource # the XML-RPC resource we are connected to
+        self.result = defer.Deferred()
+        self.run(*args)
+
+    def run(self, *args):
+        # event driven equivalent of 'raise NotImplementedError'
+        self.result.errback(
+            NotImplementedError("Implement run() in subclasses"))
+
+
+class XMLRPC(resource.Resource):
+    """
+    A resource that implements XML-RPC.
+
+    You probably want to connect this to '/RPC2'.
+
+    Methods published can return XML-RPC serializable results, Faults,
+    Binary, Boolean, DateTime, Deferreds, or Handler instances.
+
+    By default, methods beginning with 'xmlrpc_' are published.
+
+    Sub-handlers for prefixed methods (e.g., system.listMethods)
+    can be added with putSubHandler. By default, prefixes are
+    separated with a '.'. Override self.separator to change this.
+
+    @ivar allowNone: Permit XML translation of the Python constant None.
+    @type allowNone: C{bool}
+
+    @ivar useDateTime: Present C{datetime} values as C{datetime.datetime}
+        objects?
+    @type useDateTime: C{bool}
+    """
+
+    # Error codes for Twisted, if they conflict with yours then
+    # modify them at runtime.
+    NOT_FOUND = 8001
+    FAILURE = 8002
+
+    isLeaf = 1
+    separator = '.'
+    allowedMethods = (b'POST',)
+    _log = Logger()
+
+    def __init__(self, allowNone=False, useDateTime=False):
+        resource.Resource.__init__(self)
+        self.subHandlers = {}
+        self.allowNone = allowNone
+        self.useDateTime = useDateTime
+
+
+    def __setattr__(self, name, value):
+        self.__dict__[name] = value
+
+
+    def putSubHandler(self, prefix, handler):
+        self.subHandlers[prefix] = handler
+
+    def getSubHandler(self, prefix):
+        return self.subHandlers.get(prefix, None)
+
+    def getSubHandlerPrefixes(self):
+        return list(self.subHandlers.keys())
+
+    def render_POST(self, request):
+        request.content.seek(0, 0)
+        request.setHeader(b"content-type", b"text/xml; charset=utf-8")
+        try:
+            args, functionPath = xmlrpclib.loads(request.content.read(),
+                use_datetime=self.useDateTime)
+        except Exception as e:
+            f = Fault(self.FAILURE, "Can't deserialize input: %s" % (e,))
+            self._cbRender(f, request)
+        else:
+            try:
+                function = self.lookupProcedure(functionPath)
+            except Fault as f:
+                self._cbRender(f, request)
+            else:
+                # Use this list to track whether the response has failed or not.
+                # This will be used later on to decide if the result of the
+                # Deferred should be written out and Request.finish called.
+                responseFailed = []
+                request.notifyFinish().addErrback(responseFailed.append)
+                if getattr(function, 'withRequest', False):
+                    d = defer.maybeDeferred(function, request, *args)
+                else:
+                    d = defer.maybeDeferred(function, *args)
+                d.addErrback(self._ebRender)
+                d.addCallback(self._cbRender, request, responseFailed)
+        return server.NOT_DONE_YET
+
+
+    def _cbRender(self, result, request, responseFailed=None):
+        if responseFailed:
+            return
+
+        if isinstance(result, Handler):
+            result = result.result
+        if not isinstance(result, Fault):
+            result = (result,)
+        try:
+            try:
+                content = xmlrpclib.dumps(
+                    result, methodresponse=True,
+                    allow_none=self.allowNone)
+            except Exception as e:
+                f = Fault(self.FAILURE, "Can't serialize output: %s" % (e,))
+                content = xmlrpclib.dumps(f, methodresponse=True,
+                    allow_none=self.allowNone)
+
+            if isinstance(content, unicode):
+                content = content.encode('utf8')
+            request.setHeader(
+                b"content-length", intToBytes(len(content)))
+            request.write(content)
+        except:
+            self._log.failure('')
+        request.finish()
+
+
+    def _ebRender(self, failure):
+        if isinstance(failure.value, Fault):
+            return failure.value
+        self._log.failure('', failure)
+        return Fault(self.FAILURE, "error")
+
+
+    def lookupProcedure(self, procedurePath):
+        """
+        Given a string naming a procedure, return a callable object for that
+        procedure or raise NoSuchFunction.
+
+        The returned object will be called, and should return the result of the
+        procedure, a Deferred, or a Fault instance.
+
+        Override in subclasses if you want your own policy. The base
+        implementation is that, given C{'foo'}, C{self.xmlrpc_foo} will be
+        returned. If C{procedurePath} contains C{self.separator}, the
+        sub-handler for the initial prefix is used to search for the remaining
+        path.
+
+        If you override C{lookupProcedure}, you may also want to override
+        C{listProcedures} to accurately report the procedures supported by your
+        resource, so that clients using the I{system.listMethods} procedure
+        receive accurate results.
+
+        @since: 11.1
+        """
+        if procedurePath.find(self.separator) != -1:
+            prefix, procedurePath = procedurePath.split(self.separator, 1)
+            handler = self.getSubHandler(prefix)
+            if handler is None:
+                raise NoSuchFunction(self.NOT_FOUND,
+                    "no such subHandler %s" % prefix)
+            return handler.lookupProcedure(procedurePath)
+
+        f = getattr(self, "xmlrpc_%s" % procedurePath, None)
+        if not f:
+            raise NoSuchFunction(self.NOT_FOUND,
+                "procedure %s not found" % procedurePath)
+        elif not callable(f):
+            raise NoSuchFunction(self.NOT_FOUND,
+                "procedure %s not callable" % procedurePath)
+        else:
+            return f
+
+    def listProcedures(self):
+        """
+        Return a list of the names of all xmlrpc procedures.
+
+        @since: 11.1
+        """
+        return reflect.prefixedMethodNames(self.__class__, 'xmlrpc_')
+
+
+class XMLRPCIntrospection(XMLRPC):
+    """
+    Implement the XML-RPC Introspection API.
+
+    By default, the methodHelp method returns the 'help' method attribute,
+    if it exists, otherwise the __doc__ method attribute, if it exists,
+    otherwise the empty string.
+
+    To enable the methodSignature method, add a 'signature' method attribute
+    containing a list of lists. See methodSignature's documentation for the
+    format. Note the type strings should be XML-RPC types, not Python types.
+    """
+
+    def __init__(self, parent):
+        """
+        Implement Introspection support for an XMLRPC server.
+
+        @param parent: the XMLRPC server to add Introspection support to.
+        @type parent: L{XMLRPC}
+        """
+        XMLRPC.__init__(self)
+        self._xmlrpc_parent = parent
+
+    def xmlrpc_listMethods(self):
+        """
+        Return a list of the method names implemented by this server.
+        """
+        functions = []
+        todo = [(self._xmlrpc_parent, '')]
+        while todo:
+            obj, prefix = todo.pop(0)
+            functions.extend([prefix + name for name in obj.listProcedures()])
+            todo.extend([ (obj.getSubHandler(name),
+                           prefix + name + obj.separator)
+                          for name in obj.getSubHandlerPrefixes() ])
+        return functions
+
+    xmlrpc_listMethods.signature = [['array']]
+
+    def xmlrpc_methodHelp(self, method):
+        """
+        Return a documentation string describing the use of the given method.
+        """
+        method = self._xmlrpc_parent.lookupProcedure(method)
+        return (getattr(method, 'help', None)
+                or getattr(method, '__doc__', None) or '')
+
+    xmlrpc_methodHelp.signature = [['string', 'string']]
+
+    def xmlrpc_methodSignature(self, method):
+        """
+        Return a list of type signatures.
+
+        Each type signature is a list of the form [rtype, type1, type2, ...]
+        where rtype is the return type and typeN is the type of the Nth
+        argument. If no signature information is available, the empty
+        string is returned.
+        """
+        method = self._xmlrpc_parent.lookupProcedure(method)
+        return getattr(method, 'signature', None) or ''
+
+    xmlrpc_methodSignature.signature = [['array', 'string'],
+                                        ['string', 'string']]
+
+
+def addIntrospection(xmlrpc):
+    """
+    Add Introspection support to an XMLRPC server.
+
+    @param xmlrpc: the XMLRPC server to add Introspection support to.
+    @type xmlrpc: L{XMLRPC}
+    """
+    xmlrpc.putSubHandler('system', XMLRPCIntrospection(xmlrpc))
+
+
+class QueryProtocol(http.HTTPClient):
+    def connectionMade(self):
+        self._response = None
+        self.sendCommand(b'POST', self.factory.path)
+        self.sendHeader(b'User-Agent', b'Twisted/XMLRPClib')
+        self.sendHeader(b'Host', self.factory.host)
+        self.sendHeader(b'Content-type', b'text/xml; charset=utf-8')
+        payload = self.factory.payload
+        self.sendHeader(b'Content-length', intToBytes(len(payload)))
+
+        if self.factory.user:
+            auth = b':'.join([self.factory.user, self.factory.password])
+            authHeader = b''.join([b'Basic ', base64.b64encode(auth)])
+            self.sendHeader(b'Authorization', authHeader)
+        self.endHeaders()
+        self.transport.write(payload)
+
+    def handleStatus(self, version, status, message):
+        if status != b'200':
+            self.factory.badStatus(status, message)
+
+    def handleResponse(self, contents):
+        """
+        Handle the XML-RPC response received from the server.
+
+        Specifically, disconnect from the server and store the XML-RPC
+        response so that it can be properly handled when the disconnect is
+        finished.
+        """
+        self.transport.loseConnection()
+        self._response = contents
+
+    def connectionLost(self, reason):
+        """
+        The connection to the server has been lost.
+
+        If we have a full response from the server, then parse it and fire a
+        Deferred with the return value or C{Fault} that the server gave us.
+        """
+        http.HTTPClient.connectionLost(self, reason)
+        if self._response is not None:
+            response, self._response = self._response, None
+            self.factory.parseResponse(response)
+
+
+payloadTemplate = """<?xml version="1.0"?>
+<methodCall>
+<methodName>%s</methodName>
+%s
+</methodCall>
+"""
+
+
+class _QueryFactory(protocol.ClientFactory):
+    """
+    XML-RPC Client Factory
+
+    @ivar path: The path portion of the URL to which to post method calls.
+    @type path: L{bytes}
+
+    @ivar host: The value to use for the Host HTTP header.
+    @type host: L{bytes}
+
+    @ivar user: The username with which to authenticate with the server
+        when making calls.
+    @type user: L{bytes} or L{None}
+
+    @ivar password: The password with which to authenticate with the server
+        when making calls.
+    @type password: L{bytes} or L{None}
+
+    @ivar useDateTime: Accept datetime values as datetime.datetime objects.
+        This is also passed to the underlying xmlrpclib implementation.
+        Defaults to C{False}.
+    @type useDateTime: C{bool}
+    """
+
+    deferred = None
+    protocol = QueryProtocol
+
+    def __init__(self, path, host, method, user=None, password=None,
+                 allowNone=False, args=(), canceller=None, useDateTime=False):
+        """
+        @param method: The name of the method to call.
+        @type method: C{str}
+
+        @param allowNone: Allow the use of None values in parameters. It's
+            passed to the underlying xmlrpclib implementation. Defaults to
+            C{False}.
+        @type allowNone: C{bool} or L{None}
+
+        @param args: the arguments to pass to the method.
+        @type args: C{tuple}
+
+        @param canceller: A 1-argument callable passed to the deferred as the
+            canceller callback.
+ @type canceller: callable or L{None} + """ + self.path, self.host = path, host + self.user, self.password = user, password + self.payload = payloadTemplate % (method, + xmlrpclib.dumps(args, allow_none=allowNone)) + if isinstance(self.payload, unicode): + self.payload = self.payload.encode('utf8') + self.deferred = defer.Deferred(canceller) + self.useDateTime = useDateTime + + def parseResponse(self, contents): + if not self.deferred: + return + try: + response = xmlrpclib.loads(contents, + use_datetime=self.useDateTime)[0][0] + except: + deferred, self.deferred = self.deferred, None + deferred.errback(failure.Failure()) + else: + deferred, self.deferred = self.deferred, None + deferred.callback(response) + + def clientConnectionLost(self, _, reason): + if self.deferred is not None: + deferred, self.deferred = self.deferred, None + deferred.errback(reason) + + clientConnectionFailed = clientConnectionLost + + def badStatus(self, status, message): + deferred, self.deferred = self.deferred, None + deferred.errback(ValueError(status, message)) + + + +class Proxy: + """ + A Proxy for making remote XML-RPC calls. + + Pass the URL of the remote XML-RPC server to the constructor. + + Use C{proxy.callRemote('foobar', *args)} to call remote method + 'foobar' with *args. + + @ivar user: The username with which to authenticate with the server + when making calls. If specified, overrides any username information + embedded in C{url}. If not specified, a value may be taken from + C{url} if present. + @type user: L{bytes} or L{None} + + @ivar password: The password with which to authenticate with the server + when making calls. If specified, overrides any password information + embedded in C{url}. If not specified, a value may be taken from + C{url} if present. + @type password: L{bytes} or L{None} + + @ivar allowNone: allow the use of None values in parameters. It's + passed to the underlying L{xmlrpclib} implementation. Defaults to + C{False}. + @type allowNone: C{bool} or L{None} + + @ivar useDateTime: Accept datetime values as datetime.datetime objects. + also passed to the underlying L{xmlrpclib} implementation. Defaults to + C{False}. + @type useDateTime: C{bool} + + @ivar connectTimeout: Number of seconds to wait before assuming the + connection has failed. + @type connectTimeout: C{float} + + @ivar _reactor: The reactor used to create connections. + @type _reactor: Object providing L{twisted.internet.interfaces.IReactorTCP} + + @ivar queryFactory: Object returning a factory for XML-RPC protocol. Mainly + useful for tests. + """ + queryFactory = _QueryFactory + + def __init__(self, url, user=None, password=None, allowNone=False, + useDateTime=False, connectTimeout=30.0, reactor=reactor): + """ + @param url: The URL to which to post method calls. Calls will be made + over SSL if the scheme is HTTPS. If netloc contains username or + password information, these will be used to authenticate, as long as + the C{user} and C{password} arguments are not specified. 
+ @type url: L{bytes} + + """ + scheme, netloc, path, params, query, fragment = urllib_parse.urlparse( + url) + netlocParts = netloc.split(b'@') + if len(netlocParts) == 2: + userpass = netlocParts.pop(0).split(b':') + self.user = userpass.pop(0) + try: + self.password = userpass.pop(0) + except: + self.password = None + else: + self.user = self.password = None + hostport = netlocParts[0].split(b':') + self.host = hostport.pop(0) + try: + self.port = int(hostport.pop(0)) + except: + self.port = None + self.path = path + if self.path in [b'', None]: + self.path = b'/' + self.secure = (scheme == b'https') + if user is not None: + self.user = user + if password is not None: + self.password = password + self.allowNone = allowNone + self.useDateTime = useDateTime + self.connectTimeout = connectTimeout + self._reactor = reactor + + + def callRemote(self, method, *args): + """ + Call remote XML-RPC C{method} with given arguments. + + @return: a L{defer.Deferred} that will fire with the method response, + or a failure if the method failed. Generally, the failure type will + be L{Fault}, but you can also have an C{IndexError} on some buggy + servers giving empty responses. + + If the deferred is cancelled before the request completes, the + connection is closed and the deferred will fire with a + L{defer.CancelledError}. + """ + def cancel(d): + factory.deferred = None + connector.disconnect() + factory = self.queryFactory( + self.path, self.host, method, self.user, + self.password, self.allowNone, args, cancel, self.useDateTime) + + if self.secure: + from twisted.internet import ssl + connector = self._reactor.connectSSL( + nativeString(self.host), self.port or 443, + factory, ssl.ClientContextFactory(), + timeout=self.connectTimeout) + else: + connector = self._reactor.connectTCP( + nativeString(self.host), self.port or 80, factory, + timeout=self.connectTimeout) + return factory.deferred + + +__all__ = [ + "XMLRPC", "Handler", "NoSuchFunction", "Proxy", + + "Fault", "Binary", "Boolean", "DateTime"] |
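The wsgi.py hunk earlier in this diff hands every request below the mount point to a WSGI application running in the reactor's thread pool, with writes marshalled back to the I/O thread. As a rough usage sketch, not part of this commit, the following serves a trivial WSGI callable through WSGIResource; the demo_app callable, port 8080, and the plain twisted.web import paths (rather than the vendored contrib path) are assumptions made purely for illustration:

from twisted.internet import reactor
from twisted.web.server import Site
from twisted.web.wsgi import WSGIResource


def demo_app(environ, start_response):
    # PEP 3333: the status and each header pair must be native strings,
    # which is exactly what the startResponse() checks above enforce.
    start_response('200 OK', [('Content-Type', 'text/plain')])
    return [b'hello from WSGI under Twisted\n']


# The application itself runs in the reactor's thread pool; each write is
# pushed back onto the reactor (I/O) thread before hitting the transport.
root = WSGIResource(reactor, reactor.getThreadPool(), demo_app)
reactor.listenTCP(8080, Site(root))
reactor.run()

Because WSGIResource sets isLeaf, no further IResource traversal happens below it; getChildWithDefault() and putChild() raise RuntimeError by design.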
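The xmlrpc.py hunk publishes any xmlrpc_-prefixed method of an XMLRPC resource and provides a Deferred-based client in Proxy. A minimal sketch of both sides, again not part of this commit: the Example class, the add method, port 7080, and the loopback URL are invented for the illustration:

from twisted.internet import reactor
from twisted.web import server, xmlrpc


class Example(xmlrpc.XMLRPC):
    """Methods named xmlrpc_* are found by lookupProcedure()."""

    def xmlrpc_add(self, a, b):
        return a + b


root = Example()
xmlrpc.addIntrospection(root)   # registers the system.* sub-handler
reactor.listenTCP(7080, server.Site(root))


def show(result):
    print(result)   # prints 7


def call():
    # Proxy.callRemote returns a Deferred that fires with the decoded
    # response, or fails with a Fault on an XML-RPC level error.
    proxy = xmlrpc.Proxy(b'http://127.0.0.1:7080/')
    d = proxy.callRemote('add', 3, 4)
    d.addCallback(show)
    d.addBoth(lambda _: reactor.stop())


reactor.callWhenRunning(call)
reactor.run()

With introspection enabled, clients can also call system.listMethods, system.methodHelp, and system.methodSignature against the same endpoint.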