diff options
author | monster <[email protected]> | 2022-07-07 14:41:37 +0300 |
---|---|---|
committer | monster <[email protected]> | 2022-07-07 14:41:37 +0300 |
commit | 06e5c21a835c0e923506c4ff27929f34e00761c2 (patch) | |
tree | 75efcbc6854ef9bd476eb8bf00cc5c900da436a2 /contrib/python/future | |
parent | 03f024c4412e3aa613bb543cf1660176320ba8f4 (diff) |
fix ya.make
Diffstat (limited to 'contrib/python/future')
145 files changed, 0 insertions, 36038 deletions
diff --git a/contrib/python/future/.dist-info/METADATA b/contrib/python/future/.dist-info/METADATA deleted file mode 100644 index b6f83573fb5..00000000000 --- a/contrib/python/future/.dist-info/METADATA +++ /dev/null @@ -1,110 +0,0 @@ -Metadata-Version: 2.1 -Name: future -Version: 0.18.2 -Summary: Clean single-source support for Python 3 and 2 -Home-page: https://python-future.org -Author: Ed Schofield -Author-email: [email protected] -License: MIT -Keywords: future past python3 migration futurize backport six 2to3 modernize pasteurize 3to2 -Platform: UNKNOWN -Classifier: Programming Language :: Python -Classifier: Programming Language :: Python :: 2 -Classifier: Programming Language :: Python :: 2.6 -Classifier: Programming Language :: Python :: 2.7 -Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.3 -Classifier: Programming Language :: Python :: 3.4 -Classifier: Programming Language :: Python :: 3.5 -Classifier: Programming Language :: Python :: 3.6 -Classifier: Programming Language :: Python :: 3.7 -Classifier: License :: OSI Approved -Classifier: License :: OSI Approved :: MIT License -Classifier: Development Status :: 4 - Beta -Classifier: Intended Audience :: Developers -Requires-Python: >=2.6, !=3.0.*, !=3.1.*, !=3.2.* -License-File: LICENSE.txt - - -future: Easy, safe support for Python 2/3 compatibility -======================================================= - -``future`` is the missing compatibility layer between Python 2 and Python -3. It allows you to use a single, clean Python 3.x-compatible codebase to -support both Python 2 and Python 3 with minimal overhead. 
- -It is designed to be used as follows:: - - from __future__ import (absolute_import, division, - print_function, unicode_literals) - from builtins import ( - bytes, dict, int, list, object, range, str, - ascii, chr, hex, input, next, oct, open, - pow, round, super, - filter, map, zip) - -followed by predominantly standard, idiomatic Python 3 code that then runs -similarly on Python 2.6/2.7 and Python 3.3+. - -The imports have no effect on Python 3. On Python 2, they shadow the -corresponding builtins, which normally have different semantics on Python 3 -versus 2, to provide their Python 3 semantics. - - -Standard library reorganization -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -``future`` supports the standard library reorganization (PEP 3108) through the -following Py3 interfaces: - - >>> # Top-level packages with Py3 names provided on Py2: - >>> import html.parser - >>> import queue - >>> import tkinter.dialog - >>> import xmlrpc.client - >>> # etc. - - >>> # Aliases provided for extensions to existing Py2 module names: - >>> from future.standard_library import install_aliases - >>> install_aliases() - - >>> from collections import Counter, OrderedDict # backported to Py2.6 - >>> from collections import UserDict, UserList, UserString - >>> import urllib.request - >>> from itertools import filterfalse, zip_longest - >>> from subprocess import getoutput, getstatusoutput - - -Automatic conversion --------------------- - -An included script called `futurize -<http://python-future.org/automatic_conversion.html>`_ aids in converting -code (from either Python 2 or Python 3) to code compatible with both -platforms. It is similar to ``python-modernize`` but goes further in -providing Python 3 compatibility through the use of the backported types -and builtin functions in ``future``. - - -Documentation -------------- - -See: http://python-future.org - - -Credits -------- - -:Author: Ed Schofield, Jordan M. 
Adler, et al -:Sponsor: Python Charmers Pty Ltd, Australia, and Python Charmers Pte - Ltd, Singapore. http://pythoncharmers.com -:Others: See docs/credits.rst or http://python-future.org/credits.html - - -Licensing ---------- -Copyright 2013-2019 Python Charmers Pty Ltd, Australia. -The software is distributed under an MIT licence. See LICENSE.txt. - - - diff --git a/contrib/python/future/.dist-info/entry_points.txt b/contrib/python/future/.dist-info/entry_points.txt deleted file mode 100644 index 45d1a880fbd..00000000000 --- a/contrib/python/future/.dist-info/entry_points.txt +++ /dev/null @@ -1,4 +0,0 @@ -[console_scripts] -futurize = libfuturize.main:main -pasteurize = libpasteurize.main:main - diff --git a/contrib/python/future/.dist-info/top_level.txt b/contrib/python/future/.dist-info/top_level.txt deleted file mode 100644 index 58f5843c6c6..00000000000 --- a/contrib/python/future/.dist-info/top_level.txt +++ /dev/null @@ -1,4 +0,0 @@ -future -libfuturize -libpasteurize -past diff --git a/contrib/python/future/_dummy_thread/__init__.py b/contrib/python/future/_dummy_thread/__init__.py deleted file mode 100644 index 63dced6e5e5..00000000000 --- a/contrib/python/future/_dummy_thread/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from __future__ import absolute_import -import sys -__future_module__ = True - -if sys.version_info[0] < 3: - from dummy_thread import * -else: - raise ImportError('This package should not be accessible on Python 3. 
' - 'Either you are trying to run from the python-future src folder ' - 'or your installation of python-future is corrupted.') diff --git a/contrib/python/future/_markupbase/__init__.py b/contrib/python/future/_markupbase/__init__.py deleted file mode 100644 index 290906540c0..00000000000 --- a/contrib/python/future/_markupbase/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from __future__ import absolute_import -import sys -__future_module__ = True - -if sys.version_info[0] < 3: - from markupbase import * -else: - raise ImportError('This package should not be accessible on Python 3. ' - 'Either you are trying to run from the python-future src folder ' - 'or your installation of python-future is corrupted.') diff --git a/contrib/python/future/_thread/__init__.py b/contrib/python/future/_thread/__init__.py deleted file mode 100644 index 9f2a51c75a1..00000000000 --- a/contrib/python/future/_thread/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from __future__ import absolute_import -import sys -__future_module__ = True - -if sys.version_info[0] < 3: - from thread import * -else: - raise ImportError('This package should not be accessible on Python 3. ' - 'Either you are trying to run from the python-future src folder ' - 'or your installation of python-future is corrupted.') diff --git a/contrib/python/future/builtins/__init__.py b/contrib/python/future/builtins/__init__.py deleted file mode 100644 index 4f936f28449..00000000000 --- a/contrib/python/future/builtins/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -from __future__ import absolute_import -import sys -__future_module__ = True - -if sys.version_info[0] < 3: - from __builtin__ import * - # Overwrite any old definitions with the equivalent future.builtins ones: - from future.builtins import * -else: - raise ImportError('This package should not be accessible on Python 3. 
' - 'Either you are trying to run from the python-future src folder ' - 'or your installation of python-future is corrupted.') diff --git a/contrib/python/future/copyreg/__init__.py b/contrib/python/future/copyreg/__init__.py deleted file mode 100644 index 51bd4b9a74f..00000000000 --- a/contrib/python/future/copyreg/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -from __future__ import absolute_import -import sys - -if sys.version_info[0] < 3: - from copy_reg import * -else: - raise ImportError('This package should not be accessible on Python 3. ' - 'Either you are trying to run from the python-future src folder ' - 'or your installation of python-future is corrupted.') diff --git a/contrib/python/future/future/__init__.py b/contrib/python/future/future/__init__.py deleted file mode 100644 index ad419d67e27..00000000000 --- a/contrib/python/future/future/__init__.py +++ /dev/null @@ -1,93 +0,0 @@ -""" -future: Easy, safe support for Python 2/3 compatibility -======================================================= - -``future`` is the missing compatibility layer between Python 2 and Python -3. It allows you to use a single, clean Python 3.x-compatible codebase to -support both Python 2 and Python 3 with minimal overhead. - -It is designed to be used as follows:: - - from __future__ import (absolute_import, division, - print_function, unicode_literals) - from builtins import ( - bytes, dict, int, list, object, range, str, - ascii, chr, hex, input, next, oct, open, - pow, round, super, - filter, map, zip) - -followed by predominantly standard, idiomatic Python 3 code that then runs -similarly on Python 2.6/2.7 and Python 3.3+. - -The imports have no effect on Python 3. On Python 2, they shadow the -corresponding builtins, which normally have different semantics on Python 3 -versus 2, to provide their Python 3 semantics. 
- - -Standard library reorganization -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -``future`` supports the standard library reorganization (PEP 3108) through the -following Py3 interfaces: - - >>> # Top-level packages with Py3 names provided on Py2: - >>> import html.parser - >>> import queue - >>> import tkinter.dialog - >>> import xmlrpc.client - >>> # etc. - - >>> # Aliases provided for extensions to existing Py2 module names: - >>> from future.standard_library import install_aliases - >>> install_aliases() - - >>> from collections import Counter, OrderedDict # backported to Py2.6 - >>> from collections import UserDict, UserList, UserString - >>> import urllib.request - >>> from itertools import filterfalse, zip_longest - >>> from subprocess import getoutput, getstatusoutput - - -Automatic conversion --------------------- - -An included script called `futurize -<http://python-future.org/automatic_conversion.html>`_ aids in converting -code (from either Python 2 or Python 3) to code compatible with both -platforms. It is similar to ``python-modernize`` but goes further in -providing Python 3 compatibility through the use of the backported types -and builtin functions in ``future``. - - -Documentation -------------- - -See: http://python-future.org - - -Credits -------- - -:Author: Ed Schofield, Jordan M. Adler, et al -:Sponsor: Python Charmers Pty Ltd, Australia, and Python Charmers Pte - Ltd, Singapore. http://pythoncharmers.com -:Others: See docs/credits.rst or http://python-future.org/credits.html - - -Licensing ---------- -Copyright 2013-2019 Python Charmers Pty Ltd, Australia. -The software is distributed under an MIT licence. See LICENSE.txt. 
- -""" - -__title__ = 'future' -__author__ = 'Ed Schofield' -__license__ = 'MIT' -__copyright__ = 'Copyright 2013-2019 Python Charmers Pty Ltd' -__ver_major__ = 0 -__ver_minor__ = 18 -__ver_patch__ = 2 -__ver_sub__ = '' -__version__ = "%d.%d.%d%s" % (__ver_major__, __ver_minor__, - __ver_patch__, __ver_sub__) diff --git a/contrib/python/future/future/backports/__init__.py b/contrib/python/future/future/backports/__init__.py deleted file mode 100644 index c71e065354c..00000000000 --- a/contrib/python/future/future/backports/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -""" -future.backports package -""" - -from __future__ import absolute_import - -import sys - -__future_module__ = True -from future.standard_library import import_top_level_modules - - -if sys.version_info[0] >= 3: - import_top_level_modules() - - -from .misc import (ceil, - OrderedDict, - Counter, - ChainMap, - check_output, - count, - recursive_repr, - _count_elements, - cmp_to_key - ) diff --git a/contrib/python/future/future/backports/_markupbase.py b/contrib/python/future/future/backports/_markupbase.py deleted file mode 100644 index d51bfc7ef1c..00000000000 --- a/contrib/python/future/future/backports/_markupbase.py +++ /dev/null @@ -1,422 +0,0 @@ -"""Shared support for scanning document type declarations in HTML and XHTML. - -Backported for python-future from Python 3.3. Reason: ParserBase is an -old-style class in the Python 2.7 source of markupbase.py, which I suspect -might be the cause of sporadic unit-test failures on travis-ci.org with -test_htmlparser.py. 
The test failures look like this: - - ====================================================================== - -ERROR: test_attr_entity_replacement (future.tests.test_htmlparser.AttributesStrictTestCase) - ----------------------------------------------------------------------- - -Traceback (most recent call last): - File "/home/travis/build/edschofield/python-future/future/tests/test_htmlparser.py", line 661, in test_attr_entity_replacement - [("starttag", "a", [("b", "&><\"'")])]) - File "/home/travis/build/edschofield/python-future/future/tests/test_htmlparser.py", line 93, in _run_check - collector = self.get_collector() - File "/home/travis/build/edschofield/python-future/future/tests/test_htmlparser.py", line 617, in get_collector - return EventCollector(strict=True) - File "/home/travis/build/edschofield/python-future/future/tests/test_htmlparser.py", line 27, in __init__ - html.parser.HTMLParser.__init__(self, *args, **kw) - File "/home/travis/build/edschofield/python-future/future/backports/html/parser.py", line 135, in __init__ - self.reset() - File "/home/travis/build/edschofield/python-future/future/backports/html/parser.py", line 143, in reset - _markupbase.ParserBase.reset(self) - -TypeError: unbound method reset() must be called with ParserBase instance as first argument (got EventCollector instance instead) - -This module is used as a foundation for the html.parser module. It has no -documented public API and should not be used directly. 
- -""" - -import re - -_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*').match -_declstringlit_match = re.compile(r'(\'[^\']*\'|"[^"]*")\s*').match -_commentclose = re.compile(r'--\s*>') -_markedsectionclose = re.compile(r']\s*]\s*>') - -# An analysis of the MS-Word extensions is available at -# http://www.planetpublish.com/xmlarena/xap/Thursday/WordtoXML.pdf - -_msmarkedsectionclose = re.compile(r']\s*>') - -del re - - -class ParserBase(object): - """Parser base class which provides some common support methods used - by the SGML/HTML and XHTML parsers.""" - - def __init__(self): - if self.__class__ is ParserBase: - raise RuntimeError( - "_markupbase.ParserBase must be subclassed") - - def error(self, message): - raise NotImplementedError( - "subclasses of ParserBase must override error()") - - def reset(self): - self.lineno = 1 - self.offset = 0 - - def getpos(self): - """Return current line number and offset.""" - return self.lineno, self.offset - - # Internal -- update line number and offset. This should be - # called for each piece of data exactly once, in order -- in other - # words the concatenation of all the input strings to this - # function should be exactly the entire input. - def updatepos(self, i, j): - if i >= j: - return j - rawdata = self.rawdata - nlines = rawdata.count("\n", i, j) - if nlines: - self.lineno = self.lineno + nlines - pos = rawdata.rindex("\n", i, j) # Should not fail - self.offset = j-(pos+1) - else: - self.offset = self.offset + j-i - return j - - _decl_otherchars = '' - - # Internal -- parse declaration (for use by subclasses). - def parse_declaration(self, i): - # This is some sort of declaration; in "HTML as - # deployed," this should only be the document type - # declaration ("<!DOCTYPE html...>"). 
- # ISO 8879:1986, however, has more complex - # declaration syntax for elements in <!...>, including: - # --comment-- - # [marked section] - # name in the following list: ENTITY, DOCTYPE, ELEMENT, - # ATTLIST, NOTATION, SHORTREF, USEMAP, - # LINKTYPE, LINK, IDLINK, USELINK, SYSTEM - rawdata = self.rawdata - j = i + 2 - assert rawdata[i:j] == "<!", "unexpected call to parse_declaration" - if rawdata[j:j+1] == ">": - # the empty comment <!> - return j + 1 - if rawdata[j:j+1] in ("-", ""): - # Start of comment followed by buffer boundary, - # or just a buffer boundary. - return -1 - # A simple, practical version could look like: ((name|stringlit) S*) + '>' - n = len(rawdata) - if rawdata[j:j+2] == '--': #comment - # Locate --.*-- as the body of the comment - return self.parse_comment(i) - elif rawdata[j] == '[': #marked section - # Locate [statusWord [...arbitrary SGML...]] as the body of the marked section - # Where statusWord is one of TEMP, CDATA, IGNORE, INCLUDE, RCDATA - # Note that this is extended by Microsoft Office "Save as Web" function - # to include [if...] and [endif]. - return self.parse_marked_section(i) - else: #all other declaration elements - decltype, j = self._scan_name(j, i) - if j < 0: - return j - if decltype == "doctype": - self._decl_otherchars = '' - while j < n: - c = rawdata[j] - if c == ">": - # end of declaration syntax - data = rawdata[i+2:j] - if decltype == "doctype": - self.handle_decl(data) - else: - # According to the HTML5 specs sections "8.2.4.44 Bogus - # comment state" and "8.2.4.45 Markup declaration open - # state", a comment token should be emitted. - # Calling unknown_decl provides more flexibility though. 
- self.unknown_decl(data) - return j + 1 - if c in "\"'": - m = _declstringlit_match(rawdata, j) - if not m: - return -1 # incomplete - j = m.end() - elif c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ": - name, j = self._scan_name(j, i) - elif c in self._decl_otherchars: - j = j + 1 - elif c == "[": - # this could be handled in a separate doctype parser - if decltype == "doctype": - j = self._parse_doctype_subset(j + 1, i) - elif decltype in set(["attlist", "linktype", "link", "element"]): - # must tolerate []'d groups in a content model in an element declaration - # also in data attribute specifications of attlist declaration - # also link type declaration subsets in linktype declarations - # also link attribute specification lists in link declarations - self.error("unsupported '[' char in %s declaration" % decltype) - else: - self.error("unexpected '[' char in declaration") - else: - self.error( - "unexpected %r char in declaration" % rawdata[j]) - if j < 0: - return j - return -1 # incomplete - - # Internal -- parse a marked section - # Override this to handle MS-word extension syntax <![if word]>content<![endif]> - def parse_marked_section(self, i, report=1): - rawdata= self.rawdata - assert rawdata[i:i+3] == '<![', "unexpected call to parse_marked_section()" - sectName, j = self._scan_name( i+3, i ) - if j < 0: - return j - if sectName in set(["temp", "cdata", "ignore", "include", "rcdata"]): - # look for standard ]]> ending - match= _markedsectionclose.search(rawdata, i+3) - elif sectName in set(["if", "else", "endif"]): - # look for MS Office ]> ending - match= _msmarkedsectionclose.search(rawdata, i+3) - else: - self.error('unknown status keyword %r in marked section' % rawdata[i+3:j]) - if not match: - return -1 - if report: - j = match.start(0) - self.unknown_decl(rawdata[i+3: j]) - return match.end(0) - - # Internal -- parse comment, return length or -1 if not terminated - def parse_comment(self, i, report=1): - rawdata = self.rawdata - if 
rawdata[i:i+4] != '<!--': - self.error('unexpected call to parse_comment()') - match = _commentclose.search(rawdata, i+4) - if not match: - return -1 - if report: - j = match.start(0) - self.handle_comment(rawdata[i+4: j]) - return match.end(0) - - # Internal -- scan past the internal subset in a <!DOCTYPE declaration, - # returning the index just past any whitespace following the trailing ']'. - def _parse_doctype_subset(self, i, declstartpos): - rawdata = self.rawdata - n = len(rawdata) - j = i - while j < n: - c = rawdata[j] - if c == "<": - s = rawdata[j:j+2] - if s == "<": - # end of buffer; incomplete - return -1 - if s != "<!": - self.updatepos(declstartpos, j + 1) - self.error("unexpected char in internal subset (in %r)" % s) - if (j + 2) == n: - # end of buffer; incomplete - return -1 - if (j + 4) > n: - # end of buffer; incomplete - return -1 - if rawdata[j:j+4] == "<!--": - j = self.parse_comment(j, report=0) - if j < 0: - return j - continue - name, j = self._scan_name(j + 2, declstartpos) - if j == -1: - return -1 - if name not in set(["attlist", "element", "entity", "notation"]): - self.updatepos(declstartpos, j + 2) - self.error( - "unknown declaration %r in internal subset" % name) - # handle the individual names - meth = getattr(self, "_parse_doctype_" + name) - j = meth(j, declstartpos) - if j < 0: - return j - elif c == "%": - # parameter entity reference - if (j + 1) == n: - # end of buffer; incomplete - return -1 - s, j = self._scan_name(j + 1, declstartpos) - if j < 0: - return j - if rawdata[j] == ";": - j = j + 1 - elif c == "]": - j = j + 1 - while j < n and rawdata[j].isspace(): - j = j + 1 - if j < n: - if rawdata[j] == ">": - return j - self.updatepos(declstartpos, j) - self.error("unexpected char after internal subset") - else: - return -1 - elif c.isspace(): - j = j + 1 - else: - self.updatepos(declstartpos, j) - self.error("unexpected char %r in internal subset" % c) - # end of buffer reached - return -1 - - # Internal -- scan past 
<!ELEMENT declarations - def _parse_doctype_element(self, i, declstartpos): - name, j = self._scan_name(i, declstartpos) - if j == -1: - return -1 - # style content model; just skip until '>' - rawdata = self.rawdata - if '>' in rawdata[j:]: - return rawdata.find(">", j) + 1 - return -1 - - # Internal -- scan past <!ATTLIST declarations - def _parse_doctype_attlist(self, i, declstartpos): - rawdata = self.rawdata - name, j = self._scan_name(i, declstartpos) - c = rawdata[j:j+1] - if c == "": - return -1 - if c == ">": - return j + 1 - while 1: - # scan a series of attribute descriptions; simplified: - # name type [value] [#constraint] - name, j = self._scan_name(j, declstartpos) - if j < 0: - return j - c = rawdata[j:j+1] - if c == "": - return -1 - if c == "(": - # an enumerated type; look for ')' - if ")" in rawdata[j:]: - j = rawdata.find(")", j) + 1 - else: - return -1 - while rawdata[j:j+1].isspace(): - j = j + 1 - if not rawdata[j:]: - # end of buffer, incomplete - return -1 - else: - name, j = self._scan_name(j, declstartpos) - c = rawdata[j:j+1] - if not c: - return -1 - if c in "'\"": - m = _declstringlit_match(rawdata, j) - if m: - j = m.end() - else: - return -1 - c = rawdata[j:j+1] - if not c: - return -1 - if c == "#": - if rawdata[j:] == "#": - # end of buffer - return -1 - name, j = self._scan_name(j + 1, declstartpos) - if j < 0: - return j - c = rawdata[j:j+1] - if not c: - return -1 - if c == '>': - # all done - return j + 1 - - # Internal -- scan past <!NOTATION declarations - def _parse_doctype_notation(self, i, declstartpos): - name, j = self._scan_name(i, declstartpos) - if j < 0: - return j - rawdata = self.rawdata - while 1: - c = rawdata[j:j+1] - if not c: - # end of buffer; incomplete - return -1 - if c == '>': - return j + 1 - if c in "'\"": - m = _declstringlit_match(rawdata, j) - if not m: - return -1 - j = m.end() - else: - name, j = self._scan_name(j, declstartpos) - if j < 0: - return j - - # Internal -- scan past <!ENTITY 
declarations - def _parse_doctype_entity(self, i, declstartpos): - rawdata = self.rawdata - if rawdata[i:i+1] == "%": - j = i + 1 - while 1: - c = rawdata[j:j+1] - if not c: - return -1 - if c.isspace(): - j = j + 1 - else: - break - else: - j = i - name, j = self._scan_name(j, declstartpos) - if j < 0: - return j - while 1: - c = self.rawdata[j:j+1] - if not c: - return -1 - if c in "'\"": - m = _declstringlit_match(rawdata, j) - if m: - j = m.end() - else: - return -1 # incomplete - elif c == ">": - return j + 1 - else: - name, j = self._scan_name(j, declstartpos) - if j < 0: - return j - - # Internal -- scan a name token and the new position and the token, or - # return -1 if we've reached the end of the buffer. - def _scan_name(self, i, declstartpos): - rawdata = self.rawdata - n = len(rawdata) - if i == n: - return None, -1 - m = _declname_match(rawdata, i) - if m: - s = m.group() - name = s.strip() - if (i + len(s)) == n: - return None, -1 # end of buffer - return name.lower(), m.end() - else: - self.updatepos(declstartpos, i) - self.error("expected name token at %r" - % rawdata[declstartpos:declstartpos+20]) - - # To be overridden -- handlers for unknown objects - def unknown_decl(self, data): - pass diff --git a/contrib/python/future/future/backports/datetime.py b/contrib/python/future/future/backports/datetime.py deleted file mode 100644 index 3261014e056..00000000000 --- a/contrib/python/future/future/backports/datetime.py +++ /dev/null @@ -1,2152 +0,0 @@ -"""Concrete date/time and related types. - -See http://www.iana.org/time-zones/repository/tz-link.html for -time zone and DST data sources. 
-""" -from __future__ import division -from __future__ import unicode_literals -from __future__ import print_function -from __future__ import absolute_import -from future.builtins import str -from future.builtins import bytes -from future.builtins import map -from future.builtins import round -from future.builtins import int -from future.builtins import object -from future.utils import native_str, PY2 - -import time as _time -import math as _math - -def _cmp(x, y): - return 0 if x == y else 1 if x > y else -1 - -MINYEAR = 1 -MAXYEAR = 9999 -_MAXORDINAL = 3652059 # date.max.toordinal() - -# Utility functions, adapted from Python's Demo/classes/Dates.py, which -# also assumes the current Gregorian calendar indefinitely extended in -# both directions. Difference: Dates.py calls January 1 of year 0 day -# number 1. The code here calls January 1 of year 1 day number 1. This is -# to match the definition of the "proleptic Gregorian" calendar in Dershowitz -# and Reingold's "Calendrical Calculations", where it's the base calendar -# for all computations. See the book for algorithms for converting between -# proleptic Gregorian ordinals and many other calendar systems. - -_DAYS_IN_MONTH = [None, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] - -_DAYS_BEFORE_MONTH = [None] -dbm = 0 -for dim in _DAYS_IN_MONTH[1:]: - _DAYS_BEFORE_MONTH.append(dbm) - dbm += dim -del dbm, dim - -def _is_leap(year): - "year -> 1 if leap year, else 0." - return year % 4 == 0 and (year % 100 != 0 or year % 400 == 0) - -def _days_before_year(year): - "year -> number of days before January 1st of year." - y = year - 1 - return y*365 + y//4 - y//100 + y//400 - -def _days_in_month(year, month): - "year, month -> number of days in that month in that year." - assert 1 <= month <= 12, month - if month == 2 and _is_leap(year): - return 29 - return _DAYS_IN_MONTH[month] - -def _days_before_month(year, month): - "year, month -> number of days in year preceding first day of month." 
- assert 1 <= month <= 12, 'month must be in 1..12' - return _DAYS_BEFORE_MONTH[month] + (month > 2 and _is_leap(year)) - -def _ymd2ord(year, month, day): - "year, month, day -> ordinal, considering 01-Jan-0001 as day 1." - assert 1 <= month <= 12, 'month must be in 1..12' - dim = _days_in_month(year, month) - assert 1 <= day <= dim, ('day must be in 1..%d' % dim) - return (_days_before_year(year) + - _days_before_month(year, month) + - day) - -_DI400Y = _days_before_year(401) # number of days in 400 years -_DI100Y = _days_before_year(101) # " " " " 100 " -_DI4Y = _days_before_year(5) # " " " " 4 " - -# A 4-year cycle has an extra leap day over what we'd get from pasting -# together 4 single years. -assert _DI4Y == 4 * 365 + 1 - -# Similarly, a 400-year cycle has an extra leap day over what we'd get from -# pasting together 4 100-year cycles. -assert _DI400Y == 4 * _DI100Y + 1 - -# OTOH, a 100-year cycle has one fewer leap day than we'd get from -# pasting together 25 4-year cycles. -assert _DI100Y == 25 * _DI4Y - 1 - -def _ord2ymd(n): - "ordinal -> (year, month, day), considering 01-Jan-0001 as day 1." - - # n is a 1-based index, starting at 1-Jan-1. The pattern of leap years - # repeats exactly every 400 years. The basic strategy is to find the - # closest 400-year boundary at or before n, then work with the offset - # from that boundary to n. Life is much clearer if we subtract 1 from - # n first -- then the values of n at 400-year boundaries are exactly - # those divisible by _DI400Y: - # - # D M Y n n-1 - # -- --- ---- ---------- ---------------- - # 31 Dec -400 -_DI400Y -_DI400Y -1 - # 1 Jan -399 -_DI400Y +1 -_DI400Y 400-year boundary - # ... - # 30 Dec 000 -1 -2 - # 31 Dec 000 0 -1 - # 1 Jan 001 1 0 400-year boundary - # 2 Jan 001 2 1 - # 3 Jan 001 3 2 - # ... - # 31 Dec 400 _DI400Y _DI400Y -1 - # 1 Jan 401 _DI400Y +1 _DI400Y 400-year boundary - n -= 1 - n400, n = divmod(n, _DI400Y) - year = n400 * 400 + 1 # ..., -399, 1, 401, ... 
- - # Now n is the (non-negative) offset, in days, from January 1 of year, to - # the desired date. Now compute how many 100-year cycles precede n. - # Note that it's possible for n100 to equal 4! In that case 4 full - # 100-year cycles precede the desired day, which implies the desired - # day is December 31 at the end of a 400-year cycle. - n100, n = divmod(n, _DI100Y) - - # Now compute how many 4-year cycles precede it. - n4, n = divmod(n, _DI4Y) - - # And now how many single years. Again n1 can be 4, and again meaning - # that the desired day is December 31 at the end of the 4-year cycle. - n1, n = divmod(n, 365) - - year += n100 * 100 + n4 * 4 + n1 - if n1 == 4 or n100 == 4: - assert n == 0 - return year-1, 12, 31 - - # Now the year is correct, and n is the offset from January 1. We find - # the month via an estimate that's either exact or one too large. - leapyear = n1 == 3 and (n4 != 24 or n100 == 3) - assert leapyear == _is_leap(year) - month = (n + 50) >> 5 - preceding = _DAYS_BEFORE_MONTH[month] + (month > 2 and leapyear) - if preceding > n: # estimate is too large - month -= 1 - preceding -= _DAYS_IN_MONTH[month] + (month == 2 and leapyear) - n -= preceding - assert 0 <= n < _days_in_month(year, month) - - # Now the year and month are correct, and n is the offset from the - # start of that month: we're done! - return year, month, n+1 - -# Month and day names. For localized versions, see the calendar module. -_MONTHNAMES = [None, "Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] -_DAYNAMES = [None, "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] - - -def _build_struct_time(y, m, d, hh, mm, ss, dstflag): - wday = (_ymd2ord(y, m, d) + 6) % 7 - dnum = _days_before_month(y, m) + d - return _time.struct_time((y, m, d, hh, mm, ss, wday, dnum, dstflag)) - -def _format_time(hh, mm, ss, us): - # Skip trailing microseconds when us==0. 
- result = "%02d:%02d:%02d" % (hh, mm, ss) - if us: - result += ".%06d" % us - return result - -# Correctly substitute for %z and %Z escapes in strftime formats. -def _wrap_strftime(object, format, timetuple): - # Don't call utcoffset() or tzname() unless actually needed. - freplace = None # the string to use for %f - zreplace = None # the string to use for %z - Zreplace = None # the string to use for %Z - - # Scan format for %z and %Z escapes, replacing as needed. - newformat = [] - push = newformat.append - i, n = 0, len(format) - while i < n: - ch = format[i] - i += 1 - if ch == '%': - if i < n: - ch = format[i] - i += 1 - if ch == 'f': - if freplace is None: - freplace = '%06d' % getattr(object, - 'microsecond', 0) - newformat.append(freplace) - elif ch == 'z': - if zreplace is None: - zreplace = "" - if hasattr(object, "utcoffset"): - offset = object.utcoffset() - if offset is not None: - sign = '+' - if offset.days < 0: - offset = -offset - sign = '-' - h, m = divmod(offset, timedelta(hours=1)) - assert not m % timedelta(minutes=1), "whole minute" - m //= timedelta(minutes=1) - zreplace = '%c%02d%02d' % (sign, h, m) - assert '%' not in zreplace - newformat.append(zreplace) - elif ch == 'Z': - if Zreplace is None: - Zreplace = "" - if hasattr(object, "tzname"): - s = object.tzname() - if s is not None: - # strftime is going to have at this: escape % - Zreplace = s.replace('%', '%%') - newformat.append(Zreplace) - else: - push('%') - push(ch) - else: - push('%') - else: - push(ch) - newformat = "".join(newformat) - return _time.strftime(newformat, timetuple) - -def _call_tzinfo_method(tzinfo, methname, tzinfoarg): - if tzinfo is None: - return None - return getattr(tzinfo, methname)(tzinfoarg) - -# Just raise TypeError if the arg isn't None or a string. 
-def _check_tzname(name): - if name is not None and not isinstance(name, str): - raise TypeError("tzinfo.tzname() must return None or string, " - "not '%s'" % type(name)) - -# name is the offset-producing method, "utcoffset" or "dst". -# offset is what it returned. -# If offset isn't None or timedelta, raises TypeError. -# If offset is None, returns None. -# Else offset is checked for being in range, and a whole # of minutes. -# If it is, its integer value is returned. Else ValueError is raised. -def _check_utc_offset(name, offset): - assert name in ("utcoffset", "dst") - if offset is None: - return - if not isinstance(offset, timedelta): - raise TypeError("tzinfo.%s() must return None " - "or timedelta, not '%s'" % (name, type(offset))) - if offset % timedelta(minutes=1) or offset.microseconds: - raise ValueError("tzinfo.%s() must return a whole number " - "of minutes, got %s" % (name, offset)) - if not -timedelta(1) < offset < timedelta(1): - raise ValueError("%s()=%s, must be must be strictly between" - " -timedelta(hours=24) and timedelta(hours=24)" - % (name, offset)) - -def _check_date_fields(year, month, day): - if not isinstance(year, int): - raise TypeError('int expected') - if not MINYEAR <= year <= MAXYEAR: - raise ValueError('year must be in %d..%d' % (MINYEAR, MAXYEAR), year) - if not 1 <= month <= 12: - raise ValueError('month must be in 1..12', month) - dim = _days_in_month(year, month) - if not 1 <= day <= dim: - raise ValueError('day must be in 1..%d' % dim, day) - -def _check_time_fields(hour, minute, second, microsecond): - if not isinstance(hour, int): - raise TypeError('int expected') - if not 0 <= hour <= 23: - raise ValueError('hour must be in 0..23', hour) - if not 0 <= minute <= 59: - raise ValueError('minute must be in 0..59', minute) - if not 0 <= second <= 59: - raise ValueError('second must be in 0..59', second) - if not 0 <= microsecond <= 999999: - raise ValueError('microsecond must be in 0..999999', microsecond) - -def 
_check_tzinfo_arg(tz): - if tz is not None and not isinstance(tz, tzinfo): - raise TypeError("tzinfo argument must be None or of a tzinfo subclass") - -def _cmperror(x, y): - raise TypeError("can't compare '%s' to '%s'" % ( - type(x).__name__, type(y).__name__)) - -class timedelta(object): - """Represent the difference between two datetime objects. - - Supported operators: - - - add, subtract timedelta - - unary plus, minus, abs - - compare to timedelta - - multiply, divide by int - - In addition, datetime supports subtraction of two datetime objects - returning a timedelta, and addition or subtraction of a datetime - and a timedelta giving a datetime. - - Representation: (days, seconds, microseconds). Why? Because I - felt like it. - """ - __slots__ = '_days', '_seconds', '_microseconds' - - def __new__(cls, days=0, seconds=0, microseconds=0, - milliseconds=0, minutes=0, hours=0, weeks=0): - # Doing this efficiently and accurately in C is going to be difficult - # and error-prone, due to ubiquitous overflow possibilities, and that - # C double doesn't have enough bits of precision to represent - # microseconds over 10K years faithfully. The code here tries to make - # explicit where go-fast assumptions can be relied on, in order to - # guide the C implementation; it's way more convoluted than speed- - # ignoring auto-overflow-to-long idiomatic Python could be. - - # XXX Check that all inputs are ints or floats. - - # Final values, all integer. - # s and us fit in 32-bit signed ints; d isn't bounded. - d = s = us = 0 - - # Normalize everything to days, seconds, microseconds. - days += weeks*7 - seconds += minutes*60 + hours*3600 - microseconds += milliseconds*1000 - - # Get rid of all fractions, and normalize s and us. - # Take a deep breath <wink>. 
- if isinstance(days, float): - dayfrac, days = _math.modf(days) - daysecondsfrac, daysecondswhole = _math.modf(dayfrac * (24.*3600.)) - assert daysecondswhole == int(daysecondswhole) # can't overflow - s = int(daysecondswhole) - assert days == int(days) - d = int(days) - else: - daysecondsfrac = 0.0 - d = days - assert isinstance(daysecondsfrac, float) - assert abs(daysecondsfrac) <= 1.0 - assert isinstance(d, int) - assert abs(s) <= 24 * 3600 - # days isn't referenced again before redefinition - - if isinstance(seconds, float): - secondsfrac, seconds = _math.modf(seconds) - assert seconds == int(seconds) - seconds = int(seconds) - secondsfrac += daysecondsfrac - assert abs(secondsfrac) <= 2.0 - else: - secondsfrac = daysecondsfrac - # daysecondsfrac isn't referenced again - assert isinstance(secondsfrac, float) - assert abs(secondsfrac) <= 2.0 - - assert isinstance(seconds, int) - days, seconds = divmod(seconds, 24*3600) - d += days - s += int(seconds) # can't overflow - assert isinstance(s, int) - assert abs(s) <= 2 * 24 * 3600 - # seconds isn't referenced again before redefinition - - usdouble = secondsfrac * 1e6 - assert abs(usdouble) < 2.1e6 # exact value not critical - # secondsfrac isn't referenced again - - if isinstance(microseconds, float): - microseconds += usdouble - microseconds = round(microseconds, 0) - seconds, microseconds = divmod(microseconds, 1e6) - assert microseconds == int(microseconds) - assert seconds == int(seconds) - days, seconds = divmod(seconds, 24.*3600.) 
- assert days == int(days) - assert seconds == int(seconds) - d += int(days) - s += int(seconds) # can't overflow - assert isinstance(s, int) - assert abs(s) <= 3 * 24 * 3600 - else: - seconds, microseconds = divmod(microseconds, 1000000) - days, seconds = divmod(seconds, 24*3600) - d += days - s += int(seconds) # can't overflow - assert isinstance(s, int) - assert abs(s) <= 3 * 24 * 3600 - microseconds = float(microseconds) - microseconds += usdouble - microseconds = round(microseconds, 0) - assert abs(s) <= 3 * 24 * 3600 - assert abs(microseconds) < 3.1e6 - - # Just a little bit of carrying possible for microseconds and seconds. - assert isinstance(microseconds, float) - assert int(microseconds) == microseconds - us = int(microseconds) - seconds, us = divmod(us, 1000000) - s += seconds # cant't overflow - assert isinstance(s, int) - days, s = divmod(s, 24*3600) - d += days - - assert isinstance(d, int) - assert isinstance(s, int) and 0 <= s < 24*3600 - assert isinstance(us, int) and 0 <= us < 1000000 - - self = object.__new__(cls) - - self._days = d - self._seconds = s - self._microseconds = us - if abs(d) > 999999999: - raise OverflowError("timedelta # of days is too large: %d" % d) - - return self - - def __repr__(self): - if self._microseconds: - return "%s(%d, %d, %d)" % ('datetime.' + self.__class__.__name__, - self._days, - self._seconds, - self._microseconds) - if self._seconds: - return "%s(%d, %d)" % ('datetime.' + self.__class__.__name__, - self._days, - self._seconds) - return "%s(%d)" % ('datetime.' 
+ self.__class__.__name__, self._days) - - def __str__(self): - mm, ss = divmod(self._seconds, 60) - hh, mm = divmod(mm, 60) - s = "%d:%02d:%02d" % (hh, mm, ss) - if self._days: - def plural(n): - return n, abs(n) != 1 and "s" or "" - s = ("%d day%s, " % plural(self._days)) + s - if self._microseconds: - s = s + ".%06d" % self._microseconds - return s - - def total_seconds(self): - """Total seconds in the duration.""" - return ((self.days * 86400 + self.seconds)*10**6 + - self.microseconds) / 10**6 - - # Read-only field accessors - @property - def days(self): - """days""" - return self._days - - @property - def seconds(self): - """seconds""" - return self._seconds - - @property - def microseconds(self): - """microseconds""" - return self._microseconds - - def __add__(self, other): - if isinstance(other, timedelta): - # for CPython compatibility, we cannot use - # our __class__ here, but need a real timedelta - return timedelta(self._days + other._days, - self._seconds + other._seconds, - self._microseconds + other._microseconds) - return NotImplemented - - __radd__ = __add__ - - def __sub__(self, other): - if isinstance(other, timedelta): - # for CPython compatibility, we cannot use - # our __class__ here, but need a real timedelta - return timedelta(self._days - other._days, - self._seconds - other._seconds, - self._microseconds - other._microseconds) - return NotImplemented - - def __rsub__(self, other): - if isinstance(other, timedelta): - return -self + other - return NotImplemented - - def __neg__(self): - # for CPython compatibility, we cannot use - # our __class__ here, but need a real timedelta - return timedelta(-self._days, - -self._seconds, - -self._microseconds) - - def __pos__(self): - return self - - def __abs__(self): - if self._days < 0: - return -self - else: - return self - - def __mul__(self, other): - if isinstance(other, int): - # for CPython compatibility, we cannot use - # our __class__ here, but need a real timedelta - return 
timedelta(self._days * other, - self._seconds * other, - self._microseconds * other) - if isinstance(other, float): - a, b = other.as_integer_ratio() - return self * a / b - return NotImplemented - - __rmul__ = __mul__ - - def _to_microseconds(self): - return ((self._days * (24*3600) + self._seconds) * 1000000 + - self._microseconds) - - def __floordiv__(self, other): - if not isinstance(other, (int, timedelta)): - return NotImplemented - usec = self._to_microseconds() - if isinstance(other, timedelta): - return usec // other._to_microseconds() - if isinstance(other, int): - return timedelta(0, 0, usec // other) - - def __truediv__(self, other): - if not isinstance(other, (int, float, timedelta)): - return NotImplemented - usec = self._to_microseconds() - if isinstance(other, timedelta): - return usec / other._to_microseconds() - if isinstance(other, int): - return timedelta(0, 0, usec / other) - if isinstance(other, float): - a, b = other.as_integer_ratio() - return timedelta(0, 0, b * usec / a) - - def __mod__(self, other): - if isinstance(other, timedelta): - r = self._to_microseconds() % other._to_microseconds() - return timedelta(0, 0, r) - return NotImplemented - - def __divmod__(self, other): - if isinstance(other, timedelta): - q, r = divmod(self._to_microseconds(), - other._to_microseconds()) - return q, timedelta(0, 0, r) - return NotImplemented - - # Comparisons of timedelta objects with other. 
- - def __eq__(self, other): - if isinstance(other, timedelta): - return self._cmp(other) == 0 - else: - return False - - def __ne__(self, other): - if isinstance(other, timedelta): - return self._cmp(other) != 0 - else: - return True - - def __le__(self, other): - if isinstance(other, timedelta): - return self._cmp(other) <= 0 - else: - _cmperror(self, other) - - def __lt__(self, other): - if isinstance(other, timedelta): - return self._cmp(other) < 0 - else: - _cmperror(self, other) - - def __ge__(self, other): - if isinstance(other, timedelta): - return self._cmp(other) >= 0 - else: - _cmperror(self, other) - - def __gt__(self, other): - if isinstance(other, timedelta): - return self._cmp(other) > 0 - else: - _cmperror(self, other) - - def _cmp(self, other): - assert isinstance(other, timedelta) - return _cmp(self._getstate(), other._getstate()) - - def __hash__(self): - return hash(self._getstate()) - - def __bool__(self): - return (self._days != 0 or - self._seconds != 0 or - self._microseconds != 0) - - # Pickle support. - - def _getstate(self): - return (self._days, self._seconds, self._microseconds) - - def __reduce__(self): - return (self.__class__, self._getstate()) - -timedelta.min = timedelta(-999999999) -timedelta.max = timedelta(days=999999999, hours=23, minutes=59, seconds=59, - microseconds=999999) -timedelta.resolution = timedelta(microseconds=1) - -class date(object): - """Concrete date type. - - Constructors: - - __new__() - fromtimestamp() - today() - fromordinal() - - Operators: - - __repr__, __str__ - __cmp__, __hash__ - __add__, __radd__, __sub__ (add/radd only with timedelta arg) - - Methods: - - timetuple() - toordinal() - weekday() - isoweekday(), isocalendar(), isoformat() - ctime() - strftime() - - Properties (readonly): - year, month, day - """ - __slots__ = '_year', '_month', '_day' - - def __new__(cls, year, month=None, day=None): - """Constructor. 
- - Arguments: - - year, month, day (required, base 1) - """ - if (isinstance(year, bytes) and len(year) == 4 and - 1 <= year[2] <= 12 and month is None): # Month is sane - # Pickle support - self = object.__new__(cls) - self.__setstate(year) - return self - _check_date_fields(year, month, day) - self = object.__new__(cls) - self._year = year - self._month = month - self._day = day - return self - - # Additional constructors - - @classmethod - def fromtimestamp(cls, t): - "Construct a date from a POSIX timestamp (like time.time())." - y, m, d, hh, mm, ss, weekday, jday, dst = _time.localtime(t) - return cls(y, m, d) - - @classmethod - def today(cls): - "Construct a date from time.time()." - t = _time.time() - return cls.fromtimestamp(t) - - @classmethod - def fromordinal(cls, n): - """Contruct a date from a proleptic Gregorian ordinal. - - January 1 of year 1 is day 1. Only the year, month and day are - non-zero in the result. - """ - y, m, d = _ord2ymd(n) - return cls(y, m, d) - - # Conversions to string - - def __repr__(self): - """Convert to formal string, for repr(). - - >>> dt = datetime(2010, 1, 1) - >>> repr(dt) - 'datetime.datetime(2010, 1, 1, 0, 0)' - - >>> dt = datetime(2010, 1, 1, tzinfo=timezone.utc) - >>> repr(dt) - 'datetime.datetime(2010, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)' - """ - return "%s(%d, %d, %d)" % ('datetime.' + self.__class__.__name__, - self._year, - self._month, - self._day) - # XXX These shouldn't depend on time.localtime(), because that - # clips the usable dates to [1970 .. 2038). At least ctime() is - # easily done without using strftime() -- that's better too because - # strftime("%c", ...) is locale specific. - - - def ctime(self): - "Return ctime() style string." - weekday = self.toordinal() % 7 or 7 - return "%s %s %2d 00:00:00 %04d" % ( - _DAYNAMES[weekday], - _MONTHNAMES[self._month], - self._day, self._year) - - def strftime(self, fmt): - "Format using strftime()." 
- return _wrap_strftime(self, fmt, self.timetuple()) - - def __format__(self, fmt): - if len(fmt) != 0: - return self.strftime(fmt) - return str(self) - - def isoformat(self): - """Return the date formatted according to ISO. - - This is 'YYYY-MM-DD'. - - References: - - http://www.w3.org/TR/NOTE-datetime - - http://www.cl.cam.ac.uk/~mgk25/iso-time.html - """ - return "%04d-%02d-%02d" % (self._year, self._month, self._day) - - __str__ = isoformat - - # Read-only field accessors - @property - def year(self): - """year (1-9999)""" - return self._year - - @property - def month(self): - """month (1-12)""" - return self._month - - @property - def day(self): - """day (1-31)""" - return self._day - - # Standard conversions, __cmp__, __hash__ (and helpers) - - def timetuple(self): - "Return local time tuple compatible with time.localtime()." - return _build_struct_time(self._year, self._month, self._day, - 0, 0, 0, -1) - - def toordinal(self): - """Return proleptic Gregorian ordinal for the year, month and day. - - January 1 of year 1 is day 1. Only the year, month and day values - contribute to the result. - """ - return _ymd2ord(self._year, self._month, self._day) - - def replace(self, year=None, month=None, day=None): - """Return a new date with new values for the specified fields.""" - if year is None: - year = self._year - if month is None: - month = self._month - if day is None: - day = self._day - _check_date_fields(year, month, day) - return date(year, month, day) - - # Comparisons of date objects with other. 
- - def __eq__(self, other): - if isinstance(other, date): - return self._cmp(other) == 0 - return NotImplemented - - def __ne__(self, other): - if isinstance(other, date): - return self._cmp(other) != 0 - return NotImplemented - - def __le__(self, other): - if isinstance(other, date): - return self._cmp(other) <= 0 - return NotImplemented - - def __lt__(self, other): - if isinstance(other, date): - return self._cmp(other) < 0 - return NotImplemented - - def __ge__(self, other): - if isinstance(other, date): - return self._cmp(other) >= 0 - return NotImplemented - - def __gt__(self, other): - if isinstance(other, date): - return self._cmp(other) > 0 - return NotImplemented - - def _cmp(self, other): - assert isinstance(other, date) - y, m, d = self._year, self._month, self._day - y2, m2, d2 = other._year, other._month, other._day - return _cmp((y, m, d), (y2, m2, d2)) - - def __hash__(self): - "Hash." - return hash(self._getstate()) - - # Computations - - def __add__(self, other): - "Add a date to a timedelta." - if isinstance(other, timedelta): - o = self.toordinal() + other.days - if 0 < o <= _MAXORDINAL: - return date.fromordinal(o) - raise OverflowError("result out of range") - return NotImplemented - - __radd__ = __add__ - - def __sub__(self, other): - """Subtract two dates, or a date and a timedelta.""" - if isinstance(other, timedelta): - return self + timedelta(-other.days) - if isinstance(other, date): - days1 = self.toordinal() - days2 = other.toordinal() - return timedelta(days1 - days2) - return NotImplemented - - def weekday(self): - "Return day of the week, where Monday == 0 ... Sunday == 6." - return (self.toordinal() + 6) % 7 - - # Day-of-the-week and week-of-the-year, according to ISO - - def isoweekday(self): - "Return day of the week, where Monday == 1 ... Sunday == 7." - # 1-Jan-0001 is a Monday - return self.toordinal() % 7 or 7 - - def isocalendar(self): - """Return a 3-tuple containing ISO year, week number, and weekday. 
- - The first ISO week of the year is the (Mon-Sun) week - containing the year's first Thursday; everything else derives - from that. - - The first week is 1; Monday is 1 ... Sunday is 7. - - ISO calendar algorithm taken from - http://www.phys.uu.nl/~vgent/calendar/isocalendar.htm - """ - year = self._year - week1monday = _isoweek1monday(year) - today = _ymd2ord(self._year, self._month, self._day) - # Internally, week and day have origin 0 - week, day = divmod(today - week1monday, 7) - if week < 0: - year -= 1 - week1monday = _isoweek1monday(year) - week, day = divmod(today - week1monday, 7) - elif week >= 52: - if today >= _isoweek1monday(year+1): - year += 1 - week = 0 - return year, week+1, day+1 - - # Pickle support. - - def _getstate(self): - yhi, ylo = divmod(self._year, 256) - return bytes([yhi, ylo, self._month, self._day]), - - def __setstate(self, string): - if len(string) != 4 or not (1 <= string[2] <= 12): - raise TypeError("not enough arguments") - yhi, ylo, self._month, self._day = string - self._year = yhi * 256 + ylo - - def __reduce__(self): - return (self.__class__, self._getstate()) - -_date_class = date # so functions w/ args named "date" can get at the class - -date.min = date(1, 1, 1) -date.max = date(9999, 12, 31) -date.resolution = timedelta(days=1) - -class tzinfo(object): - """Abstract base class for time zone info classes. - - Subclasses must override the name(), utcoffset() and dst() methods. - """ - __slots__ = () - def tzname(self, dt): - "datetime -> string name of time zone." - raise NotImplementedError("tzinfo subclass must override tzname()") - - def utcoffset(self, dt): - "datetime -> minutes east of UTC (negative for west of UTC)" - raise NotImplementedError("tzinfo subclass must override utcoffset()") - - def dst(self, dt): - """datetime -> DST offset in minutes east of UTC. - - Return 0 if DST not in effect. utcoffset() must include the DST - offset. 
- """ - raise NotImplementedError("tzinfo subclass must override dst()") - - def fromutc(self, dt): - "datetime in UTC -> datetime in local time." - - if not isinstance(dt, datetime): - raise TypeError("fromutc() requires a datetime argument") - if dt.tzinfo is not self: - raise ValueError("dt.tzinfo is not self") - - dtoff = dt.utcoffset() - if dtoff is None: - raise ValueError("fromutc() requires a non-None utcoffset() " - "result") - - # See the long comment block at the end of this file for an - # explanation of this algorithm. - dtdst = dt.dst() - if dtdst is None: - raise ValueError("fromutc() requires a non-None dst() result") - delta = dtoff - dtdst - if delta: - dt += delta - dtdst = dt.dst() - if dtdst is None: - raise ValueError("fromutc(): dt.dst gave inconsistent " - "results; cannot convert") - return dt + dtdst - - # Pickle support. - - def __reduce__(self): - getinitargs = getattr(self, "__getinitargs__", None) - if getinitargs: - args = getinitargs() - else: - args = () - getstate = getattr(self, "__getstate__", None) - if getstate: - state = getstate() - else: - state = getattr(self, "__dict__", None) or None - if state is None: - return (self.__class__, args) - else: - return (self.__class__, args, state) - -_tzinfo_class = tzinfo - -class time(object): - """Time with time zone. - - Constructors: - - __new__() - - Operators: - - __repr__, __str__ - __cmp__, __hash__ - - Methods: - - strftime() - isoformat() - utcoffset() - tzname() - dst() - - Properties (readonly): - hour, minute, second, microsecond, tzinfo - """ - - def __new__(cls, hour=0, minute=0, second=0, microsecond=0, tzinfo=None): - """Constructor. 
- - Arguments: - - hour, minute (required) - second, microsecond (default to zero) - tzinfo (default to None) - """ - self = object.__new__(cls) - if isinstance(hour, bytes) and len(hour) == 6: - # Pickle support - self.__setstate(hour, minute or None) - return self - _check_tzinfo_arg(tzinfo) - _check_time_fields(hour, minute, second, microsecond) - self._hour = hour - self._minute = minute - self._second = second - self._microsecond = microsecond - self._tzinfo = tzinfo - return self - - # Read-only field accessors - @property - def hour(self): - """hour (0-23)""" - return self._hour - - @property - def minute(self): - """minute (0-59)""" - return self._minute - - @property - def second(self): - """second (0-59)""" - return self._second - - @property - def microsecond(self): - """microsecond (0-999999)""" - return self._microsecond - - @property - def tzinfo(self): - """timezone info object""" - return self._tzinfo - - # Standard conversions, __hash__ (and helpers) - - # Comparisons of time objects with other. 
- - def __eq__(self, other): - if isinstance(other, time): - return self._cmp(other, allow_mixed=True) == 0 - else: - return False - - def __ne__(self, other): - if isinstance(other, time): - return self._cmp(other, allow_mixed=True) != 0 - else: - return True - - def __le__(self, other): - if isinstance(other, time): - return self._cmp(other) <= 0 - else: - _cmperror(self, other) - - def __lt__(self, other): - if isinstance(other, time): - return self._cmp(other) < 0 - else: - _cmperror(self, other) - - def __ge__(self, other): - if isinstance(other, time): - return self._cmp(other) >= 0 - else: - _cmperror(self, other) - - def __gt__(self, other): - if isinstance(other, time): - return self._cmp(other) > 0 - else: - _cmperror(self, other) - - def _cmp(self, other, allow_mixed=False): - assert isinstance(other, time) - mytz = self._tzinfo - ottz = other._tzinfo - myoff = otoff = None - - if mytz is ottz: - base_compare = True - else: - myoff = self.utcoffset() - otoff = other.utcoffset() - base_compare = myoff == otoff - - if base_compare: - return _cmp((self._hour, self._minute, self._second, - self._microsecond), - (other._hour, other._minute, other._second, - other._microsecond)) - if myoff is None or otoff is None: - if allow_mixed: - return 2 # arbitrary non-zero value - else: - raise TypeError("cannot compare naive and aware times") - myhhmm = self._hour * 60 + self._minute - myoff//timedelta(minutes=1) - othhmm = other._hour * 60 + other._minute - otoff//timedelta(minutes=1) - return _cmp((myhhmm, self._second, self._microsecond), - (othhmm, other._second, other._microsecond)) - - def __hash__(self): - """Hash.""" - tzoff = self.utcoffset() - if not tzoff: # zero or None - return hash(self._getstate()[0]) - h, m = divmod(timedelta(hours=self.hour, minutes=self.minute) - tzoff, - timedelta(hours=1)) - assert not m % timedelta(minutes=1), "whole minute" - m //= timedelta(minutes=1) - if 0 <= h < 24: - return hash(time(h, m, self.second, self.microsecond)) - 
return hash((h, m, self.second, self.microsecond)) - - # Conversion to string - - def _tzstr(self, sep=":"): - """Return formatted timezone offset (+xx:xx) or None.""" - off = self.utcoffset() - if off is not None: - if off.days < 0: - sign = "-" - off = -off - else: - sign = "+" - hh, mm = divmod(off, timedelta(hours=1)) - assert not mm % timedelta(minutes=1), "whole minute" - mm //= timedelta(minutes=1) - assert 0 <= hh < 24 - off = "%s%02d%s%02d" % (sign, hh, sep, mm) - return off - - def __repr__(self): - """Convert to formal string, for repr().""" - if self._microsecond != 0: - s = ", %d, %d" % (self._second, self._microsecond) - elif self._second != 0: - s = ", %d" % self._second - else: - s = "" - s= "%s(%d, %d%s)" % ('datetime.' + self.__class__.__name__, - self._hour, self._minute, s) - if self._tzinfo is not None: - assert s[-1:] == ")" - s = s[:-1] + ", tzinfo=%r" % self._tzinfo + ")" - return s - - def isoformat(self): - """Return the time formatted according to ISO. - - This is 'HH:MM:SS.mmmmmm+zz:zz', or 'HH:MM:SS+zz:zz' if - self.microsecond == 0. - """ - s = _format_time(self._hour, self._minute, self._second, - self._microsecond) - tz = self._tzstr() - if tz: - s += tz - return s - - __str__ = isoformat - - def strftime(self, fmt): - """Format using strftime(). The date part of the timestamp passed - to underlying strftime should not be used. - """ - # The year must be >= 1000 else Python's strftime implementation - # can raise a bogus exception. 
- timetuple = (1900, 1, 1, - self._hour, self._minute, self._second, - 0, 1, -1) - return _wrap_strftime(self, fmt, timetuple) - - def __format__(self, fmt): - if len(fmt) != 0: - return self.strftime(fmt) - return str(self) - - # Timezone functions - - def utcoffset(self): - """Return the timezone offset in minutes east of UTC (negative west of - UTC).""" - if self._tzinfo is None: - return None - offset = self._tzinfo.utcoffset(None) - _check_utc_offset("utcoffset", offset) - return offset - - def tzname(self): - """Return the timezone name. - - Note that the name is 100% informational -- there's no requirement that - it mean anything in particular. For example, "GMT", "UTC", "-500", - "-5:00", "EDT", "US/Eastern", "America/New York" are all valid replies. - """ - if self._tzinfo is None: - return None - name = self._tzinfo.tzname(None) - _check_tzname(name) - return name - - def dst(self): - """Return 0 if DST is not in effect, or the DST offset (in minutes - eastward) if DST is in effect. - - This is purely informational; the DST offset has already been added to - the UTC offset returned by utcoffset() if applicable, so there's no - need to consult dst() unless you're interested in displaying the DST - info. 
- """ - if self._tzinfo is None: - return None - offset = self._tzinfo.dst(None) - _check_utc_offset("dst", offset) - return offset - - def replace(self, hour=None, minute=None, second=None, microsecond=None, - tzinfo=True): - """Return a new time with new values for the specified fields.""" - if hour is None: - hour = self.hour - if minute is None: - minute = self.minute - if second is None: - second = self.second - if microsecond is None: - microsecond = self.microsecond - if tzinfo is True: - tzinfo = self.tzinfo - _check_time_fields(hour, minute, second, microsecond) - _check_tzinfo_arg(tzinfo) - return time(hour, minute, second, microsecond, tzinfo) - - def __bool__(self): - if self.second or self.microsecond: - return True - offset = self.utcoffset() or timedelta(0) - return timedelta(hours=self.hour, minutes=self.minute) != offset - - # Pickle support. - - def _getstate(self): - us2, us3 = divmod(self._microsecond, 256) - us1, us2 = divmod(us2, 256) - basestate = bytes([self._hour, self._minute, self._second, - us1, us2, us3]) - if self._tzinfo is None: - return (basestate,) - else: - return (basestate, self._tzinfo) - - def __setstate(self, string, tzinfo): - if len(string) != 6 or string[0] >= 24: - raise TypeError("an integer is required") - (self._hour, self._minute, self._second, - us1, us2, us3) = string - self._microsecond = (((us1 << 8) | us2) << 8) | us3 - if tzinfo is None or isinstance(tzinfo, _tzinfo_class): - self._tzinfo = tzinfo - else: - raise TypeError("bad tzinfo state arg %r" % tzinfo) - - def __reduce__(self): - return (time, self._getstate()) - -_time_class = time # so functions w/ args named "time" can get at the class - -time.min = time(0, 0, 0) -time.max = time(23, 59, 59, 999999) -time.resolution = timedelta(microseconds=1) - -class datetime(date): - """datetime(year, month, day[, hour[, minute[, second[, microsecond[,tzinfo]]]]]) - - The year, month and day arguments are required. 
tzinfo may be None, or an - instance of a tzinfo subclass. The remaining arguments may be ints. - """ - - __slots__ = date.__slots__ + ( - '_hour', '_minute', '_second', - '_microsecond', '_tzinfo') - def __new__(cls, year, month=None, day=None, hour=0, minute=0, second=0, - microsecond=0, tzinfo=None): - if isinstance(year, bytes) and len(year) == 10: - # Pickle support - self = date.__new__(cls, year[:4]) - self.__setstate(year, month) - return self - _check_tzinfo_arg(tzinfo) - _check_time_fields(hour, minute, second, microsecond) - self = date.__new__(cls, year, month, day) - self._hour = hour - self._minute = minute - self._second = second - self._microsecond = microsecond - self._tzinfo = tzinfo - return self - - # Read-only field accessors - @property - def hour(self): - """hour (0-23)""" - return self._hour - - @property - def minute(self): - """minute (0-59)""" - return self._minute - - @property - def second(self): - """second (0-59)""" - return self._second - - @property - def microsecond(self): - """microsecond (0-999999)""" - return self._microsecond - - @property - def tzinfo(self): - """timezone info object""" - return self._tzinfo - - @classmethod - def fromtimestamp(cls, t, tz=None): - """Construct a datetime from a POSIX timestamp (like time.time()). - - A timezone info object may be passed in as well. - """ - - _check_tzinfo_arg(tz) - - converter = _time.localtime if tz is None else _time.gmtime - - t, frac = divmod(t, 1.0) - us = int(frac * 1e6) - - # If timestamp is less than one microsecond smaller than a - # full second, us can be rounded up to 1000000. In this case, - # roll over to seconds, otherwise, ValueError is raised - # by the constructor. 
- if us == 1000000: - t += 1 - us = 0 - y, m, d, hh, mm, ss, weekday, jday, dst = converter(t) - ss = min(ss, 59) # clamp out leap seconds if the platform has them - result = cls(y, m, d, hh, mm, ss, us, tz) - if tz is not None: - result = tz.fromutc(result) - return result - - @classmethod - def utcfromtimestamp(cls, t): - "Construct a UTC datetime from a POSIX timestamp (like time.time())." - t, frac = divmod(t, 1.0) - us = int(frac * 1e6) - - # If timestamp is less than one microsecond smaller than a - # full second, us can be rounded up to 1000000. In this case, - # roll over to seconds, otherwise, ValueError is raised - # by the constructor. - if us == 1000000: - t += 1 - us = 0 - y, m, d, hh, mm, ss, weekday, jday, dst = _time.gmtime(t) - ss = min(ss, 59) # clamp out leap seconds if the platform has them - return cls(y, m, d, hh, mm, ss, us) - - # XXX This is supposed to do better than we *can* do by using time.time(), - # XXX if the platform supports a more accurate way. The C implementation - # XXX uses gettimeofday on platforms that have it, but that isn't - # XXX available from Python. So now() may return different results - # XXX across the implementations. - @classmethod - def now(cls, tz=None): - "Construct a datetime from time.time() and optional time zone info." - t = _time.time() - return cls.fromtimestamp(t, tz) - - @classmethod - def utcnow(cls): - "Construct a UTC datetime from time.time()." - t = _time.time() - return cls.utcfromtimestamp(t) - - @classmethod - def combine(cls, date, time): - "Construct a datetime from a given date and a given time." - if not isinstance(date, _date_class): - raise TypeError("date argument must be a date instance") - if not isinstance(time, _time_class): - raise TypeError("time argument must be a time instance") - return cls(date.year, date.month, date.day, - time.hour, time.minute, time.second, time.microsecond, - time.tzinfo) - - def timetuple(self): - "Return local time tuple compatible with time.localtime()." 
- dst = self.dst() - if dst is None: - dst = -1 - elif dst: - dst = 1 - else: - dst = 0 - return _build_struct_time(self.year, self.month, self.day, - self.hour, self.minute, self.second, - dst) - - def timestamp(self): - "Return POSIX timestamp as float" - if self._tzinfo is None: - return _time.mktime((self.year, self.month, self.day, - self.hour, self.minute, self.second, - -1, -1, -1)) + self.microsecond / 1e6 - else: - return (self - _EPOCH).total_seconds() - - def utctimetuple(self): - "Return UTC time tuple compatible with time.gmtime()." - offset = self.utcoffset() - if offset: - self -= offset - y, m, d = self.year, self.month, self.day - hh, mm, ss = self.hour, self.minute, self.second - return _build_struct_time(y, m, d, hh, mm, ss, 0) - - def date(self): - "Return the date part." - return date(self._year, self._month, self._day) - - def time(self): - "Return the time part, with tzinfo None." - return time(self.hour, self.minute, self.second, self.microsecond) - - def timetz(self): - "Return the time part, with same tzinfo." 
- return time(self.hour, self.minute, self.second, self.microsecond, - self._tzinfo) - - def replace(self, year=None, month=None, day=None, hour=None, - minute=None, second=None, microsecond=None, tzinfo=True): - """Return a new datetime with new values for the specified fields.""" - if year is None: - year = self.year - if month is None: - month = self.month - if day is None: - day = self.day - if hour is None: - hour = self.hour - if minute is None: - minute = self.minute - if second is None: - second = self.second - if microsecond is None: - microsecond = self.microsecond - if tzinfo is True: - tzinfo = self.tzinfo - _check_date_fields(year, month, day) - _check_time_fields(hour, minute, second, microsecond) - _check_tzinfo_arg(tzinfo) - return datetime(year, month, day, hour, minute, second, - microsecond, tzinfo) - - def astimezone(self, tz=None): - if tz is None: - if self.tzinfo is None: - raise ValueError("astimezone() requires an aware datetime") - ts = (self - _EPOCH) // timedelta(seconds=1) - localtm = _time.localtime(ts) - local = datetime(*localtm[:6]) - try: - # Extract TZ data if available - gmtoff = localtm.tm_gmtoff - zone = localtm.tm_zone - except AttributeError: - # Compute UTC offset and compare with the value implied - # by tm_isdst. If the values match, use the zone name - # implied by tm_isdst. - delta = local - datetime(*_time.gmtime(ts)[:6]) - dst = _time.daylight and localtm.tm_isdst > 0 - gmtoff = -(_time.altzone if dst else _time.timezone) - if delta == timedelta(seconds=gmtoff): - tz = timezone(delta, _time.tzname[dst]) - else: - tz = timezone(delta) - else: - tz = timezone(timedelta(seconds=gmtoff), zone) - - elif not isinstance(tz, tzinfo): - raise TypeError("tz argument must be an instance of tzinfo") - - mytz = self.tzinfo - if mytz is None: - raise ValueError("astimezone() requires an aware datetime") - - if tz is mytz: - return self - - # Convert self to UTC, and attach the new time zone object. 
- myoffset = self.utcoffset() - if myoffset is None: - raise ValueError("astimezone() requires an aware datetime") - utc = (self - myoffset).replace(tzinfo=tz) - - # Convert from UTC to tz's local time. - return tz.fromutc(utc) - - # Ways to produce a string. - - def ctime(self): - "Return ctime() style string." - weekday = self.toordinal() % 7 or 7 - return "%s %s %2d %02d:%02d:%02d %04d" % ( - _DAYNAMES[weekday], - _MONTHNAMES[self._month], - self._day, - self._hour, self._minute, self._second, - self._year) - - def isoformat(self, sep='T'): - """Return the time formatted according to ISO. - - This is 'YYYY-MM-DD HH:MM:SS.mmmmmm', or 'YYYY-MM-DD HH:MM:SS' if - self.microsecond == 0. - - If self.tzinfo is not None, the UTC offset is also attached, giving - 'YYYY-MM-DD HH:MM:SS.mmmmmm+HH:MM' or 'YYYY-MM-DD HH:MM:SS+HH:MM'. - - Optional argument sep specifies the separator between date and - time, default 'T'. - """ - s = ("%04d-%02d-%02d%c" % (self._year, self._month, self._day, - sep) + - _format_time(self._hour, self._minute, self._second, - self._microsecond)) - off = self.utcoffset() - if off is not None: - if off.days < 0: - sign = "-" - off = -off - else: - sign = "+" - hh, mm = divmod(off, timedelta(hours=1)) - assert not mm % timedelta(minutes=1), "whole minute" - mm //= timedelta(minutes=1) - s += "%s%02d:%02d" % (sign, hh, mm) - return s - - def __repr__(self): - """Convert to formal string, for repr().""" - L = [self._year, self._month, self._day, # These are never zero - self._hour, self._minute, self._second, self._microsecond] - if L[-1] == 0: - del L[-1] - if L[-1] == 0: - del L[-1] - s = ", ".join(map(str, L)) - s = "%s(%s)" % ('datetime.' + self.__class__.__name__, s) - if self._tzinfo is not None: - assert s[-1:] == ")" - s = s[:-1] + ", tzinfo=%r" % self._tzinfo + ")" - return s - - def __str__(self): - "Convert to string, for str()." 
- return self.isoformat(sep=' ') - - @classmethod - def strptime(cls, date_string, format): - 'string, format -> new datetime parsed from a string (like time.strptime()).' - import _strptime - return _strptime._strptime_datetime(cls, date_string, format) - - def utcoffset(self): - """Return the timezone offset in minutes east of UTC (negative west of - UTC).""" - if self._tzinfo is None: - return None - offset = self._tzinfo.utcoffset(self) - _check_utc_offset("utcoffset", offset) - return offset - - def tzname(self): - """Return the timezone name. - - Note that the name is 100% informational -- there's no requirement that - it mean anything in particular. For example, "GMT", "UTC", "-500", - "-5:00", "EDT", "US/Eastern", "America/New York" are all valid replies. - """ - name = _call_tzinfo_method(self._tzinfo, "tzname", self) - _check_tzname(name) - return name - - def dst(self): - """Return 0 if DST is not in effect, or the DST offset (in minutes - eastward) if DST is in effect. - - This is purely informational; the DST offset has already been added to - the UTC offset returned by utcoffset() if applicable, so there's no - need to consult dst() unless you're interested in displaying the DST - info. - """ - if self._tzinfo is None: - return None - offset = self._tzinfo.dst(self) - _check_utc_offset("dst", offset) - return offset - - # Comparisons of datetime objects with other. 
- - def __eq__(self, other): - if isinstance(other, datetime): - return self._cmp(other, allow_mixed=True) == 0 - elif not isinstance(other, date): - return NotImplemented - else: - return False - - def __ne__(self, other): - if isinstance(other, datetime): - return self._cmp(other, allow_mixed=True) != 0 - elif not isinstance(other, date): - return NotImplemented - else: - return True - - def __le__(self, other): - if isinstance(other, datetime): - return self._cmp(other) <= 0 - elif not isinstance(other, date): - return NotImplemented - else: - _cmperror(self, other) - - def __lt__(self, other): - if isinstance(other, datetime): - return self._cmp(other) < 0 - elif not isinstance(other, date): - return NotImplemented - else: - _cmperror(self, other) - - def __ge__(self, other): - if isinstance(other, datetime): - return self._cmp(other) >= 0 - elif not isinstance(other, date): - return NotImplemented - else: - _cmperror(self, other) - - def __gt__(self, other): - if isinstance(other, datetime): - return self._cmp(other) > 0 - elif not isinstance(other, date): - return NotImplemented - else: - _cmperror(self, other) - - def _cmp(self, other, allow_mixed=False): - assert isinstance(other, datetime) - mytz = self._tzinfo - ottz = other._tzinfo - myoff = otoff = None - - if mytz is ottz: - base_compare = True - else: - myoff = self.utcoffset() - otoff = other.utcoffset() - base_compare = myoff == otoff - - if base_compare: - return _cmp((self._year, self._month, self._day, - self._hour, self._minute, self._second, - self._microsecond), - (other._year, other._month, other._day, - other._hour, other._minute, other._second, - other._microsecond)) - if myoff is None or otoff is None: - if allow_mixed: - return 2 # arbitrary non-zero value - else: - raise TypeError("cannot compare naive and aware datetimes") - # XXX What follows could be done more efficiently... 
- diff = self - other # this will take offsets into account - if diff.days < 0: - return -1 - return diff and 1 or 0 - - def __add__(self, other): - "Add a datetime and a timedelta." - if not isinstance(other, timedelta): - return NotImplemented - delta = timedelta(self.toordinal(), - hours=self._hour, - minutes=self._minute, - seconds=self._second, - microseconds=self._microsecond) - delta += other - hour, rem = divmod(delta.seconds, 3600) - minute, second = divmod(rem, 60) - if 0 < delta.days <= _MAXORDINAL: - return datetime.combine(date.fromordinal(delta.days), - time(hour, minute, second, - delta.microseconds, - tzinfo=self._tzinfo)) - raise OverflowError("result out of range") - - __radd__ = __add__ - - def __sub__(self, other): - "Subtract two datetimes, or a datetime and a timedelta." - if not isinstance(other, datetime): - if isinstance(other, timedelta): - return self + -other - return NotImplemented - - days1 = self.toordinal() - days2 = other.toordinal() - secs1 = self._second + self._minute * 60 + self._hour * 3600 - secs2 = other._second + other._minute * 60 + other._hour * 3600 - base = timedelta(days1 - days2, - secs1 - secs2, - self._microsecond - other._microsecond) - if self._tzinfo is other._tzinfo: - return base - myoff = self.utcoffset() - otoff = other.utcoffset() - if myoff == otoff: - return base - if myoff is None or otoff is None: - raise TypeError("cannot mix naive and timezone-aware time") - return base + otoff - myoff - - def __hash__(self): - tzoff = self.utcoffset() - if tzoff is None: - return hash(self._getstate()[0]) - days = _ymd2ord(self.year, self.month, self.day) - seconds = self.hour * 3600 + self.minute * 60 + self.second - return hash(timedelta(days, seconds, self.microsecond) - tzoff) - - # Pickle support. 
- - def _getstate(self): - yhi, ylo = divmod(self._year, 256) - us2, us3 = divmod(self._microsecond, 256) - us1, us2 = divmod(us2, 256) - basestate = bytes([yhi, ylo, self._month, self._day, - self._hour, self._minute, self._second, - us1, us2, us3]) - if self._tzinfo is None: - return (basestate,) - else: - return (basestate, self._tzinfo) - - def __setstate(self, string, tzinfo): - (yhi, ylo, self._month, self._day, self._hour, - self._minute, self._second, us1, us2, us3) = string - self._year = yhi * 256 + ylo - self._microsecond = (((us1 << 8) | us2) << 8) | us3 - if tzinfo is None or isinstance(tzinfo, _tzinfo_class): - self._tzinfo = tzinfo - else: - raise TypeError("bad tzinfo state arg %r" % tzinfo) - - def __reduce__(self): - return (self.__class__, self._getstate()) - - -datetime.min = datetime(1, 1, 1) -datetime.max = datetime(9999, 12, 31, 23, 59, 59, 999999) -datetime.resolution = timedelta(microseconds=1) - - -def _isoweek1monday(year): - # Helper to calculate the day number of the Monday starting week 1 - # XXX This could be done more efficiently - THURSDAY = 3 - firstday = _ymd2ord(year, 1, 1) - firstweekday = (firstday + 6) % 7 # See weekday() above - week1monday = firstday - firstweekday - if firstweekday > THURSDAY: - week1monday += 7 - return week1monday - -class timezone(tzinfo): - __slots__ = '_offset', '_name' - - # Sentinel value to disallow None - _Omitted = object() - def __new__(cls, offset, name=_Omitted): - if not isinstance(offset, timedelta): - raise TypeError("offset must be a timedelta") - if name is cls._Omitted: - if not offset: - return cls.utc - name = None - elif not isinstance(name, str): - ### - # For Python-Future: - if PY2 and isinstance(name, native_str): - name = name.decode() - else: - raise TypeError("name must be a string") - ### - if not cls._minoffset <= offset <= cls._maxoffset: - raise ValueError("offset must be a timedelta" - " strictly between -timedelta(hours=24) and" - " timedelta(hours=24).") - if 
(offset.microseconds != 0 or - offset.seconds % 60 != 0): - raise ValueError("offset must be a timedelta" - " representing a whole number of minutes") - return cls._create(offset, name) - - @classmethod - def _create(cls, offset, name=None): - self = tzinfo.__new__(cls) - self._offset = offset - self._name = name - return self - - def __getinitargs__(self): - """pickle support""" - if self._name is None: - return (self._offset,) - return (self._offset, self._name) - - def __eq__(self, other): - if type(other) != timezone: - return False - return self._offset == other._offset - - def __hash__(self): - return hash(self._offset) - - def __repr__(self): - """Convert to formal string, for repr(). - - >>> tz = timezone.utc - >>> repr(tz) - 'datetime.timezone.utc' - >>> tz = timezone(timedelta(hours=-5), 'EST') - >>> repr(tz) - "datetime.timezone(datetime.timedelta(-1, 68400), 'EST')" - """ - if self is self.utc: - return 'datetime.timezone.utc' - if self._name is None: - return "%s(%r)" % ('datetime.' + self.__class__.__name__, - self._offset) - return "%s(%r, %r)" % ('datetime.' 
+ self.__class__.__name__, - self._offset, self._name) - - def __str__(self): - return self.tzname(None) - - def utcoffset(self, dt): - if isinstance(dt, datetime) or dt is None: - return self._offset - raise TypeError("utcoffset() argument must be a datetime instance" - " or None") - - def tzname(self, dt): - if isinstance(dt, datetime) or dt is None: - if self._name is None: - return self._name_from_offset(self._offset) - return self._name - raise TypeError("tzname() argument must be a datetime instance" - " or None") - - def dst(self, dt): - if isinstance(dt, datetime) or dt is None: - return None - raise TypeError("dst() argument must be a datetime instance" - " or None") - - def fromutc(self, dt): - if isinstance(dt, datetime): - if dt.tzinfo is not self: - raise ValueError("fromutc: dt.tzinfo " - "is not self") - return dt + self._offset - raise TypeError("fromutc() argument must be a datetime instance" - " or None") - - _maxoffset = timedelta(hours=23, minutes=59) - _minoffset = -_maxoffset - - @staticmethod - def _name_from_offset(delta): - if delta < timedelta(0): - sign = '-' - delta = -delta - else: - sign = '+' - hours, rest = divmod(delta, timedelta(hours=1)) - minutes = rest // timedelta(minutes=1) - return 'UTC{}{:02d}:{:02d}'.format(sign, hours, minutes) - -timezone.utc = timezone._create(timedelta(0)) -timezone.min = timezone._create(timezone._minoffset) -timezone.max = timezone._create(timezone._maxoffset) -_EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc) -""" -Some time zone algebra. For a datetime x, let - x.n = x stripped of its timezone -- its naive time. - x.o = x.utcoffset(), and assuming that doesn't raise an exception or - return None - x.d = x.dst(), and assuming that doesn't raise an exception or - return None - x.s = x's standard offset, x.o - x.d - -Now some derived rules, where k is a duration (timedelta). - -1. x.o = x.s + x.d - This follows from the definition of x.s. - -2. If x and y have the same tzinfo member, x.s = y.s. 
- This is actually a requirement, an assumption we need to make about - sane tzinfo classes. - -3. The naive UTC time corresponding to x is x.n - x.o. - This is again a requirement for a sane tzinfo class. - -4. (x+k).s = x.s - This follows from #2, and that datimetimetz+timedelta preserves tzinfo. - -5. (x+k).n = x.n + k - Again follows from how arithmetic is defined. - -Now we can explain tz.fromutc(x). Let's assume it's an interesting case -(meaning that the various tzinfo methods exist, and don't blow up or return -None when called). - -The function wants to return a datetime y with timezone tz, equivalent to x. -x is already in UTC. - -By #3, we want - - y.n - y.o = x.n [1] - -The algorithm starts by attaching tz to x.n, and calling that y. So -x.n = y.n at the start. Then it wants to add a duration k to y, so that [1] -becomes true; in effect, we want to solve [2] for k: - - (y+k).n - (y+k).o = x.n [2] - -By #1, this is the same as - - (y+k).n - ((y+k).s + (y+k).d) = x.n [3] - -By #5, (y+k).n = y.n + k, which equals x.n + k because x.n=y.n at the start. -Substituting that into [3], - - x.n + k - (y+k).s - (y+k).d = x.n; the x.n terms cancel, leaving - k - (y+k).s - (y+k).d = 0; rearranging, - k = (y+k).s - (y+k).d; by #4, (y+k).s == y.s, so - k = y.s - (y+k).d - -On the RHS, (y+k).d can't be computed directly, but y.s can be, and we -approximate k by ignoring the (y+k).d term at first. Note that k can't be -very large, since all offset-returning methods return a duration of magnitude -less than 24 hours. For that reason, if y is firmly in std time, (y+k).d must -be 0, so ignoring it has no consequence then. - -In any case, the new value is - - z = y + y.s [4] - -It's helpful to step back at look at [4] from a higher level: it's simply -mapping from UTC to tz's standard time. - -At this point, if - - z.n - z.o = x.n [5] - -we have an equivalent time, and are almost done. The insecurity here is -at the start of daylight time. 
Picture US Eastern for concreteness. The wall -time jumps from 1:59 to 3:00, and wall hours of the form 2:MM don't make good -sense then. The docs ask that an Eastern tzinfo class consider such a time to -be EDT (because it's "after 2"), which is a redundant spelling of 1:MM EST -on the day DST starts. We want to return the 1:MM EST spelling because that's -the only spelling that makes sense on the local wall clock. - -In fact, if [5] holds at this point, we do have the standard-time spelling, -but that takes a bit of proof. We first prove a stronger result. What's the -difference between the LHS and RHS of [5]? Let - - diff = x.n - (z.n - z.o) [6] - -Now - z.n = by [4] - (y + y.s).n = by #5 - y.n + y.s = since y.n = x.n - x.n + y.s = since z and y are have the same tzinfo member, - y.s = z.s by #2 - x.n + z.s - -Plugging that back into [6] gives - - diff = - x.n - ((x.n + z.s) - z.o) = expanding - x.n - x.n - z.s + z.o = cancelling - - z.s + z.o = by #2 - z.d - -So diff = z.d. - -If [5] is true now, diff = 0, so z.d = 0 too, and we have the standard-time -spelling we wanted in the endcase described above. We're done. Contrarily, -if z.d = 0, then we have a UTC equivalent, and are also done. - -If [5] is not true now, diff = z.d != 0, and z.d is the offset we need to -add to z (in effect, z is in tz's standard time, and we need to shift the -local clock into tz's daylight time). - -Let - - z' = z + z.d = z + diff [7] - -and we can again ask whether - - z'.n - z'.o = x.n [8] - -If so, we're done. If not, the tzinfo class is insane, according to the -assumptions we've made. This also requires a bit of proof. 
As before, let's -compute the difference between the LHS and RHS of [8] (and skipping some of -the justifications for the kinds of substitutions we've done several times -already): - - diff' = x.n - (z'.n - z'.o) = replacing z'.n via [7] - x.n - (z.n + diff - z'.o) = replacing diff via [6] - x.n - (z.n + x.n - (z.n - z.o) - z'.o) = - x.n - z.n - x.n + z.n - z.o + z'.o = cancel x.n - - z.n + z.n - z.o + z'.o = cancel z.n - - z.o + z'.o = #1 twice - -z.s - z.d + z'.s + z'.d = z and z' have same tzinfo - z'.d - z.d - -So z' is UTC-equivalent to x iff z'.d = z.d at this point. If they are equal, -we've found the UTC-equivalent so are done. In fact, we stop with [7] and -return z', not bothering to compute z'.d. - -How could z.d and z'd differ? z' = z + z.d [7], so merely moving z' by -a dst() offset, and starting *from* a time already in DST (we know z.d != 0), -would have to change the result dst() returns: we start in DST, and moving -a little further into it takes us out of DST. - -There isn't a sane case where this can happen. The closest it gets is at -the end of DST, where there's an hour in UTC with no spelling in a hybrid -tzinfo class. In US Eastern, that's 5:MM UTC = 0:MM EST = 1:MM EDT. During -that hour, on an Eastern clock 1:MM is taken as being in standard time (6:MM -UTC) because the docs insist on that, but 0:MM is taken as being in daylight -time (4:MM UTC). There is no local time mapping to 5:MM UTC. The local -clock jumps from 1:59 back to 1:00 again, and repeats the 1:MM hour in -standard time. Since that's what the local clock *does*, we want to map both -UTC hours 5:MM and 6:MM to 1:MM Eastern. The result is ambiguous -in local time, but so it goes -- it's the way the local clock works. - -When x = 5:MM UTC is the input to this algorithm, x.o=0, y.o=-5 and y.d=0, -so z=0:MM. z.d=60 (minutes) then, so [5] doesn't hold and we keep going. 
-z' = z + z.d = 1:MM then, and z'.d=0, and z'.d - z.d = -60 != 0 so [8] -(correctly) concludes that z' is not UTC-equivalent to x. - -Because we know z.d said z was in daylight time (else [5] would have held and -we would have stopped then), and we know z.d != z'.d (else [8] would have held -and we have stopped then), and there are only 2 possible values dst() can -return in Eastern, it follows that z'.d must be 0 (which it is in the example, -but the reasoning doesn't depend on the example -- it depends on there being -two possible dst() outcomes, one zero and the other non-zero). Therefore -z' must be in standard time, and is the spelling we want in this case. - -Note again that z' is not UTC-equivalent as far as the hybrid tzinfo class is -concerned (because it takes z' as being in standard time rather than the -daylight time we intend here), but returning it gives the real-life "local -clock repeats an hour" behavior when mapping the "unspellable" UTC hour into -tz. - -When the input is 6:MM, z=1:MM and z.d=0, and we stop at once, again with -the 1:MM standard time spelling we want. - -So how can this break? One of the assumptions must be violated. Two -possibilities: - -1) [2] effectively says that y.s is invariant across all y belong to a given - time zone. This isn't true if, for political reasons or continental drift, - a region decides to change its base offset from UTC. - -2) There may be versions of "double daylight" time where the tail end of - the analysis gives up a step too early. I haven't thought about that - enough to say. - -In any case, it's clear that the default fromutc() is strong enough to handle -"almost all" time zones: so long as the standard offset is invariant, it -doesn't matter if daylight time transition points change from year to year, or -if daylight time is skipped in some years; it doesn't matter how large or -small dst() may get within its bounds; and it doesn't even matter if some -perverse time zone returns a negative dst()). 
So a breaking case must be -pretty bizarre, and a tzinfo subclass can override fromutc() if it is. -""" -try: - from _datetime import * -except ImportError: - pass -else: - # Clean up unused names - del (_DAYNAMES, _DAYS_BEFORE_MONTH, _DAYS_IN_MONTH, - _DI100Y, _DI400Y, _DI4Y, _MAXORDINAL, _MONTHNAMES, - _build_struct_time, _call_tzinfo_method, _check_date_fields, - _check_time_fields, _check_tzinfo_arg, _check_tzname, - _check_utc_offset, _cmp, _cmperror, _date_class, _days_before_month, - _days_before_year, _days_in_month, _format_time, _is_leap, - _isoweek1monday, _math, _ord2ymd, _time, _time_class, _tzinfo_class, - _wrap_strftime, _ymd2ord) - # XXX Since import * above excludes names that start with _, - # docstring does not get overwritten. In the future, it may be - # appropriate to maintain a single module level docstring and - # remove the following line. - from _datetime import __doc__ diff --git a/contrib/python/future/future/backports/email/__init__.py b/contrib/python/future/future/backports/email/__init__.py deleted file mode 100644 index f9523bc10bd..00000000000 --- a/contrib/python/future/future/backports/email/__init__.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (C) 2001-2007 Python Software Foundation -# Author: Barry Warsaw -# Contact: [email protected] - -""" -Backport of the Python 3.3 email package for Python-Future. - -A package for parsing, handling, and generating email messages. -""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import - -# Install the surrogate escape handler here because this is used by many -# modules in the email package. -from future.utils import surrogateescape -surrogateescape.register_surrogateescape() -# (Should this be done globally by ``future``?) 
- - -__version__ = '5.1.0' - -__all__ = [ - 'base64mime', - 'charset', - 'encoders', - 'errors', - 'feedparser', - 'generator', - 'header', - 'iterators', - 'message', - 'message_from_file', - 'message_from_binary_file', - 'message_from_string', - 'message_from_bytes', - 'mime', - 'parser', - 'quoprimime', - 'utils', - ] - - - -# Some convenience routines. Don't import Parser and Message as side-effects -# of importing email since those cascadingly import most of the rest of the -# email package. -def message_from_string(s, *args, **kws): - """Parse a string into a Message object model. - - Optional _class and strict are passed to the Parser constructor. - """ - from future.backports.email.parser import Parser - return Parser(*args, **kws).parsestr(s) - -def message_from_bytes(s, *args, **kws): - """Parse a bytes string into a Message object model. - - Optional _class and strict are passed to the Parser constructor. - """ - from future.backports.email.parser import BytesParser - return BytesParser(*args, **kws).parsebytes(s) - -def message_from_file(fp, *args, **kws): - """Read a file and parse its contents into a Message object model. - - Optional _class and strict are passed to the Parser constructor. - """ - from future.backports.email.parser import Parser - return Parser(*args, **kws).parse(fp) - -def message_from_binary_file(fp, *args, **kws): - """Read a binary file and parse its contents into a Message object model. - - Optional _class and strict are passed to the Parser constructor. - """ - from future.backports.email.parser import BytesParser - return BytesParser(*args, **kws).parse(fp) diff --git a/contrib/python/future/future/backports/email/_encoded_words.py b/contrib/python/future/future/backports/email/_encoded_words.py deleted file mode 100644 index 7c4a5291466..00000000000 --- a/contrib/python/future/future/backports/email/_encoded_words.py +++ /dev/null @@ -1,232 +0,0 @@ -""" Routines for manipulating RFC2047 encoded words. 
- -This is currently a package-private API, but will be considered for promotion -to a public API if there is demand. - -""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import -from future.builtins import bytes -from future.builtins import chr -from future.builtins import int -from future.builtins import str - -# An ecoded word looks like this: -# -# =?charset[*lang]?cte?encoded_string?= -# -# for more information about charset see the charset module. Here it is one -# of the preferred MIME charset names (hopefully; you never know when parsing). -# cte (Content Transfer Encoding) is either 'q' or 'b' (ignoring case). In -# theory other letters could be used for other encodings, but in practice this -# (almost?) never happens. There could be a public API for adding entries -# to the CTE tables, but YAGNI for now. 'q' is Quoted Printable, 'b' is -# Base64. The meaning of encoded_string should be obvious. 'lang' is optional -# as indicated by the brackets (they are not part of the syntax) but is almost -# never encountered in practice. -# -# The general interface for a CTE decoder is that it takes the encoded_string -# as its argument, and returns a tuple (cte_decoded_string, defects). The -# cte_decoded_string is the original binary that was encoded using the -# specified cte. 'defects' is a list of MessageDefect instances indicating any -# problems encountered during conversion. 'charset' and 'lang' are the -# corresponding strings extracted from the EW, case preserved. -# -# The general interface for a CTE encoder is that it takes a binary sequence -# as input and returns the cte_encoded_string, which is an ascii-only string. -# -# Each decoder must also supply a length function that takes the binary -# sequence as its argument and returns the length of the resulting encoded -# string. 
-# -# The main API functions for the module are decode, which calls the decoder -# referenced by the cte specifier, and encode, which adds the appropriate -# RFC 2047 "chrome" to the encoded string, and can optionally automatically -# select the shortest possible encoding. See their docstrings below for -# details. - -import re -import base64 -import binascii -import functools -from string import ascii_letters, digits -from future.backports.email import errors - -__all__ = ['decode_q', - 'encode_q', - 'decode_b', - 'encode_b', - 'len_q', - 'len_b', - 'decode', - 'encode', - ] - -# -# Quoted Printable -# - -# regex based decoder. -_q_byte_subber = functools.partial(re.compile(br'=([a-fA-F0-9]{2})').sub, - lambda m: bytes([int(m.group(1), 16)])) - -def decode_q(encoded): - encoded = bytes(encoded.replace(b'_', b' ')) - return _q_byte_subber(encoded), [] - - -# dict mapping bytes to their encoded form -class _QByteMap(dict): - - safe = bytes(b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii')) - - def __missing__(self, key): - if key in self.safe: - self[key] = chr(key) - else: - self[key] = "={:02X}".format(key) - return self[key] - -_q_byte_map = _QByteMap() - -# In headers spaces are mapped to '_'. -_q_byte_map[ord(' ')] = '_' - -def encode_q(bstring): - return str(''.join(_q_byte_map[x] for x in bytes(bstring))) - -def len_q(bstring): - return sum(len(_q_byte_map[x]) for x in bytes(bstring)) - - -# -# Base64 -# - -def decode_b(encoded): - defects = [] - pad_err = len(encoded) % 4 - if pad_err: - defects.append(errors.InvalidBase64PaddingDefect()) - padded_encoded = encoded + b'==='[:4-pad_err] - else: - padded_encoded = encoded - try: - # The validate kwarg to b64decode is not supported in Py2.x - if not re.match(b'^[A-Za-z0-9+/]*={0,2}$', padded_encoded): - raise binascii.Error('Non-base64 digit found') - return base64.b64decode(padded_encoded), defects - except binascii.Error: - # Since we had correct padding, this must an invalid char error. 
- defects = [errors.InvalidBase64CharactersDefect()] - # The non-alphabet characters are ignored as far as padding - # goes, but we don't know how many there are. So we'll just - # try various padding lengths until something works. - for i in 0, 1, 2, 3: - try: - return base64.b64decode(encoded+b'='*i), defects - except (binascii.Error, TypeError): # Py2 raises a TypeError - if i==0: - defects.append(errors.InvalidBase64PaddingDefect()) - else: - # This should never happen. - raise AssertionError("unexpected binascii.Error") - -def encode_b(bstring): - return base64.b64encode(bstring).decode('ascii') - -def len_b(bstring): - groups_of_3, leftover = divmod(len(bstring), 3) - # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in. - return groups_of_3 * 4 + (4 if leftover else 0) - - -_cte_decoders = { - 'q': decode_q, - 'b': decode_b, - } - -def decode(ew): - """Decode encoded word and return (string, charset, lang, defects) tuple. - - An RFC 2047/2243 encoded word has the form: - - =?charset*lang?cte?encoded_string?= - - where '*lang' may be omitted but the other parts may not be. - - This function expects exactly such a string (that is, it does not check the - syntax and may raise errors if the string is not well formed), and returns - the encoded_string decoded first from its Content Transfer Encoding and - then from the resulting bytes into unicode using the specified charset. If - the cte-decoded string does not successfully decode using the specified - character set, a defect is added to the defects list and the unknown octets - are replaced by the unicode 'unknown' character \uFDFF. - - The specified charset and language are returned. The default for language, - which is rarely if ever encountered, is the empty string. - - """ - _, charset, cte, cte_string, _ = str(ew).split('?') - charset, _, lang = charset.partition('*') - cte = cte.lower() - # Recover the original bytes and do CTE decoding. 
- bstring = cte_string.encode('ascii', 'surrogateescape') - bstring, defects = _cte_decoders[cte](bstring) - # Turn the CTE decoded bytes into unicode. - try: - string = bstring.decode(charset) - except UnicodeError: - defects.append(errors.UndecodableBytesDefect("Encoded word " - "contains bytes not decodable using {} charset".format(charset))) - string = bstring.decode(charset, 'surrogateescape') - except LookupError: - string = bstring.decode('ascii', 'surrogateescape') - if charset.lower() != 'unknown-8bit': - defects.append(errors.CharsetError("Unknown charset {} " - "in encoded word; decoded as unknown bytes".format(charset))) - return string, charset, lang, defects - - -_cte_encoders = { - 'q': encode_q, - 'b': encode_b, - } - -_cte_encode_length = { - 'q': len_q, - 'b': len_b, - } - -def encode(string, charset='utf-8', encoding=None, lang=''): - """Encode string using the CTE encoding that produces the shorter result. - - Produces an RFC 2047/2243 encoded word of the form: - - =?charset*lang?cte?encoded_string?= - - where '*lang' is omitted unless the 'lang' parameter is given a value. - Optional argument charset (defaults to utf-8) specifies the charset to use - to encode the string to binary before CTE encoding it. Optional argument - 'encoding' is the cte specifier for the encoding that should be used ('q' - or 'b'); if it is None (the default) the encoding which produces the - shortest encoded sequence is used, except that 'q' is preferred if it is up - to five characters longer. Optional argument 'lang' (default '') gives the - RFC 2243 language string to specify in the encoded word. - - """ - string = str(string) - if charset == 'unknown-8bit': - bstring = string.encode('ascii', 'surrogateescape') - else: - bstring = string.encode(charset) - if encoding is None: - qlen = _cte_encode_length['q'](bstring) - blen = _cte_encode_length['b'](bstring) - # Bias toward q. 5 is arbitrary. 
- encoding = 'q' if qlen - blen < 5 else 'b' - encoded = _cte_encoders[encoding](bstring) - if lang: - lang = '*' + lang - return "=?{0}{1}?{2}?{3}?=".format(charset, lang, encoding, encoded) diff --git a/contrib/python/future/future/backports/email/_header_value_parser.py b/contrib/python/future/future/backports/email/_header_value_parser.py deleted file mode 100644 index 43957edc12f..00000000000 --- a/contrib/python/future/future/backports/email/_header_value_parser.py +++ /dev/null @@ -1,2965 +0,0 @@ -"""Header value parser implementing various email-related RFC parsing rules. - -The parsing methods defined in this module implement various email related -parsing rules. Principal among them is RFC 5322, which is the followon -to RFC 2822 and primarily a clarification of the former. It also implements -RFC 2047 encoded word decoding. - -RFC 5322 goes to considerable trouble to maintain backward compatibility with -RFC 822 in the parse phase, while cleaning up the structure on the generation -phase. This parser supports correct RFC 5322 generation by tagging white space -as folding white space only when folding is allowed in the non-obsolete rule -sets. Actually, the parser is even more generous when accepting input than RFC -5322 mandates, following the spirit of Postel's Law, which RFC 5322 encourages. -Where possible deviations from the standard are annotated on the 'defects' -attribute of tokens that deviate. - -The general structure of the parser follows RFC 5322, and uses its terminology -where there is a direct correspondence. Where the implementation requires a -somewhat different structure than that used by the formal grammar, new terms -that mimic the closest existing terms are used. Thus, it really helps to have -a copy of RFC 5322 handy when studying this code. - -Input to the parser is a string that has already been unfolded according to -RFC 5322 rules. 
According to the RFC this unfolding is the very first step, and -this parser leaves the unfolding step to a higher level message parser, which -will have already detected the line breaks that need unfolding while -determining the beginning and end of each header. - -The output of the parser is a TokenList object, which is a list subclass. A -TokenList is a recursive data structure. The terminal nodes of the structure -are Terminal objects, which are subclasses of str. These do not correspond -directly to terminal objects in the formal grammar, but are instead more -practical higher level combinations of true terminals. - -All TokenList and Terminal objects have a 'value' attribute, which produces the -semantically meaningful value of that part of the parse subtree. The value of -all whitespace tokens (no matter how many sub-tokens they may contain) is a -single space, as per the RFC rules. This includes 'CFWS', which is herein -included in the general class of whitespace tokens. There is one exception to -the rule that whitespace tokens are collapsed into single spaces in values: in -the value of a 'bare-quoted-string' (a quoted-string with no leading or -trailing whitespace), any whitespace that appeared between the quotation marks -is preserved in the returned value. Note that in all Terminal strings quoted -pairs are turned into their unquoted values. - -All TokenList and Terminal objects also have a string value, which attempts to -be a "canonical" representation of the RFC-compliant form of the substring that -produced the parsed subtree, including minimal use of quoted pair quoting. -Whitespace runs are not collapsed. - -Comment tokens also have a 'content' attribute providing the string found -between the parens (including any nested comments) with whitespace preserved. - -All TokenList and Terminal objects have a 'defects' attribute which is a -possibly empty list all of the defects found while creating the token. 
Defects -may appear on any token in the tree, and a composite list of all defects in the -subtree is available through the 'all_defects' attribute of any node. (For -Terminal notes x.defects == x.all_defects.) - -Each object in a parse tree is called a 'token', and each has a 'token_type' -attribute that gives the name from the RFC 5322 grammar that it represents. -Not all RFC 5322 nodes are produced, and there is one non-RFC 5322 node that -may be produced: 'ptext'. A 'ptext' is a string of printable ascii characters. -It is returned in place of lists of (ctext/quoted-pair) and -(qtext/quoted-pair). - -XXX: provide complete list of token types. -""" -from __future__ import print_function -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import -from future.builtins import int, range, str, super, list - -import re -from collections import namedtuple, OrderedDict - -from future.backports.urllib.parse import (unquote, unquote_to_bytes) -from future.backports.email import _encoded_words as _ew -from future.backports.email import errors -from future.backports.email import utils - -# -# Useful constants and functions -# - -WSP = set(' \t') -CFWS_LEADER = WSP | set('(') -SPECIALS = set(r'()<>@,:;.\"[]') -ATOM_ENDS = SPECIALS | WSP -DOT_ATOM_ENDS = ATOM_ENDS - set('.') -# '.', '"', and '(' do not end phrases in order to support obs-phrase -PHRASE_ENDS = SPECIALS - set('."(') -TSPECIALS = (SPECIALS | set('/?=')) - set('.') -TOKEN_ENDS = TSPECIALS | WSP -ASPECIALS = TSPECIALS | set("*'%") -ATTRIBUTE_ENDS = ASPECIALS | WSP -EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS - set('%') - -def quote_string(value): - return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"' - -# -# Accumulator for header folding -# - -class _Folded(object): - - def __init__(self, maxlen, policy): - self.maxlen = maxlen - self.policy = policy - self.lastlen = 0 - self.stickyspace = None - self.firstline = True - self.done = [] - 
self.current = list() # uses l.clear() - - def newline(self): - self.done.extend(self.current) - self.done.append(self.policy.linesep) - self.current.clear() - self.lastlen = 0 - - def finalize(self): - if self.current: - self.newline() - - def __str__(self): - return ''.join(self.done) - - def append(self, stoken): - self.current.append(stoken) - - def append_if_fits(self, token, stoken=None): - if stoken is None: - stoken = str(token) - l = len(stoken) - if self.stickyspace is not None: - stickyspace_len = len(self.stickyspace) - if self.lastlen + stickyspace_len + l <= self.maxlen: - self.current.append(self.stickyspace) - self.lastlen += stickyspace_len - self.current.append(stoken) - self.lastlen += l - self.stickyspace = None - self.firstline = False - return True - if token.has_fws: - ws = token.pop_leading_fws() - if ws is not None: - self.stickyspace += str(ws) - stickyspace_len += len(ws) - token._fold(self) - return True - if stickyspace_len and l + 1 <= self.maxlen: - margin = self.maxlen - l - if 0 < margin < stickyspace_len: - trim = stickyspace_len - margin - self.current.append(self.stickyspace[:trim]) - self.stickyspace = self.stickyspace[trim:] - stickyspace_len = trim - self.newline() - self.current.append(self.stickyspace) - self.current.append(stoken) - self.lastlen = l + stickyspace_len - self.stickyspace = None - self.firstline = False - return True - if not self.firstline: - self.newline() - self.current.append(self.stickyspace) - self.current.append(stoken) - self.stickyspace = None - self.firstline = False - return True - if self.lastlen + l <= self.maxlen: - self.current.append(stoken) - self.lastlen += l - return True - if l < self.maxlen: - self.newline() - self.current.append(stoken) - self.lastlen = l - return True - return False - -# -# TokenList and its subclasses -# - -class TokenList(list): - - token_type = None - - def __init__(self, *args, **kw): - super(TokenList, self).__init__(*args, **kw) - self.defects = [] - - def 
__str__(self): - return ''.join(str(x) for x in self) - - def __repr__(self): - return '{}({})'.format(self.__class__.__name__, - super(TokenList, self).__repr__()) - - @property - def value(self): - return ''.join(x.value for x in self if x.value) - - @property - def all_defects(self): - return sum((x.all_defects for x in self), self.defects) - - # - # Folding API - # - # parts(): - # - # return a list of objects that constitute the "higher level syntactic - # objects" specified by the RFC as the best places to fold a header line. - # The returned objects must include leading folding white space, even if - # this means mutating the underlying parse tree of the object. Each object - # is only responsible for returning *its* parts, and should not drill down - # to any lower level except as required to meet the leading folding white - # space constraint. - # - # _fold(folded): - # - # folded: the result accumulator. This is an instance of _Folded. - # (XXX: I haven't finished factoring this out yet, the folding code - # pretty much uses this as a state object.) When the folded.current - # contains as much text as will fit, the _fold method should call - # folded.newline. - # folded.lastlen: the current length of the test stored in folded.current. - # folded.maxlen: The maximum number of characters that may appear on a - # folded line. Differs from the policy setting in that "no limit" is - # represented by +inf, which means it can be used in the trivially - # logical fashion in comparisons. - # - # Currently no subclasses implement parts, and I think this will remain - # true. A subclass only needs to implement _fold when the generic version - # isn't sufficient. _fold will need to be implemented primarily when it is - # possible for encoded words to appear in the specialized token-list, since - # there is no generic algorithm that can know where exactly the encoded - # words are allowed. 
A _fold implementation is responsible for filling - # lines in the same general way that the top level _fold does. It may, and - # should, call the _fold method of sub-objects in a similar fashion to that - # of the top level _fold. - # - # XXX: I'm hoping it will be possible to factor the existing code further - # to reduce redundancy and make the logic clearer. - - @property - def parts(self): - klass = self.__class__ - this = list() - for token in self: - if token.startswith_fws(): - if this: - yield this[0] if len(this)==1 else klass(this) - this.clear() - end_ws = token.pop_trailing_ws() - this.append(token) - if end_ws: - yield klass(this) - this = [end_ws] - if this: - yield this[0] if len(this)==1 else klass(this) - - def startswith_fws(self): - return self[0].startswith_fws() - - def pop_leading_fws(self): - if self[0].token_type == 'fws': - return self.pop(0) - return self[0].pop_leading_fws() - - def pop_trailing_ws(self): - if self[-1].token_type == 'cfws': - return self.pop(-1) - return self[-1].pop_trailing_ws() - - @property - def has_fws(self): - for part in self: - if part.has_fws: - return True - return False - - def has_leading_comment(self): - return self[0].has_leading_comment() - - @property - def comments(self): - comments = [] - for token in self: - comments.extend(token.comments) - return comments - - def fold(self, **_3to2kwargs): - # max_line_length 0/None means no limit, ie: infinitely long. - policy = _3to2kwargs['policy']; del _3to2kwargs['policy'] - maxlen = policy.max_line_length or float("+inf") - folded = _Folded(maxlen, policy) - self._fold(folded) - folded.finalize() - return str(folded) - - def as_encoded_word(self, charset): - # This works only for things returned by 'parts', which include - # the leading fws, if any, that should be used. 
- res = [] - ws = self.pop_leading_fws() - if ws: - res.append(ws) - trailer = self.pop(-1) if self[-1].token_type=='fws' else '' - res.append(_ew.encode(str(self), charset)) - res.append(trailer) - return ''.join(res) - - def cte_encode(self, charset, policy): - res = [] - for part in self: - res.append(part.cte_encode(charset, policy)) - return ''.join(res) - - def _fold(self, folded): - for part in self.parts: - tstr = str(part) - tlen = len(tstr) - try: - str(part).encode('us-ascii') - except UnicodeEncodeError: - if any(isinstance(x, errors.UndecodableBytesDefect) - for x in part.all_defects): - charset = 'unknown-8bit' - else: - # XXX: this should be a policy setting - charset = 'utf-8' - tstr = part.cte_encode(charset, folded.policy) - tlen = len(tstr) - if folded.append_if_fits(part, tstr): - continue - # Peel off the leading whitespace if any and make it sticky, to - # avoid infinite recursion. - ws = part.pop_leading_fws() - if ws is not None: - # Peel off the leading whitespace and make it sticky, to - # avoid infinite recursion. - folded.stickyspace = str(part.pop(0)) - if folded.append_if_fits(part): - continue - if part.has_fws: - part._fold(folded) - continue - # There are no fold points in this one; it is too long for a single - # line and can't be split...we just have to put it on its own line. - folded.append(tstr) - folded.newline() - - def pprint(self, indent=''): - print('\n'.join(self._pp(indent=''))) - - def ppstr(self, indent=''): - return '\n'.join(self._pp(indent='')) - - def _pp(self, indent=''): - yield '{}{}/{}('.format( - indent, - self.__class__.__name__, - self.token_type) - for token in self: - if not hasattr(token, '_pp'): - yield (indent + ' !! 
invalid element in token ' - 'list: {!r}'.format(token)) - else: - for line in token._pp(indent+' '): - yield line - if self.defects: - extra = ' Defects: {}'.format(self.defects) - else: - extra = '' - yield '{}){}'.format(indent, extra) - - -class WhiteSpaceTokenList(TokenList): - - @property - def value(self): - return ' ' - - @property - def comments(self): - return [x.content for x in self if x.token_type=='comment'] - - -class UnstructuredTokenList(TokenList): - - token_type = 'unstructured' - - def _fold(self, folded): - if any(x.token_type=='encoded-word' for x in self): - return self._fold_encoded(folded) - # Here we can have either a pure ASCII string that may or may not - # have surrogateescape encoded bytes, or a unicode string. - last_ew = None - for part in self.parts: - tstr = str(part) - is_ew = False - try: - str(part).encode('us-ascii') - except UnicodeEncodeError: - if any(isinstance(x, errors.UndecodableBytesDefect) - for x in part.all_defects): - charset = 'unknown-8bit' - else: - charset = 'utf-8' - if last_ew is not None: - # We've already done an EW, combine this one with it - # if there's room. - chunk = get_unstructured( - ''.join(folded.current[last_ew:]+[tstr])).as_encoded_word(charset) - oldlastlen = sum(len(x) for x in folded.current[:last_ew]) - schunk = str(chunk) - lchunk = len(schunk) - if oldlastlen + lchunk <= folded.maxlen: - del folded.current[last_ew:] - folded.append(schunk) - folded.lastlen = oldlastlen + lchunk - continue - tstr = part.as_encoded_word(charset) - is_ew = True - if folded.append_if_fits(part, tstr): - if is_ew: - last_ew = len(folded.current) - 1 - continue - if is_ew or last_ew: - # It's too big to fit on the line, but since we've - # got encoded words we can use encoded word folding. - part._fold_as_ew(folded) - continue - # Peel off the leading whitespace if any and make it sticky, to - # avoid infinite recursion. 
- ws = part.pop_leading_fws() - if ws is not None: - folded.stickyspace = str(ws) - if folded.append_if_fits(part): - continue - if part.has_fws: - part.fold(folded) - continue - # It can't be split...we just have to put it on its own line. - folded.append(tstr) - folded.newline() - last_ew = None - - def cte_encode(self, charset, policy): - res = [] - last_ew = None - for part in self: - spart = str(part) - try: - spart.encode('us-ascii') - res.append(spart) - except UnicodeEncodeError: - if last_ew is None: - res.append(part.cte_encode(charset, policy)) - last_ew = len(res) - else: - tl = get_unstructured(''.join(res[last_ew:] + [spart])) - res.append(tl.as_encoded_word()) - return ''.join(res) - - -class Phrase(TokenList): - - token_type = 'phrase' - - def _fold(self, folded): - # As with Unstructured, we can have pure ASCII with or without - # surrogateescape encoded bytes, or we could have unicode. But this - # case is more complicated, since we have to deal with the various - # sub-token types and how they can be composed in the face of - # unicode-that-needs-CTE-encoding, and the fact that if a token a - # comment that becomes a barrier across which we can't compose encoded - # words. - last_ew = None - for part in self.parts: - tstr = str(part) - tlen = len(tstr) - has_ew = False - try: - str(part).encode('us-ascii') - except UnicodeEncodeError: - if any(isinstance(x, errors.UndecodableBytesDefect) - for x in part.all_defects): - charset = 'unknown-8bit' - else: - charset = 'utf-8' - if last_ew is not None and not part.has_leading_comment(): - # We've already done an EW, let's see if we can combine - # this one with it. The last_ew logic ensures that all we - # have at this point is atoms, no comments or quoted - # strings. So we can treat the text between the last - # encoded word and the content of this token as - # unstructured text, and things will work correctly. 
But - # we have to strip off any trailing comment on this token - # first, and if it is a quoted string we have to pull out - # the content (we're encoding it, so it no longer needs to - # be quoted). - if part[-1].token_type == 'cfws' and part.comments: - remainder = part.pop(-1) - else: - remainder = '' - for i, token in enumerate(part): - if token.token_type == 'bare-quoted-string': - part[i] = UnstructuredTokenList(token[:]) - chunk = get_unstructured( - ''.join(folded.current[last_ew:]+[tstr])).as_encoded_word(charset) - schunk = str(chunk) - lchunk = len(schunk) - if last_ew + lchunk <= folded.maxlen: - del folded.current[last_ew:] - folded.append(schunk) - folded.lastlen = sum(len(x) for x in folded.current) - continue - tstr = part.as_encoded_word(charset) - tlen = len(tstr) - has_ew = True - if folded.append_if_fits(part, tstr): - if has_ew and not part.comments: - last_ew = len(folded.current) - 1 - elif part.comments or part.token_type == 'quoted-string': - # If a comment is involved we can't combine EWs. And if a - # quoted string is involved, it's not worth the effort to - # try to combine them. 
- last_ew = None - continue - part._fold(folded) - - def cte_encode(self, charset, policy): - res = [] - last_ew = None - is_ew = False - for part in self: - spart = str(part) - try: - spart.encode('us-ascii') - res.append(spart) - except UnicodeEncodeError: - is_ew = True - if last_ew is None: - if not part.comments: - last_ew = len(res) - res.append(part.cte_encode(charset, policy)) - elif not part.has_leading_comment(): - if part[-1].token_type == 'cfws' and part.comments: - remainder = part.pop(-1) - else: - remainder = '' - for i, token in enumerate(part): - if token.token_type == 'bare-quoted-string': - part[i] = UnstructuredTokenList(token[:]) - tl = get_unstructured(''.join(res[last_ew:] + [spart])) - res[last_ew:] = [tl.as_encoded_word(charset)] - if part.comments or (not is_ew and part.token_type == 'quoted-string'): - last_ew = None - return ''.join(res) - -class Word(TokenList): - - token_type = 'word' - - -class CFWSList(WhiteSpaceTokenList): - - token_type = 'cfws' - - def has_leading_comment(self): - return bool(self.comments) - - -class Atom(TokenList): - - token_type = 'atom' - - -class Token(TokenList): - - token_type = 'token' - - -class EncodedWord(TokenList): - - token_type = 'encoded-word' - cte = None - charset = None - lang = None - - @property - def encoded(self): - if self.cte is not None: - return self.cte - _ew.encode(str(self), self.charset) - - - -class QuotedString(TokenList): - - token_type = 'quoted-string' - - @property - def content(self): - for x in self: - if x.token_type == 'bare-quoted-string': - return x.value - - @property - def quoted_value(self): - res = [] - for x in self: - if x.token_type == 'bare-quoted-string': - res.append(str(x)) - else: - res.append(x.value) - return ''.join(res) - - @property - def stripped_value(self): - for token in self: - if token.token_type == 'bare-quoted-string': - return token.value - - -class BareQuotedString(QuotedString): - - token_type = 'bare-quoted-string' - - def __str__(self): - 
return quote_string(''.join(str(x) for x in self)) - - @property - def value(self): - return ''.join(str(x) for x in self) - - -class Comment(WhiteSpaceTokenList): - - token_type = 'comment' - - def __str__(self): - return ''.join(sum([ - ["("], - [self.quote(x) for x in self], - [")"], - ], [])) - - def quote(self, value): - if value.token_type == 'comment': - return str(value) - return str(value).replace('\\', '\\\\').replace( - '(', '\(').replace( - ')', '\)') - - @property - def content(self): - return ''.join(str(x) for x in self) - - @property - def comments(self): - return [self.content] - -class AddressList(TokenList): - - token_type = 'address-list' - - @property - def addresses(self): - return [x for x in self if x.token_type=='address'] - - @property - def mailboxes(self): - return sum((x.mailboxes - for x in self if x.token_type=='address'), []) - - @property - def all_mailboxes(self): - return sum((x.all_mailboxes - for x in self if x.token_type=='address'), []) - - -class Address(TokenList): - - token_type = 'address' - - @property - def display_name(self): - if self[0].token_type == 'group': - return self[0].display_name - - @property - def mailboxes(self): - if self[0].token_type == 'mailbox': - return [self[0]] - elif self[0].token_type == 'invalid-mailbox': - return [] - return self[0].mailboxes - - @property - def all_mailboxes(self): - if self[0].token_type == 'mailbox': - return [self[0]] - elif self[0].token_type == 'invalid-mailbox': - return [self[0]] - return self[0].all_mailboxes - -class MailboxList(TokenList): - - token_type = 'mailbox-list' - - @property - def mailboxes(self): - return [x for x in self if x.token_type=='mailbox'] - - @property - def all_mailboxes(self): - return [x for x in self - if x.token_type in ('mailbox', 'invalid-mailbox')] - - -class GroupList(TokenList): - - token_type = 'group-list' - - @property - def mailboxes(self): - if not self or self[0].token_type != 'mailbox-list': - return [] - return 
self[0].mailboxes - - @property - def all_mailboxes(self): - if not self or self[0].token_type != 'mailbox-list': - return [] - return self[0].all_mailboxes - - -class Group(TokenList): - - token_type = "group" - - @property - def mailboxes(self): - if self[2].token_type != 'group-list': - return [] - return self[2].mailboxes - - @property - def all_mailboxes(self): - if self[2].token_type != 'group-list': - return [] - return self[2].all_mailboxes - - @property - def display_name(self): - return self[0].display_name - - -class NameAddr(TokenList): - - token_type = 'name-addr' - - @property - def display_name(self): - if len(self) == 1: - return None - return self[0].display_name - - @property - def local_part(self): - return self[-1].local_part - - @property - def domain(self): - return self[-1].domain - - @property - def route(self): - return self[-1].route - - @property - def addr_spec(self): - return self[-1].addr_spec - - -class AngleAddr(TokenList): - - token_type = 'angle-addr' - - @property - def local_part(self): - for x in self: - if x.token_type == 'addr-spec': - return x.local_part - - @property - def domain(self): - for x in self: - if x.token_type == 'addr-spec': - return x.domain - - @property - def route(self): - for x in self: - if x.token_type == 'obs-route': - return x.domains - - @property - def addr_spec(self): - for x in self: - if x.token_type == 'addr-spec': - return x.addr_spec - else: - return '<>' - - -class ObsRoute(TokenList): - - token_type = 'obs-route' - - @property - def domains(self): - return [x.domain for x in self if x.token_type == 'domain'] - - -class Mailbox(TokenList): - - token_type = 'mailbox' - - @property - def display_name(self): - if self[0].token_type == 'name-addr': - return self[0].display_name - - @property - def local_part(self): - return self[0].local_part - - @property - def domain(self): - return self[0].domain - - @property - def route(self): - if self[0].token_type == 'name-addr': - return self[0].route - - 
@property - def addr_spec(self): - return self[0].addr_spec - - -class InvalidMailbox(TokenList): - - token_type = 'invalid-mailbox' - - @property - def display_name(self): - return None - - local_part = domain = route = addr_spec = display_name - - -class Domain(TokenList): - - token_type = 'domain' - - @property - def domain(self): - return ''.join(super(Domain, self).value.split()) - - -class DotAtom(TokenList): - - token_type = 'dot-atom' - - -class DotAtomText(TokenList): - - token_type = 'dot-atom-text' - - -class AddrSpec(TokenList): - - token_type = 'addr-spec' - - @property - def local_part(self): - return self[0].local_part - - @property - def domain(self): - if len(self) < 3: - return None - return self[-1].domain - - @property - def value(self): - if len(self) < 3: - return self[0].value - return self[0].value.rstrip()+self[1].value+self[2].value.lstrip() - - @property - def addr_spec(self): - nameset = set(self.local_part) - if len(nameset) > len(nameset-DOT_ATOM_ENDS): - lp = quote_string(self.local_part) - else: - lp = self.local_part - if self.domain is not None: - return lp + '@' + self.domain - return lp - - -class ObsLocalPart(TokenList): - - token_type = 'obs-local-part' - - -class DisplayName(Phrase): - - token_type = 'display-name' - - @property - def display_name(self): - res = TokenList(self) - if res[0].token_type == 'cfws': - res.pop(0) - else: - if res[0][0].token_type == 'cfws': - res[0] = TokenList(res[0][1:]) - if res[-1].token_type == 'cfws': - res.pop() - else: - if res[-1][-1].token_type == 'cfws': - res[-1] = TokenList(res[-1][:-1]) - return res.value - - @property - def value(self): - quote = False - if self.defects: - quote = True - else: - for x in self: - if x.token_type == 'quoted-string': - quote = True - if quote: - pre = post = '' - if self[0].token_type=='cfws' or self[0][0].token_type=='cfws': - pre = ' ' - if self[-1].token_type=='cfws' or self[-1][-1].token_type=='cfws': - post = ' ' - return 
pre+quote_string(self.display_name)+post - else: - return super(DisplayName, self).value - - -class LocalPart(TokenList): - - token_type = 'local-part' - - @property - def value(self): - if self[0].token_type == "quoted-string": - return self[0].quoted_value - else: - return self[0].value - - @property - def local_part(self): - # Strip whitespace from front, back, and around dots. - res = [DOT] - last = DOT - last_is_tl = False - for tok in self[0] + [DOT]: - if tok.token_type == 'cfws': - continue - if (last_is_tl and tok.token_type == 'dot' and - last[-1].token_type == 'cfws'): - res[-1] = TokenList(last[:-1]) - is_tl = isinstance(tok, TokenList) - if (is_tl and last.token_type == 'dot' and - tok[0].token_type == 'cfws'): - res.append(TokenList(tok[1:])) - else: - res.append(tok) - last = res[-1] - last_is_tl = is_tl - res = TokenList(res[1:-1]) - return res.value - - -class DomainLiteral(TokenList): - - token_type = 'domain-literal' - - @property - def domain(self): - return ''.join(super(DomainLiteral, self).value.split()) - - @property - def ip(self): - for x in self: - if x.token_type == 'ptext': - return x.value - - -class MIMEVersion(TokenList): - - token_type = 'mime-version' - major = None - minor = None - - -class Parameter(TokenList): - - token_type = 'parameter' - sectioned = False - extended = False - charset = 'us-ascii' - - @property - def section_number(self): - # Because the first token, the attribute (name) eats CFWS, the second - # token is always the section if there is one. - return self[1].number if self.sectioned else 0 - - @property - def param_value(self): - # This is part of the "handle quoted extended parameters" hack. 
- for token in self: - if token.token_type == 'value': - return token.stripped_value - if token.token_type == 'quoted-string': - for token in token: - if token.token_type == 'bare-quoted-string': - for token in token: - if token.token_type == 'value': - return token.stripped_value - return '' - - -class InvalidParameter(Parameter): - - token_type = 'invalid-parameter' - - -class Attribute(TokenList): - - token_type = 'attribute' - - @property - def stripped_value(self): - for token in self: - if token.token_type.endswith('attrtext'): - return token.value - -class Section(TokenList): - - token_type = 'section' - number = None - - -class Value(TokenList): - - token_type = 'value' - - @property - def stripped_value(self): - token = self[0] - if token.token_type == 'cfws': - token = self[1] - if token.token_type.endswith( - ('quoted-string', 'attribute', 'extended-attribute')): - return token.stripped_value - return self.value - - -class MimeParameters(TokenList): - - token_type = 'mime-parameters' - - @property - def params(self): - # The RFC specifically states that the ordering of parameters is not - # guaranteed and may be reordered by the transport layer. So we have - # to assume the RFC 2231 pieces can come in any order. However, we - # output them in the order that we first see a given name, which gives - # us a stable __str__. - params = OrderedDict() - for token in self: - if not token.token_type.endswith('parameter'): - continue - if token[0].token_type != 'attribute': - continue - name = token[0].value.strip() - if name not in params: - params[name] = [] - params[name].append((token.section_number, token)) - for name, parts in params.items(): - parts = sorted(parts) - # XXX: there might be more recovery we could do here if, for - # example, this is really a case of a duplicate attribute name. 
- value_parts = [] - charset = parts[0][1].charset - for i, (section_number, param) in enumerate(parts): - if section_number != i: - param.defects.append(errors.InvalidHeaderDefect( - "inconsistent multipart parameter numbering")) - value = param.param_value - if param.extended: - try: - value = unquote_to_bytes(value) - except UnicodeEncodeError: - # source had surrogate escaped bytes. What we do now - # is a bit of an open question. I'm not sure this is - # the best choice, but it is what the old algorithm did - value = unquote(value, encoding='latin-1') - else: - try: - value = value.decode(charset, 'surrogateescape') - except LookupError: - # XXX: there should really be a custom defect for - # unknown character set to make it easy to find, - # because otherwise unknown charset is a silent - # failure. - value = value.decode('us-ascii', 'surrogateescape') - if utils._has_surrogates(value): - param.defects.append(errors.UndecodableBytesDefect()) - value_parts.append(value) - value = ''.join(value_parts) - yield name, value - - def __str__(self): - params = [] - for name, value in self.params: - if value: - params.append('{}={}'.format(name, quote_string(value))) - else: - params.append(name) - params = '; '.join(params) - return ' ' + params if params else '' - - -class ParameterizedHeaderValue(TokenList): - - @property - def params(self): - for token in reversed(self): - if token.token_type == 'mime-parameters': - return token.params - return {} - - @property - def parts(self): - if self and self[-1].token_type == 'mime-parameters': - # We don't want to start a new line if all of the params don't fit - # after the value, so unwrap the parameter list. 
- return TokenList(self[:-1] + self[-1]) - return TokenList(self).parts - - -class ContentType(ParameterizedHeaderValue): - - token_type = 'content-type' - maintype = 'text' - subtype = 'plain' - - -class ContentDisposition(ParameterizedHeaderValue): - - token_type = 'content-disposition' - content_disposition = None - - -class ContentTransferEncoding(TokenList): - - token_type = 'content-transfer-encoding' - cte = '7bit' - - -class HeaderLabel(TokenList): - - token_type = 'header-label' - - -class Header(TokenList): - - token_type = 'header' - - def _fold(self, folded): - folded.append(str(self.pop(0))) - folded.lastlen = len(folded.current[0]) - # The first line of the header is different from all others: we don't - # want to start a new object on a new line if it has any fold points in - # it that would allow part of it to be on the first header line. - # Further, if the first fold point would fit on the new line, we want - # to do that, but if it doesn't we want to put it on the first line. - # Folded supports this via the stickyspace attribute. If this - # attribute is not None, it does the special handling. 
- folded.stickyspace = str(self.pop(0)) if self[0].token_type == 'cfws' else '' - rest = self.pop(0) - if self: - raise ValueError("Malformed Header token list") - rest._fold(folded) - - -# -# Terminal classes and instances -# - -class Terminal(str): - - def __new__(cls, value, token_type): - self = super(Terminal, cls).__new__(cls, value) - self.token_type = token_type - self.defects = [] - return self - - def __repr__(self): - return "{}({})".format(self.__class__.__name__, super(Terminal, self).__repr__()) - - @property - def all_defects(self): - return list(self.defects) - - def _pp(self, indent=''): - return ["{}{}/{}({}){}".format( - indent, - self.__class__.__name__, - self.token_type, - super(Terminal, self).__repr__(), - '' if not self.defects else ' {}'.format(self.defects), - )] - - def cte_encode(self, charset, policy): - value = str(self) - try: - value.encode('us-ascii') - return value - except UnicodeEncodeError: - return _ew.encode(value, charset) - - def pop_trailing_ws(self): - # This terminates the recursion. - return None - - def pop_leading_fws(self): - # This terminates the recursion. 
- return None - - @property - def comments(self): - return [] - - def has_leading_comment(self): - return False - - def __getnewargs__(self): - return(str(self), self.token_type) - - -class WhiteSpaceTerminal(Terminal): - - @property - def value(self): - return ' ' - - def startswith_fws(self): - return True - - has_fws = True - - -class ValueTerminal(Terminal): - - @property - def value(self): - return self - - def startswith_fws(self): - return False - - has_fws = False - - def as_encoded_word(self, charset): - return _ew.encode(str(self), charset) - - -class EWWhiteSpaceTerminal(WhiteSpaceTerminal): - - @property - def value(self): - return '' - - @property - def encoded(self): - return self[:] - - def __str__(self): - return '' - - has_fws = True - - -# XXX these need to become classes and used as instances so -# that a program can't change them in a parse tree and screw -# up other parse trees. Maybe should have tests for that, too. -DOT = ValueTerminal('.', 'dot') -ListSeparator = ValueTerminal(',', 'list-separator') -RouteComponentMarker = ValueTerminal('@', 'route-component-marker') - -# -# Parser -# - -"""Parse strings according to RFC822/2047/2822/5322 rules. - -This is a stateless parser. Each get_XXX function accepts a string and -returns either a Terminal or a TokenList representing the RFC object named -by the method and a string containing the remaining unparsed characters -from the input. Thus a parser method consumes the next syntactic construct -of a given type and returns a token representing the construct plus the -unparsed remainder of the input string. - -For example, if the first element of a structured header is a 'phrase', -then: - - phrase, value = get_phrase(value) - -returns the complete phrase from the start of the string value, plus any -characters left in the string after the phrase is removed. 
- -""" - -_wsp_splitter = re.compile(r'([{}]+)'.format(''.join(WSP))).split -_non_atom_end_matcher = re.compile(r"[^{}]+".format( - ''.join(ATOM_ENDS).replace('\\','\\\\').replace(']','\]'))).match -_non_printable_finder = re.compile(r"[\x00-\x20\x7F]").findall -_non_token_end_matcher = re.compile(r"[^{}]+".format( - ''.join(TOKEN_ENDS).replace('\\','\\\\').replace(']','\]'))).match -_non_attribute_end_matcher = re.compile(r"[^{}]+".format( - ''.join(ATTRIBUTE_ENDS).replace('\\','\\\\').replace(']','\]'))).match -_non_extended_attribute_end_matcher = re.compile(r"[^{}]+".format( - ''.join(EXTENDED_ATTRIBUTE_ENDS).replace( - '\\','\\\\').replace(']','\]'))).match - -def _validate_xtext(xtext): - """If input token contains ASCII non-printables, register a defect.""" - - non_printables = _non_printable_finder(xtext) - if non_printables: - xtext.defects.append(errors.NonPrintableDefect(non_printables)) - if utils._has_surrogates(xtext): - xtext.defects.append(errors.UndecodableBytesDefect( - "Non-ASCII characters found in header token")) - -def _get_ptext_to_endchars(value, endchars): - """Scan printables/quoted-pairs until endchars and return unquoted ptext. - - This function turns a run of qcontent, ccontent-without-comments, or - dtext-with-quoted-printables into a single string by unquoting any - quoted printables. It returns the string, the remaining value, and - a flag that is True iff there were any quoted printables decoded. 
- - """ - _3to2list = list(_wsp_splitter(value, 1)) - fragment, remainder, = _3to2list[:1] + [_3to2list[1:]] - vchars = [] - escape = False - had_qp = False - for pos in range(len(fragment)): - if fragment[pos] == '\\': - if escape: - escape = False - had_qp = True - else: - escape = True - continue - if escape: - escape = False - elif fragment[pos] in endchars: - break - vchars.append(fragment[pos]) - else: - pos = pos + 1 - return ''.join(vchars), ''.join([fragment[pos:]] + remainder), had_qp - -def _decode_ew_run(value): - """ Decode a run of RFC2047 encoded words. - - _decode_ew_run(value) -> (text, value, defects) - - Scans the supplied value for a run of tokens that look like they are RFC - 2047 encoded words, decodes those words into text according to RFC 2047 - rules (whitespace between encoded words is discarded), and returns the text - and the remaining value (including any leading whitespace on the remaining - value), as well as a list of any defects encountered while decoding. The - input value may not have any leading whitespace. - - """ - res = [] - defects = [] - last_ws = '' - while value: - try: - tok, ws, value = _wsp_splitter(value, 1) - except ValueError: - tok, ws, value = value, '', '' - if not (tok.startswith('=?') and tok.endswith('?=')): - return ''.join(res), last_ws + tok + ws + value, defects - text, charset, lang, new_defects = _ew.decode(tok) - res.append(text) - defects.extend(new_defects) - last_ws = ws - return ''.join(res), last_ws, defects - -def get_fws(value): - """FWS = 1*WSP - - This isn't the RFC definition. We're using fws to represent tokens where - folding can be done, but when we are parsing the *un*folding has already - been done so we don't need to watch out for CRLF. - - """ - newvalue = value.lstrip() - fws = WhiteSpaceTerminal(value[:len(value)-len(newvalue)], 'fws') - return fws, newvalue - -def get_encoded_word(value): - """ encoded-word = "=?" charset "?" encoding "?" 
encoded-text "?=" - - """ - ew = EncodedWord() - if not value.startswith('=?'): - raise errors.HeaderParseError( - "expected encoded word but found {}".format(value)) - _3to2list1 = list(value[2:].split('?=', 1)) - tok, remainder, = _3to2list1[:1] + [_3to2list1[1:]] - if tok == value[2:]: - raise errors.HeaderParseError( - "expected encoded word but found {}".format(value)) - remstr = ''.join(remainder) - if remstr[:2].isdigit(): - _3to2list3 = list(remstr.split('?=', 1)) - rest, remainder, = _3to2list3[:1] + [_3to2list3[1:]] - tok = tok + '?=' + rest - if len(tok.split()) > 1: - ew.defects.append(errors.InvalidHeaderDefect( - "whitespace inside encoded word")) - ew.cte = value - value = ''.join(remainder) - try: - text, charset, lang, defects = _ew.decode('=?' + tok + '?=') - except ValueError: - raise errors.HeaderParseError( - "encoded word format invalid: '{}'".format(ew.cte)) - ew.charset = charset - ew.lang = lang - ew.defects.extend(defects) - while text: - if text[0] in WSP: - token, text = get_fws(text) - ew.append(token) - continue - _3to2list5 = list(_wsp_splitter(text, 1)) - chars, remainder, = _3to2list5[:1] + [_3to2list5[1:]] - vtext = ValueTerminal(chars, 'vtext') - _validate_xtext(vtext) - ew.append(vtext) - text = ''.join(remainder) - return ew, value - -def get_unstructured(value): - """unstructured = (*([FWS] vchar) *WSP) / obs-unstruct - obs-unstruct = *((*LF *CR *(obs-utext) *LF *CR)) / FWS) - obs-utext = %d0 / obs-NO-WS-CTL / LF / CR - - obs-NO-WS-CTL is control characters except WSP/CR/LF. - - So, basically, we have printable runs, plus control characters or nulls in - the obsolete syntax, separated by whitespace. Since RFC 2047 uses the - obsolete syntax in its specification, but requires whitespace on either - side of the encoded words, I can see no reason to need to separate the - non-printable-non-whitespace from the printable runs if they occur, so we - parse this into xtext tokens separated by WSP tokens. 
- - Because an 'unstructured' value must by definition constitute the entire - value, this 'get' routine does not return a remaining value, only the - parsed TokenList. - - """ - # XXX: but what about bare CR and LF? They might signal the start or - # end of an encoded word. YAGNI for now, since out current parsers - # will never send us strings with bard CR or LF. - - unstructured = UnstructuredTokenList() - while value: - if value[0] in WSP: - token, value = get_fws(value) - unstructured.append(token) - continue - if value.startswith('=?'): - try: - token, value = get_encoded_word(value) - except errors.HeaderParseError: - pass - else: - have_ws = True - if len(unstructured) > 0: - if unstructured[-1].token_type != 'fws': - unstructured.defects.append(errors.InvalidHeaderDefect( - "missing whitespace before encoded word")) - have_ws = False - if have_ws and len(unstructured) > 1: - if unstructured[-2].token_type == 'encoded-word': - unstructured[-1] = EWWhiteSpaceTerminal( - unstructured[-1], 'fws') - unstructured.append(token) - continue - _3to2list7 = list(_wsp_splitter(value, 1)) - tok, remainder, = _3to2list7[:1] + [_3to2list7[1:]] - vtext = ValueTerminal(tok, 'vtext') - _validate_xtext(vtext) - unstructured.append(vtext) - value = ''.join(remainder) - return unstructured - -def get_qp_ctext(value): - """ctext = <printable ascii except \ ( )> - - This is not the RFC ctext, since we are handling nested comments in comment - and unquoting quoted-pairs here. We allow anything except the '()' - characters, but if we find any ASCII other than the RFC defined printable - ASCII an NonPrintableDefect is added to the token's defects list. Since - quoted pairs are converted to their unquoted values, what is returned is - a 'ptext' token. In this case it is a WhiteSpaceTerminal, so it's value - is ' '. 
- - """ - ptext, value, _ = _get_ptext_to_endchars(value, '()') - ptext = WhiteSpaceTerminal(ptext, 'ptext') - _validate_xtext(ptext) - return ptext, value - -def get_qcontent(value): - """qcontent = qtext / quoted-pair - - We allow anything except the DQUOTE character, but if we find any ASCII - other than the RFC defined printable ASCII an NonPrintableDefect is - added to the token's defects list. Any quoted pairs are converted to their - unquoted values, so what is returned is a 'ptext' token. In this case it - is a ValueTerminal. - - """ - ptext, value, _ = _get_ptext_to_endchars(value, '"') - ptext = ValueTerminal(ptext, 'ptext') - _validate_xtext(ptext) - return ptext, value - -def get_atext(value): - """atext = <matches _atext_matcher> - - We allow any non-ATOM_ENDS in atext, but add an InvalidATextDefect to - the token's defects list if we find non-atext characters. - """ - m = _non_atom_end_matcher(value) - if not m: - raise errors.HeaderParseError( - "expected atext but found '{}'".format(value)) - atext = m.group() - value = value[len(atext):] - atext = ValueTerminal(atext, 'atext') - _validate_xtext(atext) - return atext, value - -def get_bare_quoted_string(value): - """bare-quoted-string = DQUOTE *([FWS] qcontent) [FWS] DQUOTE - - A quoted-string without the leading or trailing white space. Its - value is the text between the quote marks, with whitespace - preserved and quoted pairs decoded. 
- """ - if value[0] != '"': - raise errors.HeaderParseError( - "expected '\"' but found '{}'".format(value)) - bare_quoted_string = BareQuotedString() - value = value[1:] - while value and value[0] != '"': - if value[0] in WSP: - token, value = get_fws(value) - else: - token, value = get_qcontent(value) - bare_quoted_string.append(token) - if not value: - bare_quoted_string.defects.append(errors.InvalidHeaderDefect( - "end of header inside quoted string")) - return bare_quoted_string, value - return bare_quoted_string, value[1:] - -def get_comment(value): - """comment = "(" *([FWS] ccontent) [FWS] ")" - ccontent = ctext / quoted-pair / comment - - We handle nested comments here, and quoted-pair in our qp-ctext routine. - """ - if value and value[0] != '(': - raise errors.HeaderParseError( - "expected '(' but found '{}'".format(value)) - comment = Comment() - value = value[1:] - while value and value[0] != ")": - if value[0] in WSP: - token, value = get_fws(value) - elif value[0] == '(': - token, value = get_comment(value) - else: - token, value = get_qp_ctext(value) - comment.append(token) - if not value: - comment.defects.append(errors.InvalidHeaderDefect( - "end of header inside comment")) - return comment, value - return comment, value[1:] - -def get_cfws(value): - """CFWS = (1*([FWS] comment) [FWS]) / FWS - - """ - cfws = CFWSList() - while value and value[0] in CFWS_LEADER: - if value[0] in WSP: - token, value = get_fws(value) - else: - token, value = get_comment(value) - cfws.append(token) - return cfws, value - -def get_quoted_string(value): - """quoted-string = [CFWS] <bare-quoted-string> [CFWS] - - 'bare-quoted-string' is an intermediate class defined by this - parser and not by the RFC grammar. It is the quoted string - without any attached CFWS. 
- """ - quoted_string = QuotedString() - if value and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - quoted_string.append(token) - token, value = get_bare_quoted_string(value) - quoted_string.append(token) - if value and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - quoted_string.append(token) - return quoted_string, value - -def get_atom(value): - """atom = [CFWS] 1*atext [CFWS] - - """ - atom = Atom() - if value and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - atom.append(token) - if value and value[0] in ATOM_ENDS: - raise errors.HeaderParseError( - "expected atom but found '{}'".format(value)) - token, value = get_atext(value) - atom.append(token) - if value and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - atom.append(token) - return atom, value - -def get_dot_atom_text(value): - """ dot-text = 1*atext *("." 1*atext) - - """ - dot_atom_text = DotAtomText() - if not value or value[0] in ATOM_ENDS: - raise errors.HeaderParseError("expected atom at a start of " - "dot-atom-text but found '{}'".format(value)) - while value and value[0] not in ATOM_ENDS: - token, value = get_atext(value) - dot_atom_text.append(token) - if value and value[0] == '.': - dot_atom_text.append(DOT) - value = value[1:] - if dot_atom_text[-1] is DOT: - raise errors.HeaderParseError("expected atom at end of dot-atom-text " - "but found '{}'".format('.'+value)) - return dot_atom_text, value - -def get_dot_atom(value): - """ dot-atom = [CFWS] dot-atom-text [CFWS] - - """ - dot_atom = DotAtom() - if value[0] in CFWS_LEADER: - token, value = get_cfws(value) - dot_atom.append(token) - token, value = get_dot_atom_text(value) - dot_atom.append(token) - if value and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - dot_atom.append(token) - return dot_atom, value - -def get_word(value): - """word = atom / quoted-string - - Either atom or quoted-string may start with CFWS. 
We have to peel off this - CFWS first to determine which type of word to parse. Afterward we splice - the leading CFWS, if any, into the parsed sub-token. - - If neither an atom or a quoted-string is found before the next special, a - HeaderParseError is raised. - - The token returned is either an Atom or a QuotedString, as appropriate. - This means the 'word' level of the formal grammar is not represented in the - parse tree; this is because having that extra layer when manipulating the - parse tree is more confusing than it is helpful. - - """ - if value[0] in CFWS_LEADER: - leader, value = get_cfws(value) - else: - leader = None - if value[0]=='"': - token, value = get_quoted_string(value) - elif value[0] in SPECIALS: - raise errors.HeaderParseError("Expected 'atom' or 'quoted-string' " - "but found '{}'".format(value)) - else: - token, value = get_atom(value) - if leader is not None: - token[:0] = [leader] - return token, value - -def get_phrase(value): - """ phrase = 1*word / obs-phrase - obs-phrase = word *(word / "." / CFWS) - - This means a phrase can be a sequence of words, periods, and CFWS in any - order as long as it starts with at least one word. If anything other than - words is detected, an ObsoleteHeaderDefect is added to the token's defect - list. We also accept a phrase that starts with CFWS followed by a dot; - this is registered as an InvalidHeaderDefect, since it is not supported by - even the obsolete grammar. 
- - """ - phrase = Phrase() - try: - token, value = get_word(value) - phrase.append(token) - except errors.HeaderParseError: - phrase.defects.append(errors.InvalidHeaderDefect( - "phrase does not start with word")) - while value and value[0] not in PHRASE_ENDS: - if value[0]=='.': - phrase.append(DOT) - phrase.defects.append(errors.ObsoleteHeaderDefect( - "period in 'phrase'")) - value = value[1:] - else: - try: - token, value = get_word(value) - except errors.HeaderParseError: - if value[0] in CFWS_LEADER: - token, value = get_cfws(value) - phrase.defects.append(errors.ObsoleteHeaderDefect( - "comment found without atom")) - else: - raise - phrase.append(token) - return phrase, value - -def get_local_part(value): - """ local-part = dot-atom / quoted-string / obs-local-part - - """ - local_part = LocalPart() - leader = None - if value[0] in CFWS_LEADER: - leader, value = get_cfws(value) - if not value: - raise errors.HeaderParseError( - "expected local-part but found '{}'".format(value)) - try: - token, value = get_dot_atom(value) - except errors.HeaderParseError: - try: - token, value = get_word(value) - except errors.HeaderParseError: - if value[0] != '\\' and value[0] in PHRASE_ENDS: - raise - token = TokenList() - if leader is not None: - token[:0] = [leader] - local_part.append(token) - if value and (value[0]=='\\' or value[0] not in PHRASE_ENDS): - obs_local_part, value = get_obs_local_part(str(local_part) + value) - if obs_local_part.token_type == 'invalid-obs-local-part': - local_part.defects.append(errors.InvalidHeaderDefect( - "local-part is not dot-atom, quoted-string, or obs-local-part")) - else: - local_part.defects.append(errors.ObsoleteHeaderDefect( - "local-part is not a dot-atom (contains CFWS)")) - local_part[0] = obs_local_part - try: - local_part.value.encode('ascii') - except UnicodeEncodeError: - local_part.defects.append(errors.NonASCIILocalPartDefect( - "local-part contains non-ASCII characters)")) - return local_part, value - -def 
get_obs_local_part(value): - """ obs-local-part = word *("." word) - """ - obs_local_part = ObsLocalPart() - last_non_ws_was_dot = False - while value and (value[0]=='\\' or value[0] not in PHRASE_ENDS): - if value[0] == '.': - if last_non_ws_was_dot: - obs_local_part.defects.append(errors.InvalidHeaderDefect( - "invalid repeated '.'")) - obs_local_part.append(DOT) - last_non_ws_was_dot = True - value = value[1:] - continue - elif value[0]=='\\': - obs_local_part.append(ValueTerminal(value[0], - 'misplaced-special')) - value = value[1:] - obs_local_part.defects.append(errors.InvalidHeaderDefect( - "'\\' character outside of quoted-string/ccontent")) - last_non_ws_was_dot = False - continue - if obs_local_part and obs_local_part[-1].token_type != 'dot': - obs_local_part.defects.append(errors.InvalidHeaderDefect( - "missing '.' between words")) - try: - token, value = get_word(value) - last_non_ws_was_dot = False - except errors.HeaderParseError: - if value[0] not in CFWS_LEADER: - raise - token, value = get_cfws(value) - obs_local_part.append(token) - if (obs_local_part[0].token_type == 'dot' or - obs_local_part[0].token_type=='cfws' and - obs_local_part[1].token_type=='dot'): - obs_local_part.defects.append(errors.InvalidHeaderDefect( - "Invalid leading '.' in local part")) - if (obs_local_part[-1].token_type == 'dot' or - obs_local_part[-1].token_type=='cfws' and - obs_local_part[-2].token_type=='dot'): - obs_local_part.defects.append(errors.InvalidHeaderDefect( - "Invalid trailing '.' in local part")) - if obs_local_part.defects: - obs_local_part.token_type = 'invalid-obs-local-part' - return obs_local_part, value - -def get_dtext(value): - """ dtext = <printable ascii except \ [ ]> / obs-dtext - obs-dtext = obs-NO-WS-CTL / quoted-pair - - We allow anything except the excluded characters, but if we find any - ASCII other than the RFC defined printable ASCII an NonPrintableDefect is - added to the token's defects list. 
Quoted pairs are converted to their - unquoted values, so what is returned is a ptext token, in this case a - ValueTerminal. If there were quoted-printables, an ObsoleteHeaderDefect is - added to the returned token's defect list. - - """ - ptext, value, had_qp = _get_ptext_to_endchars(value, '[]') - ptext = ValueTerminal(ptext, 'ptext') - if had_qp: - ptext.defects.append(errors.ObsoleteHeaderDefect( - "quoted printable found in domain-literal")) - _validate_xtext(ptext) - return ptext, value - -def _check_for_early_dl_end(value, domain_literal): - if value: - return False - domain_literal.append(errors.InvalidHeaderDefect( - "end of input inside domain-literal")) - domain_literal.append(ValueTerminal(']', 'domain-literal-end')) - return True - -def get_domain_literal(value): - """ domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS] - - """ - domain_literal = DomainLiteral() - if value[0] in CFWS_LEADER: - token, value = get_cfws(value) - domain_literal.append(token) - if not value: - raise errors.HeaderParseError("expected domain-literal") - if value[0] != '[': - raise errors.HeaderParseError("expected '[' at start of domain-literal " - "but found '{}'".format(value)) - value = value[1:] - if _check_for_early_dl_end(value, domain_literal): - return domain_literal, value - domain_literal.append(ValueTerminal('[', 'domain-literal-start')) - if value[0] in WSP: - token, value = get_fws(value) - domain_literal.append(token) - token, value = get_dtext(value) - domain_literal.append(token) - if _check_for_early_dl_end(value, domain_literal): - return domain_literal, value - if value[0] in WSP: - token, value = get_fws(value) - domain_literal.append(token) - if _check_for_early_dl_end(value, domain_literal): - return domain_literal, value - if value[0] != ']': - raise errors.HeaderParseError("expected ']' at end of domain-literal " - "but found '{}'".format(value)) - domain_literal.append(ValueTerminal(']', 'domain-literal-end')) - value = value[1:] - if value 
and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - domain_literal.append(token) - return domain_literal, value - -def get_domain(value): - """ domain = dot-atom / domain-literal / obs-domain - obs-domain = atom *("." atom)) - - """ - domain = Domain() - leader = None - if value[0] in CFWS_LEADER: - leader, value = get_cfws(value) - if not value: - raise errors.HeaderParseError( - "expected domain but found '{}'".format(value)) - if value[0] == '[': - token, value = get_domain_literal(value) - if leader is not None: - token[:0] = [leader] - domain.append(token) - return domain, value - try: - token, value = get_dot_atom(value) - except errors.HeaderParseError: - token, value = get_atom(value) - if leader is not None: - token[:0] = [leader] - domain.append(token) - if value and value[0] == '.': - domain.defects.append(errors.ObsoleteHeaderDefect( - "domain is not a dot-atom (contains CFWS)")) - if domain[0].token_type == 'dot-atom': - domain[:] = domain[0] - while value and value[0] == '.': - domain.append(DOT) - token, value = get_atom(value[1:]) - domain.append(token) - return domain, value - -def get_addr_spec(value): - """ addr-spec = local-part "@" domain - - """ - addr_spec = AddrSpec() - token, value = get_local_part(value) - addr_spec.append(token) - if not value or value[0] != '@': - addr_spec.defects.append(errors.InvalidHeaderDefect( - "add-spec local part with no domain")) - return addr_spec, value - addr_spec.append(ValueTerminal('@', 'address-at-symbol')) - token, value = get_domain(value[1:]) - addr_spec.append(token) - return addr_spec, value - -def get_obs_route(value): - """ obs-route = obs-domain-list ":" - obs-domain-list = *(CFWS / ",") "@" domain *("," [CFWS] ["@" domain]) - - Returns an obs-route token with the appropriate sub-tokens (that is, - there is no obs-domain-list in the parse tree). 
- """ - obs_route = ObsRoute() - while value and (value[0]==',' or value[0] in CFWS_LEADER): - if value[0] in CFWS_LEADER: - token, value = get_cfws(value) - obs_route.append(token) - elif value[0] == ',': - obs_route.append(ListSeparator) - value = value[1:] - if not value or value[0] != '@': - raise errors.HeaderParseError( - "expected obs-route domain but found '{}'".format(value)) - obs_route.append(RouteComponentMarker) - token, value = get_domain(value[1:]) - obs_route.append(token) - while value and value[0]==',': - obs_route.append(ListSeparator) - value = value[1:] - if not value: - break - if value[0] in CFWS_LEADER: - token, value = get_cfws(value) - obs_route.append(token) - if value[0] == '@': - obs_route.append(RouteComponentMarker) - token, value = get_domain(value[1:]) - obs_route.append(token) - if not value: - raise errors.HeaderParseError("end of header while parsing obs-route") - if value[0] != ':': - raise errors.HeaderParseError( "expected ':' marking end of " - "obs-route but found '{}'".format(value)) - obs_route.append(ValueTerminal(':', 'end-of-obs-route-marker')) - return obs_route, value[1:] - -def get_angle_addr(value): - """ angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr - obs-angle-addr = [CFWS] "<" obs-route addr-spec ">" [CFWS] - - """ - angle_addr = AngleAddr() - if value[0] in CFWS_LEADER: - token, value = get_cfws(value) - angle_addr.append(token) - if not value or value[0] != '<': - raise errors.HeaderParseError( - "expected angle-addr but found '{}'".format(value)) - angle_addr.append(ValueTerminal('<', 'angle-addr-start')) - value = value[1:] - # Although it is not legal per RFC5322, SMTP uses '<>' in certain - # circumstances. 
- if value[0] == '>': - angle_addr.append(ValueTerminal('>', 'angle-addr-end')) - angle_addr.defects.append(errors.InvalidHeaderDefect( - "null addr-spec in angle-addr")) - value = value[1:] - return angle_addr, value - try: - token, value = get_addr_spec(value) - except errors.HeaderParseError: - try: - token, value = get_obs_route(value) - angle_addr.defects.append(errors.ObsoleteHeaderDefect( - "obsolete route specification in angle-addr")) - except errors.HeaderParseError: - raise errors.HeaderParseError( - "expected addr-spec or obs-route but found '{}'".format(value)) - angle_addr.append(token) - token, value = get_addr_spec(value) - angle_addr.append(token) - if value and value[0] == '>': - value = value[1:] - else: - angle_addr.defects.append(errors.InvalidHeaderDefect( - "missing trailing '>' on angle-addr")) - angle_addr.append(ValueTerminal('>', 'angle-addr-end')) - if value and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - angle_addr.append(token) - return angle_addr, value - -def get_display_name(value): - """ display-name = phrase - - Because this is simply a name-rule, we don't return a display-name - token containing a phrase, but rather a display-name token with - the content of the phrase. - - """ - display_name = DisplayName() - token, value = get_phrase(value) - display_name.extend(token[:]) - display_name.defects = token.defects[:] - return display_name, value - - -def get_name_addr(value): - """ name-addr = [display-name] angle-addr - - """ - name_addr = NameAddr() - # Both the optional display name and the angle-addr can start with cfws. 
- leader = None - if value[0] in CFWS_LEADER: - leader, value = get_cfws(value) - if not value: - raise errors.HeaderParseError( - "expected name-addr but found '{}'".format(leader)) - if value[0] != '<': - if value[0] in PHRASE_ENDS: - raise errors.HeaderParseError( - "expected name-addr but found '{}'".format(value)) - token, value = get_display_name(value) - if not value: - raise errors.HeaderParseError( - "expected name-addr but found '{}'".format(token)) - if leader is not None: - token[0][:0] = [leader] - leader = None - name_addr.append(token) - token, value = get_angle_addr(value) - if leader is not None: - token[:0] = [leader] - name_addr.append(token) - return name_addr, value - -def get_mailbox(value): - """ mailbox = name-addr / addr-spec - - """ - # The only way to figure out if we are dealing with a name-addr or an - # addr-spec is to try parsing each one. - mailbox = Mailbox() - try: - token, value = get_name_addr(value) - except errors.HeaderParseError: - try: - token, value = get_addr_spec(value) - except errors.HeaderParseError: - raise errors.HeaderParseError( - "expected mailbox but found '{}'".format(value)) - if any(isinstance(x, errors.InvalidHeaderDefect) - for x in token.all_defects): - mailbox.token_type = 'invalid-mailbox' - mailbox.append(token) - return mailbox, value - -def get_invalid_mailbox(value, endchars): - """ Read everything up to one of the chars in endchars. - - This is outside the formal grammar. The InvalidMailbox TokenList that is - returned acts like a Mailbox, but the data attributes are None. 
- - """ - invalid_mailbox = InvalidMailbox() - while value and value[0] not in endchars: - if value[0] in PHRASE_ENDS: - invalid_mailbox.append(ValueTerminal(value[0], - 'misplaced-special')) - value = value[1:] - else: - token, value = get_phrase(value) - invalid_mailbox.append(token) - return invalid_mailbox, value - -def get_mailbox_list(value): - """ mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list - obs-mbox-list = *([CFWS] ",") mailbox *("," [mailbox / CFWS]) - - For this routine we go outside the formal grammar in order to improve error - handling. We recognize the end of the mailbox list only at the end of the - value or at a ';' (the group terminator). This is so that we can turn - invalid mailboxes into InvalidMailbox tokens and continue parsing any - remaining valid mailboxes. We also allow all mailbox entries to be null, - and this condition is handled appropriately at a higher level. - - """ - mailbox_list = MailboxList() - while value and value[0] != ';': - try: - token, value = get_mailbox(value) - mailbox_list.append(token) - except errors.HeaderParseError: - leader = None - if value[0] in CFWS_LEADER: - leader, value = get_cfws(value) - if not value or value[0] in ',;': - mailbox_list.append(leader) - mailbox_list.defects.append(errors.ObsoleteHeaderDefect( - "empty element in mailbox-list")) - else: - token, value = get_invalid_mailbox(value, ',;') - if leader is not None: - token[:0] = [leader] - mailbox_list.append(token) - mailbox_list.defects.append(errors.InvalidHeaderDefect( - "invalid mailbox in mailbox-list")) - elif value[0] == ',': - mailbox_list.defects.append(errors.ObsoleteHeaderDefect( - "empty element in mailbox-list")) - else: - token, value = get_invalid_mailbox(value, ',;') - if leader is not None: - token[:0] = [leader] - mailbox_list.append(token) - mailbox_list.defects.append(errors.InvalidHeaderDefect( - "invalid mailbox in mailbox-list")) - if value and value[0] not in ',;': - # Crap after mailbox; treat it as an 
invalid mailbox. - # The mailbox info will still be available. - mailbox = mailbox_list[-1] - mailbox.token_type = 'invalid-mailbox' - token, value = get_invalid_mailbox(value, ',;') - mailbox.extend(token) - mailbox_list.defects.append(errors.InvalidHeaderDefect( - "invalid mailbox in mailbox-list")) - if value and value[0] == ',': - mailbox_list.append(ListSeparator) - value = value[1:] - return mailbox_list, value - - -def get_group_list(value): - """ group-list = mailbox-list / CFWS / obs-group-list - obs-group-list = 1*([CFWS] ",") [CFWS] - - """ - group_list = GroupList() - if not value: - group_list.defects.append(errors.InvalidHeaderDefect( - "end of header before group-list")) - return group_list, value - leader = None - if value and value[0] in CFWS_LEADER: - leader, value = get_cfws(value) - if not value: - # This should never happen in email parsing, since CFWS-only is a - # legal alternative to group-list in a group, which is the only - # place group-list appears. - group_list.defects.append(errors.InvalidHeaderDefect( - "end of header in group-list")) - group_list.append(leader) - return group_list, value - if value[0] == ';': - group_list.append(leader) - return group_list, value - token, value = get_mailbox_list(value) - if len(token.all_mailboxes)==0: - if leader is not None: - group_list.append(leader) - group_list.extend(token) - group_list.defects.append(errors.ObsoleteHeaderDefect( - "group-list with empty entries")) - return group_list, value - if leader is not None: - token[:0] = [leader] - group_list.append(token) - return group_list, value - -def get_group(value): - """ group = display-name ":" [group-list] ";" [CFWS] - - """ - group = Group() - token, value = get_display_name(value) - if not value or value[0] != ':': - raise errors.HeaderParseError("expected ':' at end of group " - "display name but found '{}'".format(value)) - group.append(token) - group.append(ValueTerminal(':', 'group-display-name-terminator')) - value = value[1:] - if 
value and value[0] == ';': - group.append(ValueTerminal(';', 'group-terminator')) - return group, value[1:] - token, value = get_group_list(value) - group.append(token) - if not value: - group.defects.append(errors.InvalidHeaderDefect( - "end of header in group")) - if value[0] != ';': - raise errors.HeaderParseError( - "expected ';' at end of group but found {}".format(value)) - group.append(ValueTerminal(';', 'group-terminator')) - value = value[1:] - if value and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - group.append(token) - return group, value - -def get_address(value): - """ address = mailbox / group - - Note that counter-intuitively, an address can be either a single address or - a list of addresses (a group). This is why the returned Address object has - a 'mailboxes' attribute which treats a single address as a list of length - one. When you need to differentiate between to two cases, extract the single - element, which is either a mailbox or a group token. - - """ - # The formal grammar isn't very helpful when parsing an address. mailbox - # and group, especially when allowing for obsolete forms, start off very - # similarly. It is only when you reach one of @, <, or : that you know - # what you've got. So, we try each one in turn, starting with the more - # likely of the two. We could perhaps make this more efficient by looking - # for a phrase and then branching based on the next character, but that - # would be a premature optimization. 
- address = Address() - try: - token, value = get_group(value) - except errors.HeaderParseError: - try: - token, value = get_mailbox(value) - except errors.HeaderParseError: - raise errors.HeaderParseError( - "expected address but found '{}'".format(value)) - address.append(token) - return address, value - -def get_address_list(value): - """ address_list = (address *("," address)) / obs-addr-list - obs-addr-list = *([CFWS] ",") address *("," [address / CFWS]) - - We depart from the formal grammar here by continuing to parse until the end - of the input, assuming the input to be entirely composed of an - address-list. This is always true in email parsing, and allows us - to skip invalid addresses to parse additional valid ones. - - """ - address_list = AddressList() - while value: - try: - token, value = get_address(value) - address_list.append(token) - except errors.HeaderParseError as err: - leader = None - if value[0] in CFWS_LEADER: - leader, value = get_cfws(value) - if not value or value[0] == ',': - address_list.append(leader) - address_list.defects.append(errors.ObsoleteHeaderDefect( - "address-list entry with no content")) - else: - token, value = get_invalid_mailbox(value, ',') - if leader is not None: - token[:0] = [leader] - address_list.append(Address([token])) - address_list.defects.append(errors.InvalidHeaderDefect( - "invalid address in address-list")) - elif value[0] == ',': - address_list.defects.append(errors.ObsoleteHeaderDefect( - "empty element in address-list")) - else: - token, value = get_invalid_mailbox(value, ',') - if leader is not None: - token[:0] = [leader] - address_list.append(Address([token])) - address_list.defects.append(errors.InvalidHeaderDefect( - "invalid address in address-list")) - if value and value[0] != ',': - # Crap after address; treat it as an invalid mailbox. - # The mailbox info will still be available. 
- mailbox = address_list[-1][0] - mailbox.token_type = 'invalid-mailbox' - token, value = get_invalid_mailbox(value, ',') - mailbox.extend(token) - address_list.defects.append(errors.InvalidHeaderDefect( - "invalid address in address-list")) - if value: # Must be a , at this point. - address_list.append(ValueTerminal(',', 'list-separator')) - value = value[1:] - return address_list, value - -# -# XXX: As I begin to add additional header parsers, I'm realizing we probably -# have two level of parser routines: the get_XXX methods that get a token in -# the grammar, and parse_XXX methods that parse an entire field value. So -# get_address_list above should really be a parse_ method, as probably should -# be get_unstructured. -# - -def parse_mime_version(value): - """ mime-version = [CFWS] 1*digit [CFWS] "." [CFWS] 1*digit [CFWS] - - """ - # The [CFWS] is implicit in the RFC 2045 BNF. - # XXX: This routine is a bit verbose, should factor out a get_int method. - mime_version = MIMEVersion() - if not value: - mime_version.defects.append(errors.HeaderMissingRequiredValue( - "Missing MIME version number (eg: 1.0)")) - return mime_version - if value[0] in CFWS_LEADER: - token, value = get_cfws(value) - mime_version.append(token) - if not value: - mime_version.defects.append(errors.HeaderMissingRequiredValue( - "Expected MIME version number but found only CFWS")) - digits = '' - while value and value[0] != '.' 
and value[0] not in CFWS_LEADER: - digits += value[0] - value = value[1:] - if not digits.isdigit(): - mime_version.defects.append(errors.InvalidHeaderDefect( - "Expected MIME major version number but found {!r}".format(digits))) - mime_version.append(ValueTerminal(digits, 'xtext')) - else: - mime_version.major = int(digits) - mime_version.append(ValueTerminal(digits, 'digits')) - if value and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - mime_version.append(token) - if not value or value[0] != '.': - if mime_version.major is not None: - mime_version.defects.append(errors.InvalidHeaderDefect( - "Incomplete MIME version; found only major number")) - if value: - mime_version.append(ValueTerminal(value, 'xtext')) - return mime_version - mime_version.append(ValueTerminal('.', 'version-separator')) - value = value[1:] - if value and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - mime_version.append(token) - if not value: - if mime_version.major is not None: - mime_version.defects.append(errors.InvalidHeaderDefect( - "Incomplete MIME version; found only major number")) - return mime_version - digits = '' - while value and value[0] not in CFWS_LEADER: - digits += value[0] - value = value[1:] - if not digits.isdigit(): - mime_version.defects.append(errors.InvalidHeaderDefect( - "Expected MIME minor version number but found {!r}".format(digits))) - mime_version.append(ValueTerminal(digits, 'xtext')) - else: - mime_version.minor = int(digits) - mime_version.append(ValueTerminal(digits, 'digits')) - if value and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - mime_version.append(token) - if value: - mime_version.defects.append(errors.InvalidHeaderDefect( - "Excess non-CFWS text after MIME version")) - mime_version.append(ValueTerminal(value, 'xtext')) - return mime_version - -def get_invalid_parameter(value): - """ Read everything up to the next ';'. - - This is outside the formal grammar. 
The InvalidParameter TokenList that is - returned acts like a Parameter, but the data attributes are None. - - """ - invalid_parameter = InvalidParameter() - while value and value[0] != ';': - if value[0] in PHRASE_ENDS: - invalid_parameter.append(ValueTerminal(value[0], - 'misplaced-special')) - value = value[1:] - else: - token, value = get_phrase(value) - invalid_parameter.append(token) - return invalid_parameter, value - -def get_ttext(value): - """ttext = <matches _ttext_matcher> - - We allow any non-TOKEN_ENDS in ttext, but add defects to the token's - defects list if we find non-ttext characters. We also register defects for - *any* non-printables even though the RFC doesn't exclude all of them, - because we follow the spirit of RFC 5322. - - """ - m = _non_token_end_matcher(value) - if not m: - raise errors.HeaderParseError( - "expected ttext but found '{}'".format(value)) - ttext = m.group() - value = value[len(ttext):] - ttext = ValueTerminal(ttext, 'ttext') - _validate_xtext(ttext) - return ttext, value - -def get_token(value): - """token = [CFWS] 1*ttext [CFWS] - - The RFC equivalent of ttext is any US-ASCII chars except space, ctls, or - tspecials. We also exclude tabs even though the RFC doesn't. - - The RFC implies the CFWS but is not explicit about it in the BNF. - - """ - mtoken = Token() - if value and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - mtoken.append(token) - if value and value[0] in TOKEN_ENDS: - raise errors.HeaderParseError( - "expected token but found '{}'".format(value)) - token, value = get_ttext(value) - mtoken.append(token) - if value and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - mtoken.append(token) - return mtoken, value - -def get_attrtext(value): - """attrtext = 1*(any non-ATTRIBUTE_ENDS character) - - We allow any non-ATTRIBUTE_ENDS in attrtext, but add defects to the - token's defects list if we find non-attrtext characters. 
We also register - defects for *any* non-printables even though the RFC doesn't exclude all of - them, because we follow the spirit of RFC 5322. - - """ - m = _non_attribute_end_matcher(value) - if not m: - raise errors.HeaderParseError( - "expected attrtext but found {!r}".format(value)) - attrtext = m.group() - value = value[len(attrtext):] - attrtext = ValueTerminal(attrtext, 'attrtext') - _validate_xtext(attrtext) - return attrtext, value - -def get_attribute(value): - """ [CFWS] 1*attrtext [CFWS] - - This version of the BNF makes the CFWS explicit, and as usual we use a - value terminal for the actual run of characters. The RFC equivalent of - attrtext is the token characters, with the subtraction of '*', "'", and '%'. - We include tab in the excluded set just as we do for token. - - """ - attribute = Attribute() - if value and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - attribute.append(token) - if value and value[0] in ATTRIBUTE_ENDS: - raise errors.HeaderParseError( - "expected token but found '{}'".format(value)) - token, value = get_attrtext(value) - attribute.append(token) - if value and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - attribute.append(token) - return attribute, value - -def get_extended_attrtext(value): - """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%') - - This is a special parsing routine so that we get a value that - includes % escapes as a single string (which we decode as a single - string later). 
- - """ - m = _non_extended_attribute_end_matcher(value) - if not m: - raise errors.HeaderParseError( - "expected extended attrtext but found {!r}".format(value)) - attrtext = m.group() - value = value[len(attrtext):] - attrtext = ValueTerminal(attrtext, 'extended-attrtext') - _validate_xtext(attrtext) - return attrtext, value - -def get_extended_attribute(value): - """ [CFWS] 1*extended_attrtext [CFWS] - - This is like the non-extended version except we allow % characters, so that - we can pick up an encoded value as a single string. - - """ - # XXX: should we have an ExtendedAttribute TokenList? - attribute = Attribute() - if value and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - attribute.append(token) - if value and value[0] in EXTENDED_ATTRIBUTE_ENDS: - raise errors.HeaderParseError( - "expected token but found '{}'".format(value)) - token, value = get_extended_attrtext(value) - attribute.append(token) - if value and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - attribute.append(token) - return attribute, value - -def get_section(value): - """ '*' digits - - The formal BNF is more complicated because leading 0s are not allowed. We - check for that and add a defect. We also assume no CFWS is allowed between - the '*' and the digits, though the RFC is not crystal clear on that. - The caller should already have dealt with leading CFWS. 
- - """ - section = Section() - if not value or value[0] != '*': - raise errors.HeaderParseError("Expected section but found {}".format( - value)) - section.append(ValueTerminal('*', 'section-marker')) - value = value[1:] - if not value or not value[0].isdigit(): - raise errors.HeaderParseError("Expected section number but " - "found {}".format(value)) - digits = '' - while value and value[0].isdigit(): - digits += value[0] - value = value[1:] - if digits[0] == '0' and digits != '0': - section.defects.append(errors.InvalidHeaderError("section number" - "has an invalid leading 0")) - section.number = int(digits) - section.append(ValueTerminal(digits, 'digits')) - return section, value - - -def get_value(value): - """ quoted-string / attribute - - """ - v = Value() - if not value: - raise errors.HeaderParseError("Expected value but found end of string") - leader = None - if value[0] in CFWS_LEADER: - leader, value = get_cfws(value) - if not value: - raise errors.HeaderParseError("Expected value but found " - "only {}".format(leader)) - if value[0] == '"': - token, value = get_quoted_string(value) - else: - token, value = get_extended_attribute(value) - if leader is not None: - token[:0] = [leader] - v.append(token) - return v, value - -def get_parameter(value): - """ attribute [section] ["*"] [CFWS] "=" value - - The CFWS is implied by the RFC but not made explicit in the BNF. This - simplified form of the BNF from the RFC is made to conform with the RFC BNF - through some extra checks. We do it this way because it makes both error - recovery and working with the resulting parse tree easier. - """ - # It is possible CFWS would also be implicitly allowed between the section - # and the 'extended-attribute' marker (the '*') , but we've never seen that - # in the wild and we will therefore ignore the possibility. 
- param = Parameter() - token, value = get_attribute(value) - param.append(token) - if not value or value[0] == ';': - param.defects.append(errors.InvalidHeaderDefect("Parameter contains " - "name ({}) but no value".format(token))) - return param, value - if value[0] == '*': - try: - token, value = get_section(value) - param.sectioned = True - param.append(token) - except errors.HeaderParseError: - pass - if not value: - raise errors.HeaderParseError("Incomplete parameter") - if value[0] == '*': - param.append(ValueTerminal('*', 'extended-parameter-marker')) - value = value[1:] - param.extended = True - if value[0] != '=': - raise errors.HeaderParseError("Parameter not followed by '='") - param.append(ValueTerminal('=', 'parameter-separator')) - value = value[1:] - leader = None - if value and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - param.append(token) - remainder = None - appendto = param - if param.extended and value and value[0] == '"': - # Now for some serious hackery to handle the common invalid case of - # double quotes around an extended value. We also accept (with defect) - # a value marked as encoded that isn't really. 
- qstring, remainder = get_quoted_string(value) - inner_value = qstring.stripped_value - semi_valid = False - if param.section_number == 0: - if inner_value and inner_value[0] == "'": - semi_valid = True - else: - token, rest = get_attrtext(inner_value) - if rest and rest[0] == "'": - semi_valid = True - else: - try: - token, rest = get_extended_attrtext(inner_value) - except: - pass - else: - if not rest: - semi_valid = True - if semi_valid: - param.defects.append(errors.InvalidHeaderDefect( - "Quoted string value for extended parameter is invalid")) - param.append(qstring) - for t in qstring: - if t.token_type == 'bare-quoted-string': - t[:] = [] - appendto = t - break - value = inner_value - else: - remainder = None - param.defects.append(errors.InvalidHeaderDefect( - "Parameter marked as extended but appears to have a " - "quoted string value that is non-encoded")) - if value and value[0] == "'": - token = None - else: - token, value = get_value(value) - if not param.extended or param.section_number > 0: - if not value or value[0] != "'": - appendto.append(token) - if remainder is not None: - assert not value, value - value = remainder - return param, value - param.defects.append(errors.InvalidHeaderDefect( - "Apparent initial-extended-value but attribute " - "was not marked as extended or was not initial section")) - if not value: - # Assume the charset/lang is missing and the token is the value. 
- param.defects.append(errors.InvalidHeaderDefect( - "Missing required charset/lang delimiters")) - appendto.append(token) - if remainder is None: - return param, value - else: - if token is not None: - for t in token: - if t.token_type == 'extended-attrtext': - break - t.token_type == 'attrtext' - appendto.append(t) - param.charset = t.value - if value[0] != "'": - raise errors.HeaderParseError("Expected RFC2231 char/lang encoding " - "delimiter, but found {!r}".format(value)) - appendto.append(ValueTerminal("'", 'RFC2231 delimiter')) - value = value[1:] - if value and value[0] != "'": - token, value = get_attrtext(value) - appendto.append(token) - param.lang = token.value - if not value or value[0] != "'": - raise errors.HeaderParseError("Expected RFC2231 char/lang encoding " - "delimiter, but found {}".format(value)) - appendto.append(ValueTerminal("'", 'RFC2231 delimiter')) - value = value[1:] - if remainder is not None: - # Treat the rest of value as bare quoted string content. - v = Value() - while value: - if value[0] in WSP: - token, value = get_fws(value) - else: - token, value = get_qcontent(value) - v.append(token) - token = v - else: - token, value = get_value(value) - appendto.append(token) - if remainder is not None: - assert not value, value - value = remainder - return param, value - -def parse_mime_parameters(value): - """ parameter *( ";" parameter ) - - That BNF is meant to indicate this routine should only be called after - finding and handling the leading ';'. There is no corresponding rule in - the formal RFC grammar, but it is more convenient for us for the set of - parameters to be treated as its own TokenList. - - This is 'parse' routine because it consumes the reminaing value, but it - would never be called to parse a full header. Instead it is called to - parse everything after the non-parameter value of a specific MIME header. 
- - """ - mime_parameters = MimeParameters() - while value: - try: - token, value = get_parameter(value) - mime_parameters.append(token) - except errors.HeaderParseError as err: - leader = None - if value[0] in CFWS_LEADER: - leader, value = get_cfws(value) - if not value: - mime_parameters.append(leader) - return mime_parameters - if value[0] == ';': - if leader is not None: - mime_parameters.append(leader) - mime_parameters.defects.append(errors.InvalidHeaderDefect( - "parameter entry with no content")) - else: - token, value = get_invalid_parameter(value) - if leader: - token[:0] = [leader] - mime_parameters.append(token) - mime_parameters.defects.append(errors.InvalidHeaderDefect( - "invalid parameter {!r}".format(token))) - if value and value[0] != ';': - # Junk after the otherwise valid parameter. Mark it as - # invalid, but it will have a value. - param = mime_parameters[-1] - param.token_type = 'invalid-parameter' - token, value = get_invalid_parameter(value) - param.extend(token) - mime_parameters.defects.append(errors.InvalidHeaderDefect( - "parameter with invalid trailing text {!r}".format(token))) - if value: - # Must be a ';' at this point. - mime_parameters.append(ValueTerminal(';', 'parameter-separator')) - value = value[1:] - return mime_parameters - -def _find_mime_parameters(tokenlist, value): - """Do our best to find the parameters in an invalid MIME header - - """ - while value and value[0] != ';': - if value[0] in PHRASE_ENDS: - tokenlist.append(ValueTerminal(value[0], 'misplaced-special')) - value = value[1:] - else: - token, value = get_phrase(value) - tokenlist.append(token) - if not value: - return - tokenlist.append(ValueTerminal(';', 'parameter-separator')) - tokenlist.append(parse_mime_parameters(value[1:])) - -def parse_content_type_header(value): - """ maintype "/" subtype *( ";" parameter ) - - The maintype and substype are tokens. Theoretically they could - be checked against the official IANA list + x-token, but we - don't do that. 
- """ - ctype = ContentType() - recover = False - if not value: - ctype.defects.append(errors.HeaderMissingRequiredValue( - "Missing content type specification")) - return ctype - try: - token, value = get_token(value) - except errors.HeaderParseError: - ctype.defects.append(errors.InvalidHeaderDefect( - "Expected content maintype but found {!r}".format(value))) - _find_mime_parameters(ctype, value) - return ctype - ctype.append(token) - # XXX: If we really want to follow the formal grammer we should make - # mantype and subtype specialized TokenLists here. Probably not worth it. - if not value or value[0] != '/': - ctype.defects.append(errors.InvalidHeaderDefect( - "Invalid content type")) - if value: - _find_mime_parameters(ctype, value) - return ctype - ctype.maintype = token.value.strip().lower() - ctype.append(ValueTerminal('/', 'content-type-separator')) - value = value[1:] - try: - token, value = get_token(value) - except errors.HeaderParseError: - ctype.defects.append(errors.InvalidHeaderDefect( - "Expected content subtype but found {!r}".format(value))) - _find_mime_parameters(ctype, value) - return ctype - ctype.append(token) - ctype.subtype = token.value.strip().lower() - if not value: - return ctype - if value[0] != ';': - ctype.defects.append(errors.InvalidHeaderDefect( - "Only parameters are valid after content type, but " - "found {!r}".format(value))) - # The RFC requires that a syntactically invalid content-type be treated - # as text/plain. Perhaps we should postel this, but we should probably - # only do that if we were checking the subtype value against IANA. 
- del ctype.maintype, ctype.subtype - _find_mime_parameters(ctype, value) - return ctype - ctype.append(ValueTerminal(';', 'parameter-separator')) - ctype.append(parse_mime_parameters(value[1:])) - return ctype - -def parse_content_disposition_header(value): - """ disposition-type *( ";" parameter ) - - """ - disp_header = ContentDisposition() - if not value: - disp_header.defects.append(errors.HeaderMissingRequiredValue( - "Missing content disposition")) - return disp_header - try: - token, value = get_token(value) - except errors.HeaderParseError: - ctype.defects.append(errors.InvalidHeaderDefect( - "Expected content disposition but found {!r}".format(value))) - _find_mime_parameters(disp_header, value) - return disp_header - disp_header.append(token) - disp_header.content_disposition = token.value.strip().lower() - if not value: - return disp_header - if value[0] != ';': - disp_header.defects.append(errors.InvalidHeaderDefect( - "Only parameters are valid after content disposition, but " - "found {!r}".format(value))) - _find_mime_parameters(disp_header, value) - return disp_header - disp_header.append(ValueTerminal(';', 'parameter-separator')) - disp_header.append(parse_mime_parameters(value[1:])) - return disp_header - -def parse_content_transfer_encoding_header(value): - """ mechanism - - """ - # We should probably validate the values, since the list is fixed. 
- cte_header = ContentTransferEncoding() - if not value: - cte_header.defects.append(errors.HeaderMissingRequiredValue( - "Missing content transfer encoding")) - return cte_header - try: - token, value = get_token(value) - except errors.HeaderParseError: - ctype.defects.append(errors.InvalidHeaderDefect( - "Expected content trnasfer encoding but found {!r}".format(value))) - else: - cte_header.append(token) - cte_header.cte = token.value.strip().lower() - if not value: - return cte_header - while value: - cte_header.defects.append(errors.InvalidHeaderDefect( - "Extra text after content transfer encoding")) - if value[0] in PHRASE_ENDS: - cte_header.append(ValueTerminal(value[0], 'misplaced-special')) - value = value[1:] - else: - token, value = get_phrase(value) - cte_header.append(token) - return cte_header diff --git a/contrib/python/future/future/backports/email/_parseaddr.py b/contrib/python/future/future/backports/email/_parseaddr.py deleted file mode 100644 index 5b50cc6bd1d..00000000000 --- a/contrib/python/future/future/backports/email/_parseaddr.py +++ /dev/null @@ -1,546 +0,0 @@ -# Copyright (C) 2002-2007 Python Software Foundation -# Contact: [email protected] - -"""Email address parsing code. - -Lifted directly from rfc822.py. This should eventually be rewritten. 
-""" - -from __future__ import unicode_literals -from __future__ import print_function -from __future__ import division -from __future__ import absolute_import -from future.builtins import int - -__all__ = [ - 'mktime_tz', - 'parsedate', - 'parsedate_tz', - 'quote', - ] - -import time, calendar - -SPACE = ' ' -EMPTYSTRING = '' -COMMASPACE = ', ' - -# Parse a date field -_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', - 'aug', 'sep', 'oct', 'nov', 'dec', - 'january', 'february', 'march', 'april', 'may', 'june', 'july', - 'august', 'september', 'october', 'november', 'december'] - -_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'] - -# The timezone table does not include the military time zones defined -# in RFC822, other than Z. According to RFC1123, the description in -# RFC822 gets the signs wrong, so we can't rely on any such time -# zones. RFC1123 recommends that numeric timezone indicators be used -# instead of timezone names. - -_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0, - 'AST': -400, 'ADT': -300, # Atlantic (used in Canada) - 'EST': -500, 'EDT': -400, # Eastern - 'CST': -600, 'CDT': -500, # Central - 'MST': -700, 'MDT': -600, # Mountain - 'PST': -800, 'PDT': -700 # Pacific - } - - -def parsedate_tz(data): - """Convert a date string to a time tuple. - - Accounts for military timezones. - """ - res = _parsedate_tz(data) - if not res: - return - if res[9] is None: - res[9] = 0 - return tuple(res) - -def _parsedate_tz(data): - """Convert date to extended time tuple. - - The last (additional) element is the time zone offset in seconds, except if - the timezone was specified as -0000. In that case the last element is - None. This indicates a UTC timestamp that explicitly declaims knowledge of - the source timezone, as opposed to a +0000 timestamp that indicates the - source timezone really was UTC. 
- - """ - if not data: - return - data = data.split() - # The FWS after the comma after the day-of-week is optional, so search and - # adjust for this. - if data[0].endswith(',') or data[0].lower() in _daynames: - # There's a dayname here. Skip it - del data[0] - else: - i = data[0].rfind(',') - if i >= 0: - data[0] = data[0][i+1:] - if len(data) == 3: # RFC 850 date, deprecated - stuff = data[0].split('-') - if len(stuff) == 3: - data = stuff + data[1:] - if len(data) == 4: - s = data[3] - i = s.find('+') - if i == -1: - i = s.find('-') - if i > 0: - data[3:] = [s[:i], s[i:]] - else: - data.append('') # Dummy tz - if len(data) < 5: - return None - data = data[:5] - [dd, mm, yy, tm, tz] = data - mm = mm.lower() - if mm not in _monthnames: - dd, mm = mm, dd.lower() - if mm not in _monthnames: - return None - mm = _monthnames.index(mm) + 1 - if mm > 12: - mm -= 12 - if dd[-1] == ',': - dd = dd[:-1] - i = yy.find(':') - if i > 0: - yy, tm = tm, yy - if yy[-1] == ',': - yy = yy[:-1] - if not yy[0].isdigit(): - yy, tz = tz, yy - if tm[-1] == ',': - tm = tm[:-1] - tm = tm.split(':') - if len(tm) == 2: - [thh, tmm] = tm - tss = '0' - elif len(tm) == 3: - [thh, tmm, tss] = tm - elif len(tm) == 1 and '.' in tm[0]: - # Some non-compliant MUAs use '.' to separate time elements. - tm = tm[0].split('.') - if len(tm) == 2: - [thh, tmm] = tm - tss = 0 - elif len(tm) == 3: - [thh, tmm, tss] = tm - else: - return None - try: - yy = int(yy) - dd = int(dd) - thh = int(thh) - tmm = int(tmm) - tss = int(tss) - except ValueError: - return None - # Check for a yy specified in two-digit format, then convert it to the - # appropriate four-digit format, according to the POSIX standard. RFC 822 - # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822) - # mandates a 4-digit yy. For more information, see the documentation for - # the time module. - if yy < 100: - # The year is between 1969 and 1999 (inclusive). 
- if yy > 68: - yy += 1900 - # The year is between 2000 and 2068 (inclusive). - else: - yy += 2000 - tzoffset = None - tz = tz.upper() - if tz in _timezones: - tzoffset = _timezones[tz] - else: - try: - tzoffset = int(tz) - except ValueError: - pass - if tzoffset==0 and tz.startswith('-'): - tzoffset = None - # Convert a timezone offset into seconds ; -0500 -> -18000 - if tzoffset: - if tzoffset < 0: - tzsign = -1 - tzoffset = -tzoffset - else: - tzsign = 1 - tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60) - # Daylight Saving Time flag is set to -1, since DST is unknown. - return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset] - - -def parsedate(data): - """Convert a time string to a time tuple.""" - t = parsedate_tz(data) - if isinstance(t, tuple): - return t[:9] - else: - return t - - -def mktime_tz(data): - """Turn a 10-tuple as returned by parsedate_tz() into a POSIX timestamp.""" - if data[9] is None: - # No zone info, so localtime is better assumption than GMT - return time.mktime(data[:8] + (-1,)) - else: - t = calendar.timegm(data) - return t - data[9] - - -def quote(str): - """Prepare string to be used in a quoted string. - - Turns backslash and double quote characters into quoted pairs. These - are the only characters that need to be quoted inside a quoted string. - Does not add the surrounding double quotes. - """ - return str.replace('\\', '\\\\').replace('"', '\\"') - - -class AddrlistClass(object): - """Address parser class by Ben Escoto. - - To understand what this class does, it helps to have a copy of RFC 2822 in - front of you. - - Note: this class interface is deprecated and may be removed in the future. - Use email.utils.AddressList instead. - """ - - def __init__(self, field): - """Initialize a new instance. - - `field' is an unparsed address header field, containing - one or more addresses. 
- """ - self.specials = '()<>@,:;.\"[]' - self.pos = 0 - self.LWS = ' \t' - self.CR = '\r\n' - self.FWS = self.LWS + self.CR - self.atomends = self.specials + self.LWS + self.CR - # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it - # is obsolete syntax. RFC 2822 requires that we recognize obsolete - # syntax, so allow dots in phrases. - self.phraseends = self.atomends.replace('.', '') - self.field = field - self.commentlist = [] - - def gotonext(self): - """Skip white space and extract comments.""" - wslist = [] - while self.pos < len(self.field): - if self.field[self.pos] in self.LWS + '\n\r': - if self.field[self.pos] not in '\n\r': - wslist.append(self.field[self.pos]) - self.pos += 1 - elif self.field[self.pos] == '(': - self.commentlist.append(self.getcomment()) - else: - break - return EMPTYSTRING.join(wslist) - - def getaddrlist(self): - """Parse all addresses. - - Returns a list containing all of the addresses. - """ - result = [] - while self.pos < len(self.field): - ad = self.getaddress() - if ad: - result += ad - else: - result.append(('', '')) - return result - - def getaddress(self): - """Parse the next address.""" - self.commentlist = [] - self.gotonext() - - oldpos = self.pos - oldcl = self.commentlist - plist = self.getphraselist() - - self.gotonext() - returnlist = [] - - if self.pos >= len(self.field): - # Bad email address technically, no domain. 
- if plist: - returnlist = [(SPACE.join(self.commentlist), plist[0])] - - elif self.field[self.pos] in '.@': - # email address is just an addrspec - # this isn't very efficient since we start over - self.pos = oldpos - self.commentlist = oldcl - addrspec = self.getaddrspec() - returnlist = [(SPACE.join(self.commentlist), addrspec)] - - elif self.field[self.pos] == ':': - # address is a group - returnlist = [] - - fieldlen = len(self.field) - self.pos += 1 - while self.pos < len(self.field): - self.gotonext() - if self.pos < fieldlen and self.field[self.pos] == ';': - self.pos += 1 - break - returnlist = returnlist + self.getaddress() - - elif self.field[self.pos] == '<': - # Address is a phrase then a route addr - routeaddr = self.getrouteaddr() - - if self.commentlist: - returnlist = [(SPACE.join(plist) + ' (' + - ' '.join(self.commentlist) + ')', routeaddr)] - else: - returnlist = [(SPACE.join(plist), routeaddr)] - - else: - if plist: - returnlist = [(SPACE.join(self.commentlist), plist[0])] - elif self.field[self.pos] in self.specials: - self.pos += 1 - - self.gotonext() - if self.pos < len(self.field) and self.field[self.pos] == ',': - self.pos += 1 - return returnlist - - def getrouteaddr(self): - """Parse a route address (Return-path value). - - This method just skips all the route stuff and returns the addrspec. 
- """ - if self.field[self.pos] != '<': - return - - expectroute = False - self.pos += 1 - self.gotonext() - adlist = '' - while self.pos < len(self.field): - if expectroute: - self.getdomain() - expectroute = False - elif self.field[self.pos] == '>': - self.pos += 1 - break - elif self.field[self.pos] == '@': - self.pos += 1 - expectroute = True - elif self.field[self.pos] == ':': - self.pos += 1 - else: - adlist = self.getaddrspec() - self.pos += 1 - break - self.gotonext() - - return adlist - - def getaddrspec(self): - """Parse an RFC 2822 addr-spec.""" - aslist = [] - - self.gotonext() - while self.pos < len(self.field): - preserve_ws = True - if self.field[self.pos] == '.': - if aslist and not aslist[-1].strip(): - aslist.pop() - aslist.append('.') - self.pos += 1 - preserve_ws = False - elif self.field[self.pos] == '"': - aslist.append('"%s"' % quote(self.getquote())) - elif self.field[self.pos] in self.atomends: - if aslist and not aslist[-1].strip(): - aslist.pop() - break - else: - aslist.append(self.getatom()) - ws = self.gotonext() - if preserve_ws and ws: - aslist.append(ws) - - if self.pos >= len(self.field) or self.field[self.pos] != '@': - return EMPTYSTRING.join(aslist) - - aslist.append('@') - self.pos += 1 - self.gotonext() - return EMPTYSTRING.join(aslist) + self.getdomain() - - def getdomain(self): - """Get the complete domain name from an address.""" - sdlist = [] - while self.pos < len(self.field): - if self.field[self.pos] in self.LWS: - self.pos += 1 - elif self.field[self.pos] == '(': - self.commentlist.append(self.getcomment()) - elif self.field[self.pos] == '[': - sdlist.append(self.getdomainliteral()) - elif self.field[self.pos] == '.': - self.pos += 1 - sdlist.append('.') - elif self.field[self.pos] in self.atomends: - break - else: - sdlist.append(self.getatom()) - return EMPTYSTRING.join(sdlist) - - def getdelimited(self, beginchar, endchars, allowcomments=True): - """Parse a header fragment delimited by special characters. 
- - `beginchar' is the start character for the fragment. - If self is not looking at an instance of `beginchar' then - getdelimited returns the empty string. - - `endchars' is a sequence of allowable end-delimiting characters. - Parsing stops when one of these is encountered. - - If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed - within the parsed fragment. - """ - if self.field[self.pos] != beginchar: - return '' - - slist = [''] - quote = False - self.pos += 1 - while self.pos < len(self.field): - if quote: - slist.append(self.field[self.pos]) - quote = False - elif self.field[self.pos] in endchars: - self.pos += 1 - break - elif allowcomments and self.field[self.pos] == '(': - slist.append(self.getcomment()) - continue # have already advanced pos from getcomment - elif self.field[self.pos] == '\\': - quote = True - else: - slist.append(self.field[self.pos]) - self.pos += 1 - - return EMPTYSTRING.join(slist) - - def getquote(self): - """Get a quote-delimited fragment from self's field.""" - return self.getdelimited('"', '"\r', False) - - def getcomment(self): - """Get a parenthesis-delimited fragment from self's field.""" - return self.getdelimited('(', ')\r', True) - - def getdomainliteral(self): - """Parse an RFC 2822 domain-literal.""" - return '[%s]' % self.getdelimited('[', ']\r', False) - - def getatom(self, atomends=None): - """Parse an RFC 2822 atom. - - Optional atomends specifies a different set of end token delimiters - (the default is to use self.atomends). This is used e.g. in - getphraselist() since phrase endings must not include the `.' (which - is legal in phrases).""" - atomlist = [''] - if atomends is None: - atomends = self.atomends - - while self.pos < len(self.field): - if self.field[self.pos] in atomends: - break - else: - atomlist.append(self.field[self.pos]) - self.pos += 1 - - return EMPTYSTRING.join(atomlist) - - def getphraselist(self): - """Parse a sequence of RFC 2822 phrases. 
- - A phrase is a sequence of words, which are in turn either RFC 2822 - atoms or quoted-strings. Phrases are canonicalized by squeezing all - runs of continuous whitespace into one space. - """ - plist = [] - - while self.pos < len(self.field): - if self.field[self.pos] in self.FWS: - self.pos += 1 - elif self.field[self.pos] == '"': - plist.append(self.getquote()) - elif self.field[self.pos] == '(': - self.commentlist.append(self.getcomment()) - elif self.field[self.pos] in self.phraseends: - break - else: - plist.append(self.getatom(self.phraseends)) - - return plist - -class AddressList(AddrlistClass): - """An AddressList encapsulates a list of parsed RFC 2822 addresses.""" - def __init__(self, field): - AddrlistClass.__init__(self, field) - if field: - self.addresslist = self.getaddrlist() - else: - self.addresslist = [] - - def __len__(self): - return len(self.addresslist) - - def __add__(self, other): - # Set union - newaddr = AddressList(None) - newaddr.addresslist = self.addresslist[:] - for x in other.addresslist: - if not x in self.addresslist: - newaddr.addresslist.append(x) - return newaddr - - def __iadd__(self, other): - # Set union, in-place - for x in other.addresslist: - if not x in self.addresslist: - self.addresslist.append(x) - return self - - def __sub__(self, other): - # Set difference - newaddr = AddressList(None) - for x in self.addresslist: - if not x in other.addresslist: - newaddr.addresslist.append(x) - return newaddr - - def __isub__(self, other): - # Set difference, in-place - for x in other.addresslist: - if x in self.addresslist: - self.addresslist.remove(x) - return self - - def __getitem__(self, index): - # Make indexing, slices, and 'in' work - return self.addresslist[index] diff --git a/contrib/python/future/future/backports/email/_policybase.py b/contrib/python/future/future/backports/email/_policybase.py deleted file mode 100644 index c66aea90021..00000000000 --- a/contrib/python/future/future/backports/email/_policybase.py 
+++ /dev/null @@ -1,365 +0,0 @@ -"""Policy framework for the email package. - -Allows fine grained feature control of how the package parses and emits data. -""" -from __future__ import unicode_literals -from __future__ import print_function -from __future__ import division -from __future__ import absolute_import -from future.builtins import super -from future.builtins import str -from future.utils import with_metaclass - -import abc -from future.backports.email import header -from future.backports.email import charset as _charset -from future.backports.email.utils import _has_surrogates - -__all__ = [ - 'Policy', - 'Compat32', - 'compat32', - ] - - -class _PolicyBase(object): - - """Policy Object basic framework. - - This class is useless unless subclassed. A subclass should define - class attributes with defaults for any values that are to be - managed by the Policy object. The constructor will then allow - non-default values to be set for these attributes at instance - creation time. The instance will be callable, taking these same - attributes keyword arguments, and returning a new instance - identical to the called instance except for those values changed - by the keyword arguments. Instances may be added, yielding new - instances with any non-default values from the right hand - operand overriding those in the left hand operand. That is, - - A + B == A(<non-default values of B>) - - The repr of an instance can be used to reconstruct the object - if and only if the repr of the values can be used to reconstruct - those values. - - """ - - def __init__(self, **kw): - """Create new Policy, possibly overriding some defaults. - - See class docstring for a list of overridable attributes. 
- - """ - for name, value in kw.items(): - if hasattr(self, name): - super(_PolicyBase,self).__setattr__(name, value) - else: - raise TypeError( - "{!r} is an invalid keyword argument for {}".format( - name, self.__class__.__name__)) - - def __repr__(self): - args = [ "{}={!r}".format(name, value) - for name, value in self.__dict__.items() ] - return "{}({})".format(self.__class__.__name__, ', '.join(args)) - - def clone(self, **kw): - """Return a new instance with specified attributes changed. - - The new instance has the same attribute values as the current object, - except for the changes passed in as keyword arguments. - - """ - newpolicy = self.__class__.__new__(self.__class__) - for attr, value in self.__dict__.items(): - object.__setattr__(newpolicy, attr, value) - for attr, value in kw.items(): - if not hasattr(self, attr): - raise TypeError( - "{!r} is an invalid keyword argument for {}".format( - attr, self.__class__.__name__)) - object.__setattr__(newpolicy, attr, value) - return newpolicy - - def __setattr__(self, name, value): - if hasattr(self, name): - msg = "{!r} object attribute {!r} is read-only" - else: - msg = "{!r} object has no attribute {!r}" - raise AttributeError(msg.format(self.__class__.__name__, name)) - - def __add__(self, other): - """Non-default values from right operand override those from left. - - The object returned is a new instance of the subclass. 
- - """ - return self.clone(**other.__dict__) - - -def _append_doc(doc, added_doc): - doc = doc.rsplit('\n', 1)[0] - added_doc = added_doc.split('\n', 1)[1] - return doc + '\n' + added_doc - -def _extend_docstrings(cls): - if cls.__doc__ and cls.__doc__.startswith('+'): - cls.__doc__ = _append_doc(cls.__bases__[0].__doc__, cls.__doc__) - for name, attr in cls.__dict__.items(): - if attr.__doc__ and attr.__doc__.startswith('+'): - for c in (c for base in cls.__bases__ for c in base.mro()): - doc = getattr(getattr(c, name), '__doc__') - if doc: - attr.__doc__ = _append_doc(doc, attr.__doc__) - break - return cls - - -class Policy(with_metaclass(abc.ABCMeta, _PolicyBase)): - - r"""Controls for how messages are interpreted and formatted. - - Most of the classes and many of the methods in the email package accept - Policy objects as parameters. A Policy object contains a set of values and - functions that control how input is interpreted and how output is rendered. - For example, the parameter 'raise_on_defect' controls whether or not an RFC - violation results in an error being raised or not, while 'max_line_length' - controls the maximum length of output lines when a Message is serialized. - - Any valid attribute may be overridden when a Policy is created by passing - it as a keyword argument to the constructor. Policy objects are immutable, - but a new Policy object can be created with only certain values changed by - calling the Policy instance with keyword arguments. Policy objects can - also be added, producing a new Policy object in which the non-default - attributes set in the right hand operand overwrite those specified in the - left operand. - - Settable attributes: - - raise_on_defect -- If true, then defects should be raised as errors. - Default: False. - - linesep -- string containing the value to use as separation - between output lines. Default '\n'. 
- - cte_type -- Type of allowed content transfer encodings - - 7bit -- ASCII only - 8bit -- Content-Transfer-Encoding: 8bit is allowed - - Default: 8bit. Also controls the disposition of - (RFC invalid) binary data in headers; see the - documentation of the binary_fold method. - - max_line_length -- maximum length of lines, excluding 'linesep', - during serialization. None or 0 means no line - wrapping is done. Default is 78. - - """ - - raise_on_defect = False - linesep = '\n' - cte_type = '8bit' - max_line_length = 78 - - def handle_defect(self, obj, defect): - """Based on policy, either raise defect or call register_defect. - - handle_defect(obj, defect) - - defect should be a Defect subclass, but in any case must be an - Exception subclass. obj is the object on which the defect should be - registered if it is not raised. If the raise_on_defect is True, the - defect is raised as an error, otherwise the object and the defect are - passed to register_defect. - - This method is intended to be called by parsers that discover defects. - The email package parsers always call it with Defect instances. - - """ - if self.raise_on_defect: - raise defect - self.register_defect(obj, defect) - - def register_defect(self, obj, defect): - """Record 'defect' on 'obj'. - - Called by handle_defect if raise_on_defect is False. This method is - part of the Policy API so that Policy subclasses can implement custom - defect handling. The default implementation calls the append method of - the defects attribute of obj. The objects used by the email package by - default that get passed to this method will always have a defects - attribute with an append method. - - """ - obj.defects.append(defect) - - def header_max_count(self, name): - """Return the maximum allowed number of headers named 'name'. - - Called when a header is added to a Message object. 
If the returned - value is not 0 or None, and there are already a number of headers with - the name 'name' equal to the value returned, a ValueError is raised. - - Because the default behavior of Message's __setitem__ is to append the - value to the list of headers, it is easy to create duplicate headers - without realizing it. This method allows certain headers to be limited - in the number of instances of that header that may be added to a - Message programmatically. (The limit is not observed by the parser, - which will faithfully produce as many headers as exist in the message - being parsed.) - - The default implementation returns None for all header names. - """ - return None - - @abc.abstractmethod - def header_source_parse(self, sourcelines): - """Given a list of linesep terminated strings constituting the lines of - a single header, return the (name, value) tuple that should be stored - in the model. The input lines should retain their terminating linesep - characters. The lines passed in by the email package may contain - surrogateescaped binary data. - """ - raise NotImplementedError - - @abc.abstractmethod - def header_store_parse(self, name, value): - """Given the header name and the value provided by the application - program, return the (name, value) that should be stored in the model. - """ - raise NotImplementedError - - @abc.abstractmethod - def header_fetch_parse(self, name, value): - """Given the header name and the value from the model, return the value - to be returned to the application program that is requesting that - header. The value passed in by the email package may contain - surrogateescaped binary data if the lines were parsed by a BytesParser. - The returned value should not contain any surrogateescaped data. 
- - """ - raise NotImplementedError - - @abc.abstractmethod - def fold(self, name, value): - """Given the header name and the value from the model, return a string - containing linesep characters that implement the folding of the header - according to the policy controls. The value passed in by the email - package may contain surrogateescaped binary data if the lines were - parsed by a BytesParser. The returned value should not contain any - surrogateescaped data. - - """ - raise NotImplementedError - - @abc.abstractmethod - def fold_binary(self, name, value): - """Given the header name and the value from the model, return binary - data containing linesep characters that implement the folding of the - header according to the policy controls. The value passed in by the - email package may contain surrogateescaped binary data. - - """ - raise NotImplementedError - - -@_extend_docstrings -class Compat32(Policy): - - """+ - This particular policy is the backward compatibility Policy. It - replicates the behavior of the email package version 5.1. - """ - - def _sanitize_header(self, name, value): - # If the header value contains surrogates, return a Header using - # the unknown-8bit charset to encode the bytes as encoded words. - if not isinstance(value, str): - # Assume it is already a header object - return value - if _has_surrogates(value): - return header.Header(value, charset=_charset.UNKNOWN8BIT, - header_name=name) - else: - return value - - def header_source_parse(self, sourcelines): - """+ - The name is parsed as everything up to the ':' and returned unmodified. - The value is determined by stripping leading whitespace off the - remainder of the first line, joining all subsequent lines together, and - stripping any trailing carriage return or linefeed characters. 
- - """ - name, value = sourcelines[0].split(':', 1) - value = value.lstrip(' \t') + ''.join(sourcelines[1:]) - return (name, value.rstrip('\r\n')) - - def header_store_parse(self, name, value): - """+ - The name and value are returned unmodified. - """ - return (name, value) - - def header_fetch_parse(self, name, value): - """+ - If the value contains binary data, it is converted into a Header object - using the unknown-8bit charset. Otherwise it is returned unmodified. - """ - return self._sanitize_header(name, value) - - def fold(self, name, value): - """+ - Headers are folded using the Header folding algorithm, which preserves - existing line breaks in the value, and wraps each resulting line to the - max_line_length. Non-ASCII binary data are CTE encoded using the - unknown-8bit charset. - - """ - return self._fold(name, value, sanitize=True) - - def fold_binary(self, name, value): - """+ - Headers are folded using the Header folding algorithm, which preserves - existing line breaks in the value, and wraps each resulting line to the - max_line_length. If cte_type is 7bit, non-ascii binary data is CTE - encoded using the unknown-8bit charset. Otherwise the original source - header is used, with its existing line breaks and/or binary data. - - """ - folded = self._fold(name, value, sanitize=self.cte_type=='7bit') - return folded.encode('ascii', 'surrogateescape') - - def _fold(self, name, value, sanitize): - parts = [] - parts.append('%s: ' % name) - if isinstance(value, str): - if _has_surrogates(value): - if sanitize: - h = header.Header(value, - charset=_charset.UNKNOWN8BIT, - header_name=name) - else: - # If we have raw 8bit data in a byte string, we have no idea - # what the encoding is. There is no safe way to split this - # string. If it's ascii-subset, then we could do a normal - # ascii split, but if it's multibyte then we could break the - # string. 
There's no way to know so the least harm seems to - # be to not split the string and risk it being too long. - parts.append(value) - h = None - else: - h = header.Header(value, header_name=name) - else: - # Assume it is a Header-like object. - h = value - if h is not None: - parts.append(h.encode(linesep=self.linesep, - maxlinelen=self.max_line_length)) - parts.append(self.linesep) - return ''.join(parts) - - -compat32 = Compat32() diff --git a/contrib/python/future/future/backports/email/base64mime.py b/contrib/python/future/future/backports/email/base64mime.py deleted file mode 100644 index 416d612e012..00000000000 --- a/contrib/python/future/future/backports/email/base64mime.py +++ /dev/null @@ -1,120 +0,0 @@ -# Copyright (C) 2002-2007 Python Software Foundation -# Author: Ben Gertzfield -# Contact: [email protected] - -"""Base64 content transfer encoding per RFCs 2045-2047. - -This module handles the content transfer encoding method defined in RFC 2045 -to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit -characters encoding known as Base64. - -It is used in the MIME standards for email to attach images, audio, and text -using some 8-bit character sets to messages. - -This module provides an interface to encode and decode both headers and bodies -with Base64 encoding. - -RFC 2045 defines a method for including character set information in an -`encoded-word' in a header. This method is commonly used for 8-bit real names -in To:, From:, Cc:, etc. fields, as well as Subject: lines. - -This module does not do the line wrapping or end-of-line character conversion -necessary for proper internationalized headers; it only does dumb encoding and -decoding. To deal with the various line wrapping issues, use the email.header -module. 
-""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import -from future.builtins import range -from future.builtins import bytes - -__all__ = [ - 'body_decode', - 'body_encode', - 'decode', - 'decodestring', - 'header_encode', - 'header_length', - ] - - -from base64 import b64encode -from binascii import b2a_base64, a2b_base64 - -CRLF = '\r\n' -NL = '\n' -EMPTYSTRING = '' - -# See also Charset.py -MISC_LEN = 7 - - -# Helpers -def header_length(bytearray): - """Return the length of s when it is encoded with base64.""" - groups_of_3, leftover = divmod(len(bytearray), 3) - # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in. - n = groups_of_3 * 4 - if leftover: - n += 4 - return n - - -def header_encode(header_bytes, charset='iso-8859-1'): - """Encode a single header line with Base64 encoding in a given charset. - - charset names the character set to use to encode the header. It defaults - to iso-8859-1. Base64 encoding is defined in RFC 2045. - """ - if not header_bytes: - return "" - if isinstance(header_bytes, str): - header_bytes = header_bytes.encode(charset) - encoded = b64encode(header_bytes).decode("ascii") - return '=?%s?b?%s?=' % (charset, encoded) - - -def body_encode(s, maxlinelen=76, eol=NL): - r"""Encode a string with base64. - - Each line will be wrapped at, at most, maxlinelen characters (defaults to - 76 characters). - - Each line of encoded text will end with eol, which defaults to "\n". Set - this to "\r\n" if you will be using the result of this function directly - in an email. - """ - if not s: - return s - - encvec = [] - max_unencoded = maxlinelen * 3 // 4 - for i in range(0, len(s), max_unencoded): - # BAW: should encode() inherit b2a_base64()'s dubious behavior in - # adding a newline to the encoded string? 
- enc = b2a_base64(s[i:i + max_unencoded]).decode("ascii") - if enc.endswith(NL) and eol != NL: - enc = enc[:-1] + eol - encvec.append(enc) - return EMPTYSTRING.join(encvec) - - -def decode(string): - """Decode a raw base64 string, returning a bytes object. - - This function does not parse a full MIME header value encoded with - base64 (like =?iso-8895-1?b?bmloISBuaWgh?=) -- please use the high - level email.header class for that functionality. - """ - if not string: - return bytes() - elif isinstance(string, str): - return a2b_base64(string.encode('raw-unicode-escape')) - else: - return a2b_base64(string) - - -# For convenience and backwards compatibility w/ standard base64 module -body_decode = decode -decodestring = decode diff --git a/contrib/python/future/future/backports/email/charset.py b/contrib/python/future/future/backports/email/charset.py deleted file mode 100644 index 2385ce68f33..00000000000 --- a/contrib/python/future/future/backports/email/charset.py +++ /dev/null @@ -1,409 +0,0 @@ -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import -from future.builtins import str -from future.builtins import next - -# Copyright (C) 2001-2007 Python Software Foundation -# Author: Ben Gertzfield, Barry Warsaw -# Contact: [email protected] - -__all__ = [ - 'Charset', - 'add_alias', - 'add_charset', - 'add_codec', - ] - -from functools import partial - -from future.backports import email -from future.backports.email import errors -from future.backports.email.encoders import encode_7or8bit - - -# Flags for types of header encodings -QP = 1 # Quoted-Printable -BASE64 = 2 # Base64 -SHORTEST = 3 # the shorter of QP and base64, but only for headers - -# In "=?charset?q?hello_world?=", the =?, ?q?, and ?= add up to 7 -RFC2047_CHROME_LEN = 7 - -DEFAULT_CHARSET = 'us-ascii' -UNKNOWN8BIT = 'unknown-8bit' -EMPTYSTRING = '' - - -# Defaults -CHARSETS = { - # input header enc body enc output conv - 'iso-8859-1': (QP, 
QP, None), - 'iso-8859-2': (QP, QP, None), - 'iso-8859-3': (QP, QP, None), - 'iso-8859-4': (QP, QP, None), - # iso-8859-5 is Cyrillic, and not especially used - # iso-8859-6 is Arabic, also not particularly used - # iso-8859-7 is Greek, QP will not make it readable - # iso-8859-8 is Hebrew, QP will not make it readable - 'iso-8859-9': (QP, QP, None), - 'iso-8859-10': (QP, QP, None), - # iso-8859-11 is Thai, QP will not make it readable - 'iso-8859-13': (QP, QP, None), - 'iso-8859-14': (QP, QP, None), - 'iso-8859-15': (QP, QP, None), - 'iso-8859-16': (QP, QP, None), - 'windows-1252':(QP, QP, None), - 'viscii': (QP, QP, None), - 'us-ascii': (None, None, None), - 'big5': (BASE64, BASE64, None), - 'gb2312': (BASE64, BASE64, None), - 'euc-jp': (BASE64, None, 'iso-2022-jp'), - 'shift_jis': (BASE64, None, 'iso-2022-jp'), - 'iso-2022-jp': (BASE64, None, None), - 'koi8-r': (BASE64, BASE64, None), - 'utf-8': (SHORTEST, BASE64, 'utf-8'), - } - -# Aliases for other commonly-used names for character sets. Map -# them to the real ones used in email. -ALIASES = { - 'latin_1': 'iso-8859-1', - 'latin-1': 'iso-8859-1', - 'latin_2': 'iso-8859-2', - 'latin-2': 'iso-8859-2', - 'latin_3': 'iso-8859-3', - 'latin-3': 'iso-8859-3', - 'latin_4': 'iso-8859-4', - 'latin-4': 'iso-8859-4', - 'latin_5': 'iso-8859-9', - 'latin-5': 'iso-8859-9', - 'latin_6': 'iso-8859-10', - 'latin-6': 'iso-8859-10', - 'latin_7': 'iso-8859-13', - 'latin-7': 'iso-8859-13', - 'latin_8': 'iso-8859-14', - 'latin-8': 'iso-8859-14', - 'latin_9': 'iso-8859-15', - 'latin-9': 'iso-8859-15', - 'latin_10':'iso-8859-16', - 'latin-10':'iso-8859-16', - 'cp949': 'ks_c_5601-1987', - 'euc_jp': 'euc-jp', - 'euc_kr': 'euc-kr', - 'ascii': 'us-ascii', - } - - -# Map charsets to their Unicode codec strings. -CODEC_MAP = { - 'gb2312': 'eucgb2312_cn', - 'big5': 'big5_tw', - # Hack: We don't want *any* conversion for stuff marked us-ascii, as all - # sorts of garbage might be sent to us in the guise of 7-bit us-ascii. 
- # Let that stuff pass through without conversion to/from Unicode. - 'us-ascii': None, - } - - -# Convenience functions for extending the above mappings -def add_charset(charset, header_enc=None, body_enc=None, output_charset=None): - """Add character set properties to the global registry. - - charset is the input character set, and must be the canonical name of a - character set. - - Optional header_enc and body_enc is either Charset.QP for - quoted-printable, Charset.BASE64 for base64 encoding, Charset.SHORTEST for - the shortest of qp or base64 encoding, or None for no encoding. SHORTEST - is only valid for header_enc. It describes how message headers and - message bodies in the input charset are to be encoded. Default is no - encoding. - - Optional output_charset is the character set that the output should be - in. Conversions will proceed from input charset, to Unicode, to the - output charset when the method Charset.convert() is called. The default - is to output in the same character set as the input. - - Both input_charset and output_charset must have Unicode codec entries in - the module's charset-to-codec mapping; use add_codec(charset, codecname) - to add codecs the module does not know about. See the codecs module's - documentation for more information. - """ - if body_enc == SHORTEST: - raise ValueError('SHORTEST not allowed for body_enc') - CHARSETS[charset] = (header_enc, body_enc, output_charset) - - -def add_alias(alias, canonical): - """Add a character set alias. - - alias is the alias name, e.g. latin-1 - canonical is the character set's canonical name, e.g. iso-8859-1 - """ - ALIASES[alias] = canonical - - -def add_codec(charset, codecname): - """Add a codec that map characters in the given charset to/from Unicode. - - charset is the canonical name of a character set. codecname is the name - of a Python codec, as appropriate for the second argument to the unicode() - built-in, or to the encode() method of a Unicode string. 
- """ - CODEC_MAP[charset] = codecname - - -# Convenience function for encoding strings, taking into account -# that they might be unknown-8bit (ie: have surrogate-escaped bytes) -def _encode(string, codec): - string = str(string) - if codec == UNKNOWN8BIT: - return string.encode('ascii', 'surrogateescape') - else: - return string.encode(codec) - - -class Charset(object): - """Map character sets to their email properties. - - This class provides information about the requirements imposed on email - for a specific character set. It also provides convenience routines for - converting between character sets, given the availability of the - applicable codecs. Given a character set, it will do its best to provide - information on how to use that character set in an email in an - RFC-compliant way. - - Certain character sets must be encoded with quoted-printable or base64 - when used in email headers or bodies. Certain character sets must be - converted outright, and are not allowed in email. Instances of this - module expose the following information about a character set: - - input_charset: The initial character set specified. Common aliases - are converted to their `official' email names (e.g. latin_1 - is converted to iso-8859-1). Defaults to 7-bit us-ascii. - - header_encoding: If the character set must be encoded before it can be - used in an email header, this attribute will be set to - Charset.QP (for quoted-printable), Charset.BASE64 (for - base64 encoding), or Charset.SHORTEST for the shortest of - QP or BASE64 encoding. Otherwise, it will be None. - - body_encoding: Same as header_encoding, but describes the encoding for the - mail message's body, which indeed may be different than the - header encoding. Charset.SHORTEST is not allowed for - body_encoding. - - output_charset: Some character sets must be converted before they can be - used in email headers or bodies. 
If the input_charset is - one of them, this attribute will contain the name of the - charset output will be converted to. Otherwise, it will - be None. - - input_codec: The name of the Python codec used to convert the - input_charset to Unicode. If no conversion codec is - necessary, this attribute will be None. - - output_codec: The name of the Python codec used to convert Unicode - to the output_charset. If no conversion codec is necessary, - this attribute will have the same value as the input_codec. - """ - def __init__(self, input_charset=DEFAULT_CHARSET): - # RFC 2046, $4.1.2 says charsets are not case sensitive. We coerce to - # unicode because its .lower() is locale insensitive. If the argument - # is already a unicode, we leave it at that, but ensure that the - # charset is ASCII, as the standard (RFC XXX) requires. - try: - if isinstance(input_charset, str): - input_charset.encode('ascii') - else: - input_charset = str(input_charset, 'ascii') - except UnicodeError: - raise errors.CharsetError(input_charset) - input_charset = input_charset.lower() - # Set the input charset after filtering through the aliases - self.input_charset = ALIASES.get(input_charset, input_charset) - # We can try to guess which encoding and conversion to use by the - # charset_map dictionary. Try that first, but let the user override - # it. - henc, benc, conv = CHARSETS.get(self.input_charset, - (SHORTEST, BASE64, None)) - if not conv: - conv = self.input_charset - # Set the attributes, allowing the arguments to override the default. - self.header_encoding = henc - self.body_encoding = benc - self.output_charset = ALIASES.get(conv, conv) - # Now set the codecs. If one isn't defined for input_charset, - # guess and try a Unicode codec with the same name as input_codec. 
- self.input_codec = CODEC_MAP.get(self.input_charset, - self.input_charset) - self.output_codec = CODEC_MAP.get(self.output_charset, - self.output_charset) - - def __str__(self): - return self.input_charset.lower() - - __repr__ = __str__ - - def __eq__(self, other): - return str(self) == str(other).lower() - - def __ne__(self, other): - return not self.__eq__(other) - - def get_body_encoding(self): - """Return the content-transfer-encoding used for body encoding. - - This is either the string `quoted-printable' or `base64' depending on - the encoding used, or it is a function in which case you should call - the function with a single argument, the Message object being - encoded. The function should then set the Content-Transfer-Encoding - header itself to whatever is appropriate. - - Returns "quoted-printable" if self.body_encoding is QP. - Returns "base64" if self.body_encoding is BASE64. - Returns conversion function otherwise. - """ - assert self.body_encoding != SHORTEST - if self.body_encoding == QP: - return 'quoted-printable' - elif self.body_encoding == BASE64: - return 'base64' - else: - return encode_7or8bit - - def get_output_charset(self): - """Return the output character set. - - This is self.output_charset if that is not None, otherwise it is - self.input_charset. - """ - return self.output_charset or self.input_charset - - def header_encode(self, string): - """Header-encode a string by converting it first to bytes. - - The type of encoding (base64 or quoted-printable) will be based on - this charset's `header_encoding`. - - :param string: A unicode string for the header. It must be possible - to encode this string to bytes using the character set's - output codec. - :return: The encoded string, with RFC 2047 chrome. 
- """ - codec = self.output_codec or 'us-ascii' - header_bytes = _encode(string, codec) - # 7bit/8bit encodings return the string unchanged (modulo conversions) - encoder_module = self._get_encoder(header_bytes) - if encoder_module is None: - return string - return encoder_module.header_encode(header_bytes, codec) - - def header_encode_lines(self, string, maxlengths): - """Header-encode a string by converting it first to bytes. - - This is similar to `header_encode()` except that the string is fit - into maximum line lengths as given by the argument. - - :param string: A unicode string for the header. It must be possible - to encode this string to bytes using the character set's - output codec. - :param maxlengths: Maximum line length iterator. Each element - returned from this iterator will provide the next maximum line - length. This parameter is used as an argument to built-in next() - and should never be exhausted. The maximum line lengths should - not count the RFC 2047 chrome. These line lengths are only a - hint; the splitter does the best it can. - :return: Lines of encoded strings, each with RFC 2047 chrome. - """ - # See which encoding we should use. - codec = self.output_codec or 'us-ascii' - header_bytes = _encode(string, codec) - encoder_module = self._get_encoder(header_bytes) - encoder = partial(encoder_module.header_encode, charset=codec) - # Calculate the number of characters that the RFC 2047 chrome will - # contribute to each line. - charset = self.get_output_charset() - extra = len(charset) + RFC2047_CHROME_LEN - # Now comes the hard part. We must encode bytes but we can't split on - # bytes because some character sets are variable length and each - # encoded word must stand on its own. So the problem is you have to - # encode to bytes to figure out this word's length, but you must split - # on characters. 
This causes two problems: first, we don't know how - # many octets a specific substring of unicode characters will get - # encoded to, and second, we don't know how many ASCII characters - # those octets will get encoded to. Unless we try it. Which seems - # inefficient. In the interest of being correct rather than fast (and - # in the hope that there will be few encoded headers in any such - # message), brute force it. :( - lines = [] - current_line = [] - maxlen = next(maxlengths) - extra - for character in string: - current_line.append(character) - this_line = EMPTYSTRING.join(current_line) - length = encoder_module.header_length(_encode(this_line, charset)) - if length > maxlen: - # This last character doesn't fit so pop it off. - current_line.pop() - # Does nothing fit on the first line? - if not lines and not current_line: - lines.append(None) - else: - separator = (' ' if lines else '') - joined_line = EMPTYSTRING.join(current_line) - header_bytes = _encode(joined_line, codec) - lines.append(encoder(header_bytes)) - current_line = [character] - maxlen = next(maxlengths) - extra - joined_line = EMPTYSTRING.join(current_line) - header_bytes = _encode(joined_line, codec) - lines.append(encoder(header_bytes)) - return lines - - def _get_encoder(self, header_bytes): - if self.header_encoding == BASE64: - return email.base64mime - elif self.header_encoding == QP: - return email.quoprimime - elif self.header_encoding == SHORTEST: - len64 = email.base64mime.header_length(header_bytes) - lenqp = email.quoprimime.header_length(header_bytes) - if len64 < lenqp: - return email.base64mime - else: - return email.quoprimime - else: - return None - - def body_encode(self, string): - """Body-encode a string by converting it first to bytes. - - The type of encoding (base64 or quoted-printable) will be based on - self.body_encoding. 
If body_encoding is None, we assume the - output charset is a 7bit encoding, so re-encoding the decoded - string using the ascii codec produces the correct string version - of the content. - """ - if not string: - return string - if self.body_encoding is BASE64: - if isinstance(string, str): - string = string.encode(self.output_charset) - return email.base64mime.body_encode(string) - elif self.body_encoding is QP: - # quopromime.body_encode takes a string, but operates on it as if - # it were a list of byte codes. For a (minimal) history on why - # this is so, see changeset 0cf700464177. To correctly encode a - # character set, then, we must turn it into pseudo bytes via the - # latin1 charset, which will encode any byte as a single code point - # between 0 and 255, which is what body_encode is expecting. - if isinstance(string, str): - string = string.encode(self.output_charset) - string = string.decode('latin1') - return email.quoprimime.body_encode(string) - else: - if isinstance(string, str): - string = string.encode(self.output_charset).decode('ascii') - return string diff --git a/contrib/python/future/future/backports/email/encoders.py b/contrib/python/future/future/backports/email/encoders.py deleted file mode 100644 index 15d2eb4650c..00000000000 --- a/contrib/python/future/future/backports/email/encoders.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: [email protected] - -"""Encodings and related functions.""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import -from future.builtins import str - -__all__ = [ - 'encode_7or8bit', - 'encode_base64', - 'encode_noop', - 'encode_quopri', - ] - - -try: - from base64 import encodebytes as _bencode -except ImportError: - # Py2 compatibility. TODO: test this! 
- from base64 import encodestring as _bencode -from quopri import encodestring as _encodestring - - -def _qencode(s): - enc = _encodestring(s, quotetabs=True) - # Must encode spaces, which quopri.encodestring() doesn't do - return enc.replace(' ', '=20') - - -def encode_base64(msg): - """Encode the message's payload in Base64. - - Also, add an appropriate Content-Transfer-Encoding header. - """ - orig = msg.get_payload() - encdata = str(_bencode(orig), 'ascii') - msg.set_payload(encdata) - msg['Content-Transfer-Encoding'] = 'base64' - - -def encode_quopri(msg): - """Encode the message's payload in quoted-printable. - - Also, add an appropriate Content-Transfer-Encoding header. - """ - orig = msg.get_payload() - encdata = _qencode(orig) - msg.set_payload(encdata) - msg['Content-Transfer-Encoding'] = 'quoted-printable' - - -def encode_7or8bit(msg): - """Set the Content-Transfer-Encoding header to 7bit or 8bit.""" - orig = msg.get_payload() - if orig is None: - # There's no payload. For backwards compatibility we use 7bit - msg['Content-Transfer-Encoding'] = '7bit' - return - # We play a trick to make this go fast. If encoding/decode to ASCII - # succeeds, we know the data must be 7bit, otherwise treat it as 8bit. 
- try: - if isinstance(orig, str): - orig.encode('ascii') - else: - orig.decode('ascii') - except UnicodeError: - charset = msg.get_charset() - output_cset = charset and charset.output_charset - # iso-2022-* is non-ASCII but encodes to a 7-bit representation - if output_cset and output_cset.lower().startswith('iso-2022-'): - msg['Content-Transfer-Encoding'] = '7bit' - else: - msg['Content-Transfer-Encoding'] = '8bit' - else: - msg['Content-Transfer-Encoding'] = '7bit' - if not isinstance(orig, str): - msg.set_payload(orig.decode('ascii', 'surrogateescape')) - - -def encode_noop(msg): - """Do nothing.""" - # Well, not quite *nothing*: in Python3 we have to turn bytes into a string - # in our internal surrogateescaped form in order to keep the model - # consistent. - orig = msg.get_payload() - if not isinstance(orig, str): - msg.set_payload(orig.decode('ascii', 'surrogateescape')) diff --git a/contrib/python/future/future/backports/email/errors.py b/contrib/python/future/future/backports/email/errors.py deleted file mode 100644 index 0fe599cf0a7..00000000000 --- a/contrib/python/future/future/backports/email/errors.py +++ /dev/null @@ -1,111 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: [email protected] - -"""email package exception classes.""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import -from future.builtins import super - - -class MessageError(Exception): - """Base class for errors in the email package.""" - - -class MessageParseError(MessageError): - """Base class for message parsing errors.""" - - -class HeaderParseError(MessageParseError): - """Error while parsing headers.""" - - -class BoundaryError(MessageParseError): - """Couldn't find terminating boundary.""" - - -class MultipartConversionError(MessageError, TypeError): - """Conversion to a multipart is prohibited.""" - - -class CharsetError(MessageError): - """An illegal charset was 
given.""" - - -# These are parsing defects which the parser was able to work around. -class MessageDefect(ValueError): - """Base class for a message defect.""" - - def __init__(self, line=None): - if line is not None: - super().__init__(line) - self.line = line - -class NoBoundaryInMultipartDefect(MessageDefect): - """A message claimed to be a multipart but had no boundary parameter.""" - -class StartBoundaryNotFoundDefect(MessageDefect): - """The claimed start boundary was never found.""" - -class CloseBoundaryNotFoundDefect(MessageDefect): - """A start boundary was found, but not the corresponding close boundary.""" - -class FirstHeaderLineIsContinuationDefect(MessageDefect): - """A message had a continuation line as its first header line.""" - -class MisplacedEnvelopeHeaderDefect(MessageDefect): - """A 'Unix-from' header was found in the middle of a header block.""" - -class MissingHeaderBodySeparatorDefect(MessageDefect): - """Found line with no leading whitespace and no colon before blank line.""" -# XXX: backward compatibility, just in case (it was never emitted). -MalformedHeaderDefect = MissingHeaderBodySeparatorDefect - -class MultipartInvariantViolationDefect(MessageDefect): - """A message claimed to be a multipart but no subparts were found.""" - -class InvalidMultipartContentTransferEncodingDefect(MessageDefect): - """An invalid content transfer encoding was set on the multipart itself.""" - -class UndecodableBytesDefect(MessageDefect): - """Header contained bytes that could not be decoded""" - -class InvalidBase64PaddingDefect(MessageDefect): - """base64 encoded sequence had an incorrect length""" - -class InvalidBase64CharactersDefect(MessageDefect): - """base64 encoded sequence had characters not in base64 alphabet""" - -# These errors are specific to header parsing. 
- -class HeaderDefect(MessageDefect): - """Base class for a header defect.""" - - def __init__(self, *args, **kw): - super().__init__(*args, **kw) - -class InvalidHeaderDefect(HeaderDefect): - """Header is not valid, message gives details.""" - -class HeaderMissingRequiredValue(HeaderDefect): - """A header that must have a value had none""" - -class NonPrintableDefect(HeaderDefect): - """ASCII characters outside the ascii-printable range found""" - - def __init__(self, non_printables): - super().__init__(non_printables) - self.non_printables = non_printables - - def __str__(self): - return ("the following ASCII non-printables found in header: " - "{}".format(self.non_printables)) - -class ObsoleteHeaderDefect(HeaderDefect): - """Header uses syntax declared obsolete by RFC 5322""" - -class NonASCIILocalPartDefect(HeaderDefect): - """local_part contains non-ASCII characters""" - # This defect only occurs during unicode parsing, not when - # parsing messages decoded from binary. diff --git a/contrib/python/future/future/backports/email/feedparser.py b/contrib/python/future/future/backports/email/feedparser.py deleted file mode 100644 index 935c26e3179..00000000000 --- a/contrib/python/future/future/backports/email/feedparser.py +++ /dev/null @@ -1,525 +0,0 @@ -# Copyright (C) 2004-2006 Python Software Foundation -# Authors: Baxter, Wouters and Warsaw -# Contact: [email protected] - -"""FeedParser - An email feed parser. - -The feed parser implements an interface for incrementally parsing an email -message, line by line. This has advantages for certain applications, such as -those reading email messages off a socket. - -FeedParser.feed() is the primary interface for pushing new data into the -parser. It returns when there's nothing more it can do with the available -data. When you have no more data to push into the parser, call .close(). -This completes the parsing and returns the root message object. 
- -The other advantage of this parser is that it will never raise a parsing -exception. Instead, when it finds something unexpected, it adds a 'defect' to -the current message. Defects are just instances that live on the message -object's .defects attribute. -""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import -from future.builtins import object, range, super -from future.utils import implements_iterator, PY3 - -__all__ = ['FeedParser', 'BytesFeedParser'] - -import re - -from future.backports.email import errors -from future.backports.email import message -from future.backports.email._policybase import compat32 - -NLCRE = re.compile('\r\n|\r|\n') -NLCRE_bol = re.compile('(\r\n|\r|\n)') -NLCRE_eol = re.compile('(\r\n|\r|\n)\Z') -NLCRE_crack = re.compile('(\r\n|\r|\n)') -# RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character -# except controls, SP, and ":". -headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:|[\t ])') -EMPTYSTRING = '' -NL = '\n' - -NeedMoreData = object() - - -# @implements_iterator -class BufferedSubFile(object): - """A file-ish object that can have new data loaded into it. - - You can also push and pop line-matching predicates onto a stack. When the - current predicate matches the current line, a false EOF response - (i.e. empty string) is returned instead. This lets the parser adhere to a - simple abstraction -- it parses until EOF closes the current message. - """ - def __init__(self): - # The last partial line pushed into this object. - self._partial = '' - # The list of full, pushed lines, in reverse order - self._lines = [] - # The stack of false-EOF checking predicates. - self._eofstack = [] - # A flag indicating whether the file has been closed or not. 
- self._closed = False - - def push_eof_matcher(self, pred): - self._eofstack.append(pred) - - def pop_eof_matcher(self): - return self._eofstack.pop() - - def close(self): - # Don't forget any trailing partial line. - self._lines.append(self._partial) - self._partial = '' - self._closed = True - - def readline(self): - if not self._lines: - if self._closed: - return '' - return NeedMoreData - # Pop the line off the stack and see if it matches the current - # false-EOF predicate. - line = self._lines.pop() - # RFC 2046, section 5.1.2 requires us to recognize outer level - # boundaries at any level of inner nesting. Do this, but be sure it's - # in the order of most to least nested. - for ateof in self._eofstack[::-1]: - if ateof(line): - # We're at the false EOF. But push the last line back first. - self._lines.append(line) - return '' - return line - - def unreadline(self, line): - # Let the consumer push a line back into the buffer. - assert line is not NeedMoreData - self._lines.append(line) - - def push(self, data): - """Push some new data into this object.""" - # Handle any previous leftovers - data, self._partial = self._partial + data, '' - # Crack into lines, but preserve the newlines on the end of each - parts = NLCRE_crack.split(data) - # The *ahem* interesting behaviour of re.split when supplied grouping - # parentheses is that the last element of the resulting list is the - # data after the final RE. In the case of a NL/CR terminated string, - # this is the empty string. - self._partial = parts.pop() - #GAN 29Mar09 bugs 1555570, 1721862 Confusion at 8K boundary ending with \r: - # is there a \n to follow later? - if not self._partial and parts and parts[-1].endswith('\r'): - self._partial = parts.pop(-2)+parts.pop() - # parts is a list of strings, alternating between the line contents - # and the eol character(s). Gather up a list of lines after - # re-attaching the newlines. 
- lines = [] - for i in range(len(parts) // 2): - lines.append(parts[i*2] + parts[i*2+1]) - self.pushlines(lines) - - def pushlines(self, lines): - # Reverse and insert at the front of the lines. - self._lines[:0] = lines[::-1] - - def __iter__(self): - return self - - def __next__(self): - line = self.readline() - if line == '': - raise StopIteration - return line - - -class FeedParser(object): - """A feed-style parser of email.""" - - def __init__(self, _factory=message.Message, **_3to2kwargs): - if 'policy' in _3to2kwargs: policy = _3to2kwargs['policy']; del _3to2kwargs['policy'] - else: policy = compat32 - """_factory is called with no arguments to create a new message obj - - The policy keyword specifies a policy object that controls a number of - aspects of the parser's operation. The default policy maintains - backward compatibility. - - """ - self._factory = _factory - self.policy = policy - try: - _factory(policy=self.policy) - self._factory_kwds = lambda: {'policy': self.policy} - except TypeError: - # Assume this is an old-style factory - self._factory_kwds = lambda: {} - self._input = BufferedSubFile() - self._msgstack = [] - if PY3: - self._parse = self._parsegen().__next__ - else: - self._parse = self._parsegen().next - self._cur = None - self._last = None - self._headersonly = False - - # Non-public interface for supporting Parser's headersonly flag - def _set_headersonly(self): - self._headersonly = True - - def feed(self, data): - """Push more data into the parser.""" - self._input.push(data) - self._call_parse() - - def _call_parse(self): - try: - self._parse() - except StopIteration: - pass - - def close(self): - """Parse all remaining data and return the root message object.""" - self._input.close() - self._call_parse() - root = self._pop_message() - assert not self._msgstack - # Look for final set of defects - if root.get_content_maintype() == 'multipart' \ - and not root.is_multipart(): - defect = errors.MultipartInvariantViolationDefect() - 
self.policy.handle_defect(root, defect) - return root - - def _new_message(self): - msg = self._factory(**self._factory_kwds()) - if self._cur and self._cur.get_content_type() == 'multipart/digest': - msg.set_default_type('message/rfc822') - if self._msgstack: - self._msgstack[-1].attach(msg) - self._msgstack.append(msg) - self._cur = msg - self._last = msg - - def _pop_message(self): - retval = self._msgstack.pop() - if self._msgstack: - self._cur = self._msgstack[-1] - else: - self._cur = None - return retval - - def _parsegen(self): - # Create a new message and start by parsing headers. - self._new_message() - headers = [] - # Collect the headers, searching for a line that doesn't match the RFC - # 2822 header or continuation pattern (including an empty line). - for line in self._input: - if line is NeedMoreData: - yield NeedMoreData - continue - if not headerRE.match(line): - # If we saw the RFC defined header/body separator - # (i.e. newline), just throw it away. Otherwise the line is - # part of the body so push it back. - if not NLCRE.match(line): - defect = errors.MissingHeaderBodySeparatorDefect() - self.policy.handle_defect(self._cur, defect) - self._input.unreadline(line) - break - headers.append(line) - # Done with the headers, so parse them and figure out what we're - # supposed to see in the body of the message. - self._parse_headers(headers) - # Headers-only parsing is a backwards compatibility hack, which was - # necessary in the older parser, which could raise errors. All - # remaining lines in the input are thrown into the message body. - if self._headersonly: - lines = [] - while True: - line = self._input.readline() - if line is NeedMoreData: - yield NeedMoreData - continue - if line == '': - break - lines.append(line) - self._cur.set_payload(EMPTYSTRING.join(lines)) - return - if self._cur.get_content_type() == 'message/delivery-status': - # message/delivery-status contains blocks of headers separated by - # a blank line. 
We'll represent each header block as a separate - # nested message object, but the processing is a bit different - # than standard message/* types because there is no body for the - # nested messages. A blank line separates the subparts. - while True: - self._input.push_eof_matcher(NLCRE.match) - for retval in self._parsegen(): - if retval is NeedMoreData: - yield NeedMoreData - continue - break - msg = self._pop_message() - # We need to pop the EOF matcher in order to tell if we're at - # the end of the current file, not the end of the last block - # of message headers. - self._input.pop_eof_matcher() - # The input stream must be sitting at the newline or at the - # EOF. We want to see if we're at the end of this subpart, so - # first consume the blank line, then test the next line to see - # if we're at this subpart's EOF. - while True: - line = self._input.readline() - if line is NeedMoreData: - yield NeedMoreData - continue - break - while True: - line = self._input.readline() - if line is NeedMoreData: - yield NeedMoreData - continue - break - if line == '': - break - # Not at EOF so this is a line we're going to need. - self._input.unreadline(line) - return - if self._cur.get_content_maintype() == 'message': - # The message claims to be a message/* type, then what follows is - # another RFC 2822 message. - for retval in self._parsegen(): - if retval is NeedMoreData: - yield NeedMoreData - continue - break - self._pop_message() - return - if self._cur.get_content_maintype() == 'multipart': - boundary = self._cur.get_boundary() - if boundary is None: - # The message /claims/ to be a multipart but it has not - # defined a boundary. That's a problem which we'll handle by - # reading everything until the EOF and marking the message as - # defective. 
- defect = errors.NoBoundaryInMultipartDefect() - self.policy.handle_defect(self._cur, defect) - lines = [] - for line in self._input: - if line is NeedMoreData: - yield NeedMoreData - continue - lines.append(line) - self._cur.set_payload(EMPTYSTRING.join(lines)) - return - # Make sure a valid content type was specified per RFC 2045:6.4. - if (self._cur.get('content-transfer-encoding', '8bit').lower() - not in ('7bit', '8bit', 'binary')): - defect = errors.InvalidMultipartContentTransferEncodingDefect() - self.policy.handle_defect(self._cur, defect) - # Create a line match predicate which matches the inter-part - # boundary as well as the end-of-multipart boundary. Don't push - # this onto the input stream until we've scanned past the - # preamble. - separator = '--' + boundary - boundaryre = re.compile( - '(?P<sep>' + re.escape(separator) + - r')(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$') - capturing_preamble = True - preamble = [] - linesep = False - close_boundary_seen = False - while True: - line = self._input.readline() - if line is NeedMoreData: - yield NeedMoreData - continue - if line == '': - break - mo = boundaryre.match(line) - if mo: - # If we're looking at the end boundary, we're done with - # this multipart. If there was a newline at the end of - # the closing boundary, then we need to initialize the - # epilogue with the empty string (see below). - if mo.group('end'): - close_boundary_seen = True - linesep = mo.group('linesep') - break - # We saw an inter-part boundary. Were we in the preamble? - if capturing_preamble: - if preamble: - # According to RFC 2046, the last newline belongs - # to the boundary. - lastline = preamble[-1] - eolmo = NLCRE_eol.search(lastline) - if eolmo: - preamble[-1] = lastline[:-len(eolmo.group(0))] - self._cur.preamble = EMPTYSTRING.join(preamble) - capturing_preamble = False - self._input.unreadline(line) - continue - # We saw a boundary separating two parts. 
Consume any - # multiple boundary lines that may be following. Our - # interpretation of RFC 2046 BNF grammar does not produce - # body parts within such double boundaries. - while True: - line = self._input.readline() - if line is NeedMoreData: - yield NeedMoreData - continue - mo = boundaryre.match(line) - if not mo: - self._input.unreadline(line) - break - # Recurse to parse this subpart; the input stream points - # at the subpart's first line. - self._input.push_eof_matcher(boundaryre.match) - for retval in self._parsegen(): - if retval is NeedMoreData: - yield NeedMoreData - continue - break - # Because of RFC 2046, the newline preceding the boundary - # separator actually belongs to the boundary, not the - # previous subpart's payload (or epilogue if the previous - # part is a multipart). - if self._last.get_content_maintype() == 'multipart': - epilogue = self._last.epilogue - if epilogue == '': - self._last.epilogue = None - elif epilogue is not None: - mo = NLCRE_eol.search(epilogue) - if mo: - end = len(mo.group(0)) - self._last.epilogue = epilogue[:-end] - else: - payload = self._last._payload - if isinstance(payload, str): - mo = NLCRE_eol.search(payload) - if mo: - payload = payload[:-len(mo.group(0))] - self._last._payload = payload - self._input.pop_eof_matcher() - self._pop_message() - # Set the multipart up for newline cleansing, which will - # happen if we're in a nested multipart. - self._last = self._cur - else: - # I think we must be in the preamble - assert capturing_preamble - preamble.append(line) - # We've seen either the EOF or the end boundary. If we're still - # capturing the preamble, we never saw the start boundary. Note - # that as a defect and store the captured text as the payload. 
- if capturing_preamble: - defect = errors.StartBoundaryNotFoundDefect() - self.policy.handle_defect(self._cur, defect) - self._cur.set_payload(EMPTYSTRING.join(preamble)) - epilogue = [] - for line in self._input: - if line is NeedMoreData: - yield NeedMoreData - continue - self._cur.epilogue = EMPTYSTRING.join(epilogue) - return - # If we're not processing the preamble, then we might have seen - # EOF without seeing that end boundary...that is also a defect. - if not close_boundary_seen: - defect = errors.CloseBoundaryNotFoundDefect() - self.policy.handle_defect(self._cur, defect) - return - # Everything from here to the EOF is epilogue. If the end boundary - # ended in a newline, we'll need to make sure the epilogue isn't - # None - if linesep: - epilogue = [''] - else: - epilogue = [] - for line in self._input: - if line is NeedMoreData: - yield NeedMoreData - continue - epilogue.append(line) - # Any CRLF at the front of the epilogue is not technically part of - # the epilogue. Also, watch out for an empty string epilogue, - # which means a single newline. - if epilogue: - firstline = epilogue[0] - bolmo = NLCRE_bol.match(firstline) - if bolmo: - epilogue[0] = firstline[len(bolmo.group(0)):] - self._cur.epilogue = EMPTYSTRING.join(epilogue) - return - # Otherwise, it's some non-multipart type, so the entire rest of the - # file contents becomes the payload. - lines = [] - for line in self._input: - if line is NeedMoreData: - yield NeedMoreData - continue - lines.append(line) - self._cur.set_payload(EMPTYSTRING.join(lines)) - - def _parse_headers(self, lines): - # Passed a list of lines that make up the headers for the current msg - lastheader = '' - lastvalue = [] - for lineno, line in enumerate(lines): - # Check for continuation - if line[0] in ' \t': - if not lastheader: - # The first line of the headers was a continuation. This - # is illegal, so let's note the defect, store the illegal - # line, and ignore it for purposes of headers. 
- defect = errors.FirstHeaderLineIsContinuationDefect(line) - self.policy.handle_defect(self._cur, defect) - continue - lastvalue.append(line) - continue - if lastheader: - self._cur.set_raw(*self.policy.header_source_parse(lastvalue)) - lastheader, lastvalue = '', [] - # Check for envelope header, i.e. unix-from - if line.startswith('From '): - if lineno == 0: - # Strip off the trailing newline - mo = NLCRE_eol.search(line) - if mo: - line = line[:-len(mo.group(0))] - self._cur.set_unixfrom(line) - continue - elif lineno == len(lines) - 1: - # Something looking like a unix-from at the end - it's - # probably the first line of the body, so push back the - # line and stop. - self._input.unreadline(line) - return - else: - # Weirdly placed unix-from line. Note this as a defect - # and ignore it. - defect = errors.MisplacedEnvelopeHeaderDefect(line) - self._cur.defects.append(defect) - continue - # Split the line on the colon separating field name from value. - # There will always be a colon, because if there wasn't the part of - # the parser that calls us would have started parsing the body. - i = line.find(':') - assert i>0, "_parse_headers fed line with no : and no leading WS" - lastheader = line[:i] - lastvalue = [line] - # Done with all the lines, so handle the last header. 
- if lastheader: - self._cur.set_raw(*self.policy.header_source_parse(lastvalue)) - - -class BytesFeedParser(FeedParser): - """Like FeedParser, but feed accepts bytes.""" - - def feed(self, data): - super().feed(data.decode('ascii', 'surrogateescape')) diff --git a/contrib/python/future/future/backports/email/generator.py b/contrib/python/future/future/backports/email/generator.py deleted file mode 100644 index 53493d0ac51..00000000000 --- a/contrib/python/future/future/backports/email/generator.py +++ /dev/null @@ -1,498 +0,0 @@ -# Copyright (C) 2001-2010 Python Software Foundation -# Author: Barry Warsaw -# Contact: [email protected] - -"""Classes to generate plain text from a message object tree.""" -from __future__ import print_function -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import -from future.builtins import super -from future.builtins import str - -__all__ = ['Generator', 'DecodedGenerator', 'BytesGenerator'] - -import re -import sys -import time -import random -import warnings - -from io import StringIO, BytesIO -from future.backports.email._policybase import compat32 -from future.backports.email.header import Header -from future.backports.email.utils import _has_surrogates -import future.backports.email.charset as _charset - -UNDERSCORE = '_' -NL = '\n' # XXX: no longer used by the code below. - -fcre = re.compile(r'^From ', re.MULTILINE) - - -class Generator(object): - """Generates output from a Message object tree. - - This basic generator writes the message to the given file object as plain - text. - """ - # - # Public interface - # - - def __init__(self, outfp, mangle_from_=True, maxheaderlen=None, **_3to2kwargs): - if 'policy' in _3to2kwargs: policy = _3to2kwargs['policy']; del _3to2kwargs['policy'] - else: policy = None - """Create the generator for message flattening. - - outfp is the output file-like object for writing the message to. It - must have a write() method. 
- - Optional mangle_from_ is a flag that, when True (the default), escapes - From_ lines in the body of the message by putting a `>' in front of - them. - - Optional maxheaderlen specifies the longest length for a non-continued - header. When a header line is longer (in characters, with tabs - expanded to 8 spaces) than maxheaderlen, the header will split as - defined in the Header class. Set maxheaderlen to zero to disable - header wrapping. The default is 78, as recommended (but not required) - by RFC 2822. - - The policy keyword specifies a policy object that controls a number of - aspects of the generator's operation. The default policy maintains - backward compatibility. - - """ - self._fp = outfp - self._mangle_from_ = mangle_from_ - self.maxheaderlen = maxheaderlen - self.policy = policy - - def write(self, s): - # Just delegate to the file object - self._fp.write(s) - - def flatten(self, msg, unixfrom=False, linesep=None): - r"""Print the message object tree rooted at msg to the output file - specified when the Generator instance was created. - - unixfrom is a flag that forces the printing of a Unix From_ delimiter - before the first object in the message tree. If the original message - has no From_ delimiter, a `standard' one is crafted. By default, this - is False to inhibit the printing of any From_ delimiter. - - Note that for subobjects, no From_ line is printed. - - linesep specifies the characters used to indicate a new line in - the output. The default value is determined by the policy. - - """ - # We use the _XXX constants for operating on data that comes directly - # from the msg, and _encoded_XXX constants for operating on data that - # has already been converted (to bytes in the BytesGenerator) and - # inserted into a temporary buffer. 
- policy = msg.policy if self.policy is None else self.policy - if linesep is not None: - policy = policy.clone(linesep=linesep) - if self.maxheaderlen is not None: - policy = policy.clone(max_line_length=self.maxheaderlen) - self._NL = policy.linesep - self._encoded_NL = self._encode(self._NL) - self._EMPTY = '' - self._encoded_EMTPY = self._encode('') - # Because we use clone (below) when we recursively process message - # subparts, and because clone uses the computed policy (not None), - # submessages will automatically get set to the computed policy when - # they are processed by this code. - old_gen_policy = self.policy - old_msg_policy = msg.policy - try: - self.policy = policy - msg.policy = policy - if unixfrom: - ufrom = msg.get_unixfrom() - if not ufrom: - ufrom = 'From nobody ' + time.ctime(time.time()) - self.write(ufrom + self._NL) - self._write(msg) - finally: - self.policy = old_gen_policy - msg.policy = old_msg_policy - - def clone(self, fp): - """Clone this generator with the exact same options.""" - return self.__class__(fp, - self._mangle_from_, - None, # Use policy setting, which we've adjusted - policy=self.policy) - - # - # Protected interface - undocumented ;/ - # - - # Note that we use 'self.write' when what we are writing is coming from - # the source, and self._fp.write when what we are writing is coming from a - # buffer (because the Bytes subclass has already had a chance to transform - # the data in its write method in that case). This is an entirely - # pragmatic split determined by experiment; we could be more general by - # always using write and having the Bytes subclass write method detect when - # it has already transformed the input; but, since this whole thing is a - # hack anyway this seems good enough. - - # Similarly, we have _XXX and _encoded_XXX attributes that are used on - # source and buffer data, respectively. - _encoded_EMPTY = '' - - def _new_buffer(self): - # BytesGenerator overrides this to return BytesIO. 
- return StringIO() - - def _encode(self, s): - # BytesGenerator overrides this to encode strings to bytes. - return s - - def _write_lines(self, lines): - # We have to transform the line endings. - if not lines: - return - lines = lines.splitlines(True) - for line in lines[:-1]: - self.write(line.rstrip('\r\n')) - self.write(self._NL) - laststripped = lines[-1].rstrip('\r\n') - self.write(laststripped) - if len(lines[-1]) != len(laststripped): - self.write(self._NL) - - def _write(self, msg): - # We can't write the headers yet because of the following scenario: - # say a multipart message includes the boundary string somewhere in - # its body. We'd have to calculate the new boundary /before/ we write - # the headers so that we can write the correct Content-Type: - # parameter. - # - # The way we do this, so as to make the _handle_*() methods simpler, - # is to cache any subpart writes into a buffer. The we write the - # headers and the buffer contents. That way, subpart handlers can - # Do The Right Thing, and can still modify the Content-Type: header if - # necessary. - oldfp = self._fp - try: - self._fp = sfp = self._new_buffer() - self._dispatch(msg) - finally: - self._fp = oldfp - # Write the headers. First we see if the message object wants to - # handle that itself. If not, we'll do it generically. - meth = getattr(msg, '_write_headers', None) - if meth is None: - self._write_headers(msg) - else: - meth(self) - self._fp.write(sfp.getvalue()) - - def _dispatch(self, msg): - # Get the Content-Type: for the message, then try to dispatch to - # self._handle_<maintype>_<subtype>(). If there's no handler for the - # full MIME type, then dispatch to self._handle_<maintype>(). If - # that's missing too, then dispatch to self._writeBody(). 
- main = msg.get_content_maintype() - sub = msg.get_content_subtype() - specific = UNDERSCORE.join((main, sub)).replace('-', '_') - meth = getattr(self, '_handle_' + specific, None) - if meth is None: - generic = main.replace('-', '_') - meth = getattr(self, '_handle_' + generic, None) - if meth is None: - meth = self._writeBody - meth(msg) - - # - # Default handlers - # - - def _write_headers(self, msg): - for h, v in msg.raw_items(): - self.write(self.policy.fold(h, v)) - # A blank line always separates headers from body - self.write(self._NL) - - # - # Handlers for writing types and subtypes - # - - def _handle_text(self, msg): - payload = msg.get_payload() - if payload is None: - return - if not isinstance(payload, str): - raise TypeError('string payload expected: %s' % type(payload)) - if _has_surrogates(msg._payload): - charset = msg.get_param('charset') - if charset is not None: - del msg['content-transfer-encoding'] - msg.set_payload(payload, charset) - payload = msg.get_payload() - if self._mangle_from_: - payload = fcre.sub('>From ', payload) - self._write_lines(payload) - - # Default body handler - _writeBody = _handle_text - - def _handle_multipart(self, msg): - # The trick here is to write out each part separately, merge them all - # together, and then make sure that the boundary we've chosen isn't - # present in the payload. - msgtexts = [] - subparts = msg.get_payload() - if subparts is None: - subparts = [] - elif isinstance(subparts, str): - # e.g. a non-strict parse of a message with no starting boundary. - self.write(subparts) - return - elif not isinstance(subparts, list): - # Scalar payload - subparts = [subparts] - for part in subparts: - s = self._new_buffer() - g = self.clone(s) - g.flatten(part, unixfrom=False, linesep=self._NL) - msgtexts.append(s.getvalue()) - # BAW: What about boundaries that are wrapped in double-quotes? 
- boundary = msg.get_boundary() - if not boundary: - # Create a boundary that doesn't appear in any of the - # message texts. - alltext = self._encoded_NL.join(msgtexts) - boundary = self._make_boundary(alltext) - msg.set_boundary(boundary) - # If there's a preamble, write it out, with a trailing CRLF - if msg.preamble is not None: - if self._mangle_from_: - preamble = fcre.sub('>From ', msg.preamble) - else: - preamble = msg.preamble - self._write_lines(preamble) - self.write(self._NL) - # dash-boundary transport-padding CRLF - self.write('--' + boundary + self._NL) - # body-part - if msgtexts: - self._fp.write(msgtexts.pop(0)) - # *encapsulation - # --> delimiter transport-padding - # --> CRLF body-part - for body_part in msgtexts: - # delimiter transport-padding CRLF - self.write(self._NL + '--' + boundary + self._NL) - # body-part - self._fp.write(body_part) - # close-delimiter transport-padding - self.write(self._NL + '--' + boundary + '--') - if msg.epilogue is not None: - self.write(self._NL) - if self._mangle_from_: - epilogue = fcre.sub('>From ', msg.epilogue) - else: - epilogue = msg.epilogue - self._write_lines(epilogue) - - def _handle_multipart_signed(self, msg): - # The contents of signed parts has to stay unmodified in order to keep - # the signature intact per RFC1847 2.1, so we disable header wrapping. - # RDM: This isn't enough to completely preserve the part, but it helps. - p = self.policy - self.policy = p.clone(max_line_length=0) - try: - self._handle_multipart(msg) - finally: - self.policy = p - - def _handle_message_delivery_status(self, msg): - # We can't just write the headers directly to self's file object - # because this will leave an extra newline between the last header - # block and the boundary. Sigh. 
- blocks = [] - for part in msg.get_payload(): - s = self._new_buffer() - g = self.clone(s) - g.flatten(part, unixfrom=False, linesep=self._NL) - text = s.getvalue() - lines = text.split(self._encoded_NL) - # Strip off the unnecessary trailing empty line - if lines and lines[-1] == self._encoded_EMPTY: - blocks.append(self._encoded_NL.join(lines[:-1])) - else: - blocks.append(text) - # Now join all the blocks with an empty line. This has the lovely - # effect of separating each block with an empty line, but not adding - # an extra one after the last one. - self._fp.write(self._encoded_NL.join(blocks)) - - def _handle_message(self, msg): - s = self._new_buffer() - g = self.clone(s) - # The payload of a message/rfc822 part should be a multipart sequence - # of length 1. The zeroth element of the list should be the Message - # object for the subpart. Extract that object, stringify it, and - # write it out. - # Except, it turns out, when it's a string instead, which happens when - # and only when HeaderParser is used on a message of mime type - # message/rfc822. Such messages are generated by, for example, - # Groupwise when forwarding unadorned messages. (Issue 7970.) So - # in that case we just emit the string body. - payload = msg._payload - if isinstance(payload, list): - g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL) - payload = s.getvalue() - else: - payload = self._encode(payload) - self._fp.write(payload) - - # This used to be a module level function; we use a classmethod for this - # and _compile_re so we can continue to provide the module level function - # for backward compatibility by doing - # _make_boudary = Generator._make_boundary - # at the end of the module. It *is* internal, so we could drop that... - @classmethod - def _make_boundary(cls, text=None): - # Craft a random boundary. If text is given, ensure that the chosen - # boundary doesn't appear in the text. 
- token = random.randrange(sys.maxsize) - boundary = ('=' * 15) + (_fmt % token) + '==' - if text is None: - return boundary - b = boundary - counter = 0 - while True: - cre = cls._compile_re('^--' + re.escape(b) + '(--)?$', re.MULTILINE) - if not cre.search(text): - break - b = boundary + '.' + str(counter) - counter += 1 - return b - - @classmethod - def _compile_re(cls, s, flags): - return re.compile(s, flags) - -class BytesGenerator(Generator): - """Generates a bytes version of a Message object tree. - - Functionally identical to the base Generator except that the output is - bytes and not string. When surrogates were used in the input to encode - bytes, these are decoded back to bytes for output. If the policy has - cte_type set to 7bit, then the message is transformed such that the - non-ASCII bytes are properly content transfer encoded, using the charset - unknown-8bit. - - The outfp object must accept bytes in its write method. - """ - - # Bytes versions of this constant for use in manipulating data from - # the BytesIO buffer. - _encoded_EMPTY = b'' - - def write(self, s): - self._fp.write(str(s).encode('ascii', 'surrogateescape')) - - def _new_buffer(self): - return BytesIO() - - def _encode(self, s): - return s.encode('ascii') - - def _write_headers(self, msg): - # This is almost the same as the string version, except for handling - # strings with 8bit bytes. - for h, v in msg.raw_items(): - self._fp.write(self.policy.fold_binary(h, v)) - # A blank line always separates headers from body - self.write(self._NL) - - def _handle_text(self, msg): - # If the string has surrogates the original source was bytes, so - # just write it back out. 
- if msg._payload is None: - return - if _has_surrogates(msg._payload) and not self.policy.cte_type=='7bit': - if self._mangle_from_: - msg._payload = fcre.sub(">From ", msg._payload) - self._write_lines(msg._payload) - else: - super(BytesGenerator,self)._handle_text(msg) - - # Default body handler - _writeBody = _handle_text - - @classmethod - def _compile_re(cls, s, flags): - return re.compile(s.encode('ascii'), flags) - - -_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]' - -class DecodedGenerator(Generator): - """Generates a text representation of a message. - - Like the Generator base class, except that non-text parts are substituted - with a format string representing the part. - """ - def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None): - """Like Generator.__init__() except that an additional optional - argument is allowed. - - Walks through all subparts of a message. If the subpart is of main - type `text', then it prints the decoded payload of the subpart. - - Otherwise, fmt is a format string that is used instead of the message - payload. 
fmt is expanded with the following keywords (in - %(keyword)s format): - - type : Full MIME type of the non-text part - maintype : Main MIME type of the non-text part - subtype : Sub-MIME type of the non-text part - filename : Filename of the non-text part - description: Description associated with the non-text part - encoding : Content transfer encoding of the non-text part - - The default value for fmt is None, meaning - - [Non-text (%(type)s) part of message omitted, filename %(filename)s] - """ - Generator.__init__(self, outfp, mangle_from_, maxheaderlen) - if fmt is None: - self._fmt = _FMT - else: - self._fmt = fmt - - def _dispatch(self, msg): - for part in msg.walk(): - maintype = part.get_content_maintype() - if maintype == 'text': - print(part.get_payload(decode=False), file=self) - elif maintype == 'multipart': - # Just skip this - pass - else: - print(self._fmt % { - 'type' : part.get_content_type(), - 'maintype' : part.get_content_maintype(), - 'subtype' : part.get_content_subtype(), - 'filename' : part.get_filename('[no filename]'), - 'description': part.get('Content-Description', - '[no description]'), - 'encoding' : part.get('Content-Transfer-Encoding', - '[no encoding]'), - }, file=self) - - -# Helper used by Generator._make_boundary -_width = len(repr(sys.maxsize-1)) -_fmt = '%%0%dd' % _width - -# Backward compatibility -_make_boundary = Generator._make_boundary diff --git a/contrib/python/future/future/backports/email/header.py b/contrib/python/future/future/backports/email/header.py deleted file mode 100644 index 63bf038c022..00000000000 --- a/contrib/python/future/future/backports/email/header.py +++ /dev/null @@ -1,581 +0,0 @@ -# Copyright (C) 2002-2007 Python Software Foundation -# Author: Ben Gertzfield, Barry Warsaw -# Contact: [email protected] - -"""Header encoding and decoding functionality.""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import -from future.builtins import 
bytes, range, str, super, zip - -__all__ = [ - 'Header', - 'decode_header', - 'make_header', - ] - -import re -import binascii - -from future.backports import email -from future.backports.email import base64mime -from future.backports.email.errors import HeaderParseError -import future.backports.email.charset as _charset - -# Helpers -from future.backports.email.quoprimime import _max_append, header_decode - -Charset = _charset.Charset - -NL = '\n' -SPACE = ' ' -BSPACE = b' ' -SPACE8 = ' ' * 8 -EMPTYSTRING = '' -MAXLINELEN = 78 -FWS = ' \t' - -USASCII = Charset('us-ascii') -UTF8 = Charset('utf-8') - -# Match encoded-word strings in the form =?charset?q?Hello_World?= -ecre = re.compile(r''' - =\? # literal =? - (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset - \? # literal ? - (?P<encoding>[qb]) # either a "q" or a "b", case insensitive - \? # literal ? - (?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string - \?= # literal ?= - ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE) - -# Field name regexp, including trailing colon, but not separating whitespace, -# according to RFC 2822. Character range is from tilde to exclamation mark. -# For use with .match() -fcre = re.compile(r'[\041-\176]+:$') - -# Find a header embedded in a putative header value. Used to check for -# header injection attack. -_embeded_header = re.compile(r'\n[^ \t]+:') - - -def decode_header(header): - """Decode a message header value without converting charset. - - Returns a list of (string, charset) pairs containing each of the decoded - parts of the header. Charset is None for non-encoded parts of the header, - otherwise a lower-case string containing the name of the character set - specified in the encoded string. - - header may be a string that may or may not contain RFC2047 encoded words, - or it may be a Header object. - - An email.errors.HeaderParseError may be raised when certain decoding error - occurs (e.g. a base64 decoding exception). 
- """ - # If it is a Header object, we can just return the encoded chunks. - if hasattr(header, '_chunks'): - return [(_charset._encode(string, str(charset)), str(charset)) - for string, charset in header._chunks] - # If no encoding, just return the header with no charset. - if not ecre.search(header): - return [(header, None)] - # First step is to parse all the encoded parts into triplets of the form - # (encoded_string, encoding, charset). For unencoded strings, the last - # two parts will be None. - words = [] - for line in header.splitlines(): - parts = ecre.split(line) - first = True - while parts: - unencoded = parts.pop(0) - if first: - unencoded = unencoded.lstrip() - first = False - if unencoded: - words.append((unencoded, None, None)) - if parts: - charset = parts.pop(0).lower() - encoding = parts.pop(0).lower() - encoded = parts.pop(0) - words.append((encoded, encoding, charset)) - # Now loop over words and remove words that consist of whitespace - # between two encoded strings. - import sys - droplist = [] - for n, w in enumerate(words): - if n>1 and w[1] and words[n-2][1] and words[n-1][0].isspace(): - droplist.append(n-1) - for d in reversed(droplist): - del words[d] - - # The next step is to decode each encoded word by applying the reverse - # base64 or quopri transformation. decoded_words is now a list of the - # form (decoded_word, charset). - decoded_words = [] - for encoded_string, encoding, charset in words: - if encoding is None: - # This is an unencoded word. 
- decoded_words.append((encoded_string, charset)) - elif encoding == 'q': - word = header_decode(encoded_string) - decoded_words.append((word, charset)) - elif encoding == 'b': - paderr = len(encoded_string) % 4 # Postel's law: add missing padding - if paderr: - encoded_string += '==='[:4 - paderr] - try: - word = base64mime.decode(encoded_string) - except binascii.Error: - raise HeaderParseError('Base64 decoding error') - else: - decoded_words.append((word, charset)) - else: - raise AssertionError('Unexpected encoding: ' + encoding) - # Now convert all words to bytes and collapse consecutive runs of - # similarly encoded words. - collapsed = [] - last_word = last_charset = None - for word, charset in decoded_words: - if isinstance(word, str): - word = bytes(word, 'raw-unicode-escape') - if last_word is None: - last_word = word - last_charset = charset - elif charset != last_charset: - collapsed.append((last_word, last_charset)) - last_word = word - last_charset = charset - elif last_charset is None: - last_word += BSPACE + word - else: - last_word += word - collapsed.append((last_word, last_charset)) - return collapsed - - -def make_header(decoded_seq, maxlinelen=None, header_name=None, - continuation_ws=' '): - """Create a Header from a sequence of pairs as returned by decode_header() - - decode_header() takes a header value string and returns a sequence of - pairs of the format (decoded_string, charset) where charset is the string - name of the character set. - - This function takes one of those sequence of pairs and returns a Header - instance. Optional maxlinelen, header_name, and continuation_ws are as in - the Header constructor. 
- """ - h = Header(maxlinelen=maxlinelen, header_name=header_name, - continuation_ws=continuation_ws) - for s, charset in decoded_seq: - # None means us-ascii but we can simply pass it on to h.append() - if charset is not None and not isinstance(charset, Charset): - charset = Charset(charset) - h.append(s, charset) - return h - - -class Header(object): - def __init__(self, s=None, charset=None, - maxlinelen=None, header_name=None, - continuation_ws=' ', errors='strict'): - """Create a MIME-compliant header that can contain many character sets. - - Optional s is the initial header value. If None, the initial header - value is not set. You can later append to the header with .append() - method calls. s may be a byte string or a Unicode string, but see the - .append() documentation for semantics. - - Optional charset serves two purposes: it has the same meaning as the - charset argument to the .append() method. It also sets the default - character set for all subsequent .append() calls that omit the charset - argument. If charset is not provided in the constructor, the us-ascii - charset is used both as s's initial charset and as the default for - subsequent .append() calls. - - The maximum line length can be specified explicitly via maxlinelen. For - splitting the first line to a shorter value (to account for the field - header which isn't included in s, e.g. `Subject') pass in the name of - the field in header_name. The default maxlinelen is 78 as recommended - by RFC 2822. - - continuation_ws must be RFC 2822 compliant folding whitespace (usually - either a space or a hard tab) which will be prepended to continuation - lines. - - errors is passed through to the .append() call. 
- """ - if charset is None: - charset = USASCII - elif not isinstance(charset, Charset): - charset = Charset(charset) - self._charset = charset - self._continuation_ws = continuation_ws - self._chunks = [] - if s is not None: - self.append(s, charset, errors) - if maxlinelen is None: - maxlinelen = MAXLINELEN - self._maxlinelen = maxlinelen - if header_name is None: - self._headerlen = 0 - else: - # Take the separating colon and space into account. - self._headerlen = len(header_name) + 2 - - def __str__(self): - """Return the string value of the header.""" - self._normalize() - uchunks = [] - lastcs = None - lastspace = None - for string, charset in self._chunks: - # We must preserve spaces between encoded and non-encoded word - # boundaries, which means for us we need to add a space when we go - # from a charset to None/us-ascii, or from None/us-ascii to a - # charset. Only do this for the second and subsequent chunks. - # Don't add a space if the None/us-ascii string already has - # a space (trailing or leading depending on transition) - nextcs = charset - if nextcs == _charset.UNKNOWN8BIT: - original_bytes = string.encode('ascii', 'surrogateescape') - string = original_bytes.decode('ascii', 'replace') - if uchunks: - hasspace = string and self._nonctext(string[0]) - if lastcs not in (None, 'us-ascii'): - if nextcs in (None, 'us-ascii') and not hasspace: - uchunks.append(SPACE) - nextcs = None - elif nextcs not in (None, 'us-ascii') and not lastspace: - uchunks.append(SPACE) - lastspace = string and self._nonctext(string[-1]) - lastcs = nextcs - uchunks.append(string) - return EMPTYSTRING.join(uchunks) - - # Rich comparison operators for equality only. BAW: does it make sense to - # have or explicitly disable <, <=, >, >= operators? - def __eq__(self, other): - # other may be a Header or a string. Both are fine so coerce - # ourselves to a unicode (of the unencoded header value), swap the - # args and do another comparison. 
- return other == str(self) - - def __ne__(self, other): - return not self == other - - def append(self, s, charset=None, errors='strict'): - """Append a string to the MIME header. - - Optional charset, if given, should be a Charset instance or the name - of a character set (which will be converted to a Charset instance). A - value of None (the default) means that the charset given in the - constructor is used. - - s may be a byte string or a Unicode string. If it is a byte string - (i.e. isinstance(s, str) is false), then charset is the encoding of - that byte string, and a UnicodeError will be raised if the string - cannot be decoded with that charset. If s is a Unicode string, then - charset is a hint specifying the character set of the characters in - the string. In either case, when producing an RFC 2822 compliant - header using RFC 2047 rules, the string will be encoded using the - output codec of the charset. If the string cannot be encoded to the - output codec, a UnicodeError will be raised. - - Optional `errors' is passed as the errors argument to the decode - call if s is a byte string. - """ - if charset is None: - charset = self._charset - elif not isinstance(charset, Charset): - charset = Charset(charset) - if not isinstance(s, str): - input_charset = charset.input_codec or 'us-ascii' - if input_charset == _charset.UNKNOWN8BIT: - s = s.decode('us-ascii', 'surrogateescape') - else: - s = s.decode(input_charset, errors) - # Ensure that the bytes we're storing can be decoded to the output - # character set, otherwise an early error is raised. - output_charset = charset.output_codec or 'us-ascii' - if output_charset != _charset.UNKNOWN8BIT: - try: - s.encode(output_charset, errors) - except UnicodeEncodeError: - if output_charset!='us-ascii': - raise - charset = UTF8 - self._chunks.append((s, charset)) - - def _nonctext(self, s): - """True if string s is not a ctext character of RFC822. 
- """ - return s.isspace() or s in ('(', ')', '\\') - - def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'): - r"""Encode a message header into an RFC-compliant format. - - There are many issues involved in converting a given string for use in - an email header. Only certain character sets are readable in most - email clients, and as header strings can only contain a subset of - 7-bit ASCII, care must be taken to properly convert and encode (with - Base64 or quoted-printable) header strings. In addition, there is a - 75-character length limit on any given encoded header field, so - line-wrapping must be performed, even with double-byte character sets. - - Optional maxlinelen specifies the maximum length of each generated - line, exclusive of the linesep string. Individual lines may be longer - than maxlinelen if a folding point cannot be found. The first line - will be shorter by the length of the header name plus ": " if a header - name was specified at Header construction time. The default value for - maxlinelen is determined at header construction time. - - Optional splitchars is a string containing characters which should be - given extra weight by the splitting algorithm during normal header - wrapping. This is in very rough support of RFC 2822's `higher level - syntactic breaks': split points preceded by a splitchar are preferred - during line splitting, with the characters preferred in the order in - which they appear in the string. Space and tab may be included in the - string to indicate whether preference should be given to one over the - other as a split point when other split chars do not appear in the line - being split. Splitchars does not affect RFC 2047 encoded lines. - - Optional linesep is a string to be used to separate the lines of - the value. The default value is the most useful for typical - Python applications, but it can be set to \r\n to produce RFC-compliant - line separators when needed. 
- """ - self._normalize() - if maxlinelen is None: - maxlinelen = self._maxlinelen - # A maxlinelen of 0 means don't wrap. For all practical purposes, - # choosing a huge number here accomplishes that and makes the - # _ValueFormatter algorithm much simpler. - if maxlinelen == 0: - maxlinelen = 1000000 - formatter = _ValueFormatter(self._headerlen, maxlinelen, - self._continuation_ws, splitchars) - lastcs = None - hasspace = lastspace = None - for string, charset in self._chunks: - if hasspace is not None: - hasspace = string and self._nonctext(string[0]) - import sys - if lastcs not in (None, 'us-ascii'): - if not hasspace or charset not in (None, 'us-ascii'): - formatter.add_transition() - elif charset not in (None, 'us-ascii') and not lastspace: - formatter.add_transition() - lastspace = string and self._nonctext(string[-1]) - lastcs = charset - hasspace = False - lines = string.splitlines() - if lines: - formatter.feed('', lines[0], charset) - else: - formatter.feed('', '', charset) - for line in lines[1:]: - formatter.newline() - if charset.header_encoding is not None: - formatter.feed(self._continuation_ws, ' ' + line.lstrip(), - charset) - else: - sline = line.lstrip() - fws = line[:len(line)-len(sline)] - formatter.feed(fws, sline, charset) - if len(lines) > 1: - formatter.newline() - if self._chunks: - formatter.add_transition() - value = formatter._str(linesep) - if _embeded_header.search(value): - raise HeaderParseError("header value appears to contain " - "an embedded header: {!r}".format(value)) - return value - - def _normalize(self): - # Step 1: Normalize the chunks so that all runs of identical charsets - # get collapsed into a single unicode string. 
- chunks = [] - last_charset = None - last_chunk = [] - for string, charset in self._chunks: - if charset == last_charset: - last_chunk.append(string) - else: - if last_charset is not None: - chunks.append((SPACE.join(last_chunk), last_charset)) - last_chunk = [string] - last_charset = charset - if last_chunk: - chunks.append((SPACE.join(last_chunk), last_charset)) - self._chunks = chunks - - -class _ValueFormatter(object): - def __init__(self, headerlen, maxlen, continuation_ws, splitchars): - self._maxlen = maxlen - self._continuation_ws = continuation_ws - self._continuation_ws_len = len(continuation_ws) - self._splitchars = splitchars - self._lines = [] - self._current_line = _Accumulator(headerlen) - - def _str(self, linesep): - self.newline() - return linesep.join(self._lines) - - def __str__(self): - return self._str(NL) - - def newline(self): - end_of_line = self._current_line.pop() - if end_of_line != (' ', ''): - self._current_line.push(*end_of_line) - if len(self._current_line) > 0: - if self._current_line.is_onlyws(): - self._lines[-1] += str(self._current_line) - else: - self._lines.append(str(self._current_line)) - self._current_line.reset() - - def add_transition(self): - self._current_line.push(' ', '') - - def feed(self, fws, string, charset): - # If the charset has no header encoding (i.e. it is an ASCII encoding) - # then we must split the header at the "highest level syntactic break" - # possible. Note that we don't have a lot of smarts about field - # syntax; we just try to break on semi-colons, then commas, then - # whitespace. Eventually, this should be pluggable. - if charset.header_encoding is None: - self._ascii_split(fws, string, self._splitchars) - return - # Otherwise, we're doing either a Base64 or a quoted-printable - # encoding which means we don't need to split the line on syntactic - # breaks. We can basically just find enough characters to fit on the - # current line, minus the RFC 2047 chrome. 
What makes this trickier - # though is that we have to split at octet boundaries, not character - # boundaries but it's only safe to split at character boundaries so at - # best we can only get close. - encoded_lines = charset.header_encode_lines(string, self._maxlengths()) - # The first element extends the current line, but if it's None then - # nothing more fit on the current line so start a new line. - try: - first_line = encoded_lines.pop(0) - except IndexError: - # There are no encoded lines, so we're done. - return - if first_line is not None: - self._append_chunk(fws, first_line) - try: - last_line = encoded_lines.pop() - except IndexError: - # There was only one line. - return - self.newline() - self._current_line.push(self._continuation_ws, last_line) - # Everything else are full lines in themselves. - for line in encoded_lines: - self._lines.append(self._continuation_ws + line) - - def _maxlengths(self): - # The first line's length. - yield self._maxlen - len(self._current_line) - while True: - yield self._maxlen - self._continuation_ws_len - - def _ascii_split(self, fws, string, splitchars): - # The RFC 2822 header folding algorithm is simple in principle but - # complex in practice. Lines may be folded any place where "folding - # white space" appears by inserting a linesep character in front of the - # FWS. The complication is that not all spaces or tabs qualify as FWS, - # and we are also supposed to prefer to break at "higher level - # syntactic breaks". We can't do either of these without intimate - # knowledge of the structure of structured headers, which we don't have - # here. So the best we can do here is prefer to break at the specified - # splitchars, and hope that we don't choose any spaces or tabs that - # aren't legal FWS. (This is at least better than the old algorithm, - # where we would sometimes *introduce* FWS after a splitchar, or the - # algorithm before that, where we would turn all white space runs into - # single spaces or tabs.) 
- parts = re.split("(["+FWS+"]+)", fws+string) - if parts[0]: - parts[:0] = [''] - else: - parts.pop(0) - for fws, part in zip(*[iter(parts)]*2): - self._append_chunk(fws, part) - - def _append_chunk(self, fws, string): - self._current_line.push(fws, string) - if len(self._current_line) > self._maxlen: - # Find the best split point, working backward from the end. - # There might be none, on a long first line. - for ch in self._splitchars: - for i in range(self._current_line.part_count()-1, 0, -1): - if ch.isspace(): - fws = self._current_line[i][0] - if fws and fws[0]==ch: - break - prevpart = self._current_line[i-1][1] - if prevpart and prevpart[-1]==ch: - break - else: - continue - break - else: - fws, part = self._current_line.pop() - if self._current_line._initial_size > 0: - # There will be a header, so leave it on a line by itself. - self.newline() - if not fws: - # We don't use continuation_ws here because the whitespace - # after a header should always be a space. - fws = ' ' - self._current_line.push(fws, part) - return - remainder = self._current_line.pop_from(i) - self._lines.append(str(self._current_line)) - self._current_line.reset(remainder) - - -class _Accumulator(list): - - def __init__(self, initial_size=0): - self._initial_size = initial_size - super().__init__() - - def push(self, fws, string): - self.append((fws, string)) - - def pop_from(self, i=0): - popped = self[i:] - self[i:] = [] - return popped - - def pop(self): - if self.part_count()==0: - return ('', '') - return super().pop() - - def __len__(self): - return sum((len(fws)+len(part) for fws, part in self), - self._initial_size) - - def __str__(self): - return EMPTYSTRING.join((EMPTYSTRING.join((fws, part)) - for fws, part in self)) - - def reset(self, startval=None): - if startval is None: - startval = [] - self[:] = startval - self._initial_size = 0 - - def is_onlyws(self): - return self._initial_size==0 and (not self or str(self).isspace()) - - def part_count(self): - return 
super().__len__() diff --git a/contrib/python/future/future/backports/email/headerregistry.py b/contrib/python/future/future/backports/email/headerregistry.py deleted file mode 100644 index 9aaad65a149..00000000000 --- a/contrib/python/future/future/backports/email/headerregistry.py +++ /dev/null @@ -1,592 +0,0 @@ -"""Representing and manipulating email headers via custom objects. - -This module provides an implementation of the HeaderRegistry API. -The implementation is designed to flexibly follow RFC5322 rules. - -Eventually HeaderRegistry will be a public API, but it isn't yet, -and will probably change some before that happens. - -""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import - -from future.builtins import super -from future.builtins import str -from future.utils import text_to_native_str -from future.backports.email import utils -from future.backports.email import errors -from future.backports.email import _header_value_parser as parser - -class Address(object): - - def __init__(self, display_name='', username='', domain='', addr_spec=None): - """Create an object represeting a full email address. - - An address can have a 'display_name', a 'username', and a 'domain'. In - addition to specifying the username and domain separately, they may be - specified together by using the addr_spec keyword *instead of* the - username and domain keywords. If an addr_spec string is specified it - must be properly quoted according to RFC 5322 rules; an error will be - raised if it is not. - - An Address object has display_name, username, domain, and addr_spec - attributes, all of which are read-only. The addr_spec and the string - value of the object are both quoted according to RFC5322 rules, but - without any Content Transfer Encoding. - - """ - # This clause with its potential 'raise' may only happen when an - # application program creates an Address object using an addr_spec - # keyword. 
The email library code itself must always supply username - # and domain. - if addr_spec is not None: - if username or domain: - raise TypeError("addrspec specified when username and/or " - "domain also specified") - a_s, rest = parser.get_addr_spec(addr_spec) - if rest: - raise ValueError("Invalid addr_spec; only '{}' " - "could be parsed from '{}'".format( - a_s, addr_spec)) - if a_s.all_defects: - raise a_s.all_defects[0] - username = a_s.local_part - domain = a_s.domain - self._display_name = display_name - self._username = username - self._domain = domain - - @property - def display_name(self): - return self._display_name - - @property - def username(self): - return self._username - - @property - def domain(self): - return self._domain - - @property - def addr_spec(self): - """The addr_spec (username@domain) portion of the address, quoted - according to RFC 5322 rules, but with no Content Transfer Encoding. - """ - nameset = set(self.username) - if len(nameset) > len(nameset-parser.DOT_ATOM_ENDS): - lp = parser.quote_string(self.username) - else: - lp = self.username - if self.domain: - return lp + '@' + self.domain - if not lp: - return '<>' - return lp - - def __repr__(self): - return "Address(display_name={!r}, username={!r}, domain={!r})".format( - self.display_name, self.username, self.domain) - - def __str__(self): - nameset = set(self.display_name) - if len(nameset) > len(nameset-parser.SPECIALS): - disp = parser.quote_string(self.display_name) - else: - disp = self.display_name - if disp: - addr_spec = '' if self.addr_spec=='<>' else self.addr_spec - return "{} <{}>".format(disp, addr_spec) - return self.addr_spec - - def __eq__(self, other): - if type(other) != type(self): - return False - return (self.display_name == other.display_name and - self.username == other.username and - self.domain == other.domain) - - -class Group(object): - - def __init__(self, display_name=None, addresses=None): - """Create an object representing an address group. 
- - An address group consists of a display_name followed by colon and an - list of addresses (see Address) terminated by a semi-colon. The Group - is created by specifying a display_name and a possibly empty list of - Address objects. A Group can also be used to represent a single - address that is not in a group, which is convenient when manipulating - lists that are a combination of Groups and individual Addresses. In - this case the display_name should be set to None. In particular, the - string representation of a Group whose display_name is None is the same - as the Address object, if there is one and only one Address object in - the addresses list. - - """ - self._display_name = display_name - self._addresses = tuple(addresses) if addresses else tuple() - - @property - def display_name(self): - return self._display_name - - @property - def addresses(self): - return self._addresses - - def __repr__(self): - return "Group(display_name={!r}, addresses={!r}".format( - self.display_name, self.addresses) - - def __str__(self): - if self.display_name is None and len(self.addresses)==1: - return str(self.addresses[0]) - disp = self.display_name - if disp is not None: - nameset = set(disp) - if len(nameset) > len(nameset-parser.SPECIALS): - disp = parser.quote_string(disp) - adrstr = ", ".join(str(x) for x in self.addresses) - adrstr = ' ' + adrstr if adrstr else adrstr - return "{}:{};".format(disp, adrstr) - - def __eq__(self, other): - if type(other) != type(self): - return False - return (self.display_name == other.display_name and - self.addresses == other.addresses) - - -# Header Classes # - -class BaseHeader(str): - - """Base class for message headers. - - Implements generic behavior and provides tools for subclasses. - - A subclass must define a classmethod named 'parse' that takes an unfolded - value string and a dictionary as its arguments. The dictionary will - contain one key, 'defects', initialized to an empty list. 
After the call - the dictionary must contain two additional keys: parse_tree, set to the - parse tree obtained from parsing the header, and 'decoded', set to the - string value of the idealized representation of the data from the value. - (That is, encoded words are decoded, and values that have canonical - representations are so represented.) - - The defects key is intended to collect parsing defects, which the message - parser will subsequently dispose of as appropriate. The parser should not, - insofar as practical, raise any errors. Defects should be added to the - list instead. The standard header parsers register defects for RFC - compliance issues, for obsolete RFC syntax, and for unrecoverable parsing - errors. - - The parse method may add additional keys to the dictionary. In this case - the subclass must define an 'init' method, which will be passed the - dictionary as its keyword arguments. The method should use (usually by - setting them as the value of similarly named attributes) and remove all the - extra keys added by its parse method, and then use super to call its parent - class with the remaining arguments and keywords. - - The subclass should also make sure that a 'max_count' attribute is defined - that is either None or 1. XXX: need to better define this API. 
- - """ - - def __new__(cls, name, value): - kwds = {'defects': []} - cls.parse(value, kwds) - if utils._has_surrogates(kwds['decoded']): - kwds['decoded'] = utils._sanitize(kwds['decoded']) - self = str.__new__(cls, kwds['decoded']) - # del kwds['decoded'] - self.init(name, **kwds) - return self - - def init(self, name, **_3to2kwargs): - defects = _3to2kwargs['defects']; del _3to2kwargs['defects'] - parse_tree = _3to2kwargs['parse_tree']; del _3to2kwargs['parse_tree'] - self._name = name - self._parse_tree = parse_tree - self._defects = defects - - @property - def name(self): - return self._name - - @property - def defects(self): - return tuple(self._defects) - - def __reduce__(self): - return ( - _reconstruct_header, - ( - self.__class__.__name__, - self.__class__.__bases__, - str(self), - ), - self.__dict__) - - @classmethod - def _reconstruct(cls, value): - return str.__new__(cls, value) - - def fold(self, **_3to2kwargs): - policy = _3to2kwargs['policy']; del _3to2kwargs['policy'] - """Fold header according to policy. - - The parsed representation of the header is folded according to - RFC5322 rules, as modified by the policy. If the parse tree - contains surrogateescaped bytes, the bytes are CTE encoded using - the charset 'unknown-8bit". - - Any non-ASCII characters in the parse tree are CTE encoded using - charset utf-8. XXX: make this a policy setting. - - The returned value is an ASCII-only string possibly containing linesep - characters, and ending with a linesep character. The string includes - the header name and the ': ' separator. - - """ - # At some point we need to only put fws here if it was in the source. 
- header = parser.Header([ - parser.HeaderLabel([ - parser.ValueTerminal(self.name, 'header-name'), - parser.ValueTerminal(':', 'header-sep')]), - parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')]), - self._parse_tree]) - return header.fold(policy=policy) - - -def _reconstruct_header(cls_name, bases, value): - return type(text_to_native_str(cls_name), bases, {})._reconstruct(value) - - -class UnstructuredHeader(object): - - max_count = None - value_parser = staticmethod(parser.get_unstructured) - - @classmethod - def parse(cls, value, kwds): - kwds['parse_tree'] = cls.value_parser(value) - kwds['decoded'] = str(kwds['parse_tree']) - - -class UniqueUnstructuredHeader(UnstructuredHeader): - - max_count = 1 - - -class DateHeader(object): - - """Header whose value consists of a single timestamp. - - Provides an additional attribute, datetime, which is either an aware - datetime using a timezone, or a naive datetime if the timezone - in the input string is -0000. Also accepts a datetime as input. - The 'value' attribute is the normalized form of the timestamp, - which means it is the output of format_datetime on the datetime. - """ - - max_count = None - - # This is used only for folding, not for creating 'decoded'. 
- value_parser = staticmethod(parser.get_unstructured) - - @classmethod - def parse(cls, value, kwds): - if not value: - kwds['defects'].append(errors.HeaderMissingRequiredValue()) - kwds['datetime'] = None - kwds['decoded'] = '' - kwds['parse_tree'] = parser.TokenList() - return - if isinstance(value, str): - value = utils.parsedate_to_datetime(value) - kwds['datetime'] = value - kwds['decoded'] = utils.format_datetime(kwds['datetime']) - kwds['parse_tree'] = cls.value_parser(kwds['decoded']) - - def init(self, *args, **kw): - self._datetime = kw.pop('datetime') - super().init(*args, **kw) - - @property - def datetime(self): - return self._datetime - - -class UniqueDateHeader(DateHeader): - - max_count = 1 - - -class AddressHeader(object): - - max_count = None - - @staticmethod - def value_parser(value): - address_list, value = parser.get_address_list(value) - assert not value, 'this should not happen' - return address_list - - @classmethod - def parse(cls, value, kwds): - if isinstance(value, str): - # We are translating here from the RFC language (address/mailbox) - # to our API language (group/address). 
- kwds['parse_tree'] = address_list = cls.value_parser(value) - groups = [] - for addr in address_list.addresses: - groups.append(Group(addr.display_name, - [Address(mb.display_name or '', - mb.local_part or '', - mb.domain or '') - for mb in addr.all_mailboxes])) - defects = list(address_list.all_defects) - else: - # Assume it is Address/Group stuff - if not hasattr(value, '__iter__'): - value = [value] - groups = [Group(None, [item]) if not hasattr(item, 'addresses') - else item - for item in value] - defects = [] - kwds['groups'] = groups - kwds['defects'] = defects - kwds['decoded'] = ', '.join([str(item) for item in groups]) - if 'parse_tree' not in kwds: - kwds['parse_tree'] = cls.value_parser(kwds['decoded']) - - def init(self, *args, **kw): - self._groups = tuple(kw.pop('groups')) - self._addresses = None - super().init(*args, **kw) - - @property - def groups(self): - return self._groups - - @property - def addresses(self): - if self._addresses is None: - self._addresses = tuple([address for group in self._groups - for address in group.addresses]) - return self._addresses - - -class UniqueAddressHeader(AddressHeader): - - max_count = 1 - - -class SingleAddressHeader(AddressHeader): - - @property - def address(self): - if len(self.addresses)!=1: - raise ValueError(("value of single address header {} is not " - "a single address").format(self.name)) - return self.addresses[0] - - -class UniqueSingleAddressHeader(SingleAddressHeader): - - max_count = 1 - - -class MIMEVersionHeader(object): - - max_count = 1 - - value_parser = staticmethod(parser.parse_mime_version) - - @classmethod - def parse(cls, value, kwds): - kwds['parse_tree'] = parse_tree = cls.value_parser(value) - kwds['decoded'] = str(parse_tree) - kwds['defects'].extend(parse_tree.all_defects) - kwds['major'] = None if parse_tree.minor is None else parse_tree.major - kwds['minor'] = parse_tree.minor - if parse_tree.minor is not None: - kwds['version'] = '{}.{}'.format(kwds['major'], kwds['minor']) - 
else: - kwds['version'] = None - - def init(self, *args, **kw): - self._version = kw.pop('version') - self._major = kw.pop('major') - self._minor = kw.pop('minor') - super().init(*args, **kw) - - @property - def major(self): - return self._major - - @property - def minor(self): - return self._minor - - @property - def version(self): - return self._version - - -class ParameterizedMIMEHeader(object): - - # Mixin that handles the params dict. Must be subclassed and - # a property value_parser for the specific header provided. - - max_count = 1 - - @classmethod - def parse(cls, value, kwds): - kwds['parse_tree'] = parse_tree = cls.value_parser(value) - kwds['decoded'] = str(parse_tree) - kwds['defects'].extend(parse_tree.all_defects) - if parse_tree.params is None: - kwds['params'] = {} - else: - # The MIME RFCs specify that parameter ordering is arbitrary. - kwds['params'] = dict((utils._sanitize(name).lower(), - utils._sanitize(value)) - for name, value in parse_tree.params) - - def init(self, *args, **kw): - self._params = kw.pop('params') - super().init(*args, **kw) - - @property - def params(self): - return self._params.copy() - - -class ContentTypeHeader(ParameterizedMIMEHeader): - - value_parser = staticmethod(parser.parse_content_type_header) - - def init(self, *args, **kw): - super().init(*args, **kw) - self._maintype = utils._sanitize(self._parse_tree.maintype) - self._subtype = utils._sanitize(self._parse_tree.subtype) - - @property - def maintype(self): - return self._maintype - - @property - def subtype(self): - return self._subtype - - @property - def content_type(self): - return self.maintype + '/' + self.subtype - - -class ContentDispositionHeader(ParameterizedMIMEHeader): - - value_parser = staticmethod(parser.parse_content_disposition_header) - - def init(self, *args, **kw): - super().init(*args, **kw) - cd = self._parse_tree.content_disposition - self._content_disposition = cd if cd is None else utils._sanitize(cd) - - @property - def 
content_disposition(self): - return self._content_disposition - - -class ContentTransferEncodingHeader(object): - - max_count = 1 - - value_parser = staticmethod(parser.parse_content_transfer_encoding_header) - - @classmethod - def parse(cls, value, kwds): - kwds['parse_tree'] = parse_tree = cls.value_parser(value) - kwds['decoded'] = str(parse_tree) - kwds['defects'].extend(parse_tree.all_defects) - - def init(self, *args, **kw): - super().init(*args, **kw) - self._cte = utils._sanitize(self._parse_tree.cte) - - @property - def cte(self): - return self._cte - - -# The header factory # - -_default_header_map = { - 'subject': UniqueUnstructuredHeader, - 'date': UniqueDateHeader, - 'resent-date': DateHeader, - 'orig-date': UniqueDateHeader, - 'sender': UniqueSingleAddressHeader, - 'resent-sender': SingleAddressHeader, - 'to': UniqueAddressHeader, - 'resent-to': AddressHeader, - 'cc': UniqueAddressHeader, - 'resent-cc': AddressHeader, - 'bcc': UniqueAddressHeader, - 'resent-bcc': AddressHeader, - 'from': UniqueAddressHeader, - 'resent-from': AddressHeader, - 'reply-to': UniqueAddressHeader, - 'mime-version': MIMEVersionHeader, - 'content-type': ContentTypeHeader, - 'content-disposition': ContentDispositionHeader, - 'content-transfer-encoding': ContentTransferEncodingHeader, - } - -class HeaderRegistry(object): - - """A header_factory and header registry.""" - - def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader, - use_default_map=True): - """Create a header_factory that works with the Policy API. - - base_class is the class that will be the last class in the created - header class's __bases__ list. default_class is the class that will be - used if "name" (see __call__) does not appear in the registry. - use_default_map controls whether or not the default mapping of names to - specialized classes is copied in to the registry when the factory is - created. The default is True. 
- - """ - self.registry = {} - self.base_class = base_class - self.default_class = default_class - if use_default_map: - self.registry.update(_default_header_map) - - def map_to_type(self, name, cls): - """Register cls as the specialized class for handling "name" headers. - - """ - self.registry[name.lower()] = cls - - def __getitem__(self, name): - cls = self.registry.get(name.lower(), self.default_class) - return type(text_to_native_str('_'+cls.__name__), (cls, self.base_class), {}) - - def __call__(self, name, value): - """Create a header instance for header 'name' from 'value'. - - Creates a header instance by creating a specialized class for parsing - and representing the specified header by combining the factory - base_class with a specialized class from the registry or the - default_class, and passing the name and value to the constructed - class's constructor. - - """ - return self[name](name, value) diff --git a/contrib/python/future/future/backports/email/iterators.py b/contrib/python/future/future/backports/email/iterators.py deleted file mode 100644 index 82d320f8149..00000000000 --- a/contrib/python/future/future/backports/email/iterators.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: [email protected] - -"""Various types of useful iterators and generators.""" -from __future__ import print_function -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import - -__all__ = [ - 'body_line_iterator', - 'typed_subpart_iterator', - 'walk', - # Do not include _structure() since it's part of the debugging API. - ] - -import sys -from io import StringIO - - -# This function will become a method of the Message class -def walk(self): - """Walk over the message tree, yielding each subpart. - - The walk is performed in depth-first order. This method is a - generator. 
- """ - yield self - if self.is_multipart(): - for subpart in self.get_payload(): - for subsubpart in subpart.walk(): - yield subsubpart - - -# These two functions are imported into the Iterators.py interface module. -def body_line_iterator(msg, decode=False): - """Iterate over the parts, returning string payloads line-by-line. - - Optional decode (default False) is passed through to .get_payload(). - """ - for subpart in msg.walk(): - payload = subpart.get_payload(decode=decode) - if isinstance(payload, str): - for line in StringIO(payload): - yield line - - -def typed_subpart_iterator(msg, maintype='text', subtype=None): - """Iterate over the subparts with a given MIME type. - - Use `maintype' as the main MIME type to match against; this defaults to - "text". Optional `subtype' is the MIME subtype to match against; if - omitted, only the main type is matched. - """ - for subpart in msg.walk(): - if subpart.get_content_maintype() == maintype: - if subtype is None or subpart.get_content_subtype() == subtype: - yield subpart - - -def _structure(msg, fp=None, level=0, include_default=False): - """A handy debugging aid""" - if fp is None: - fp = sys.stdout - tab = ' ' * (level * 4) - print(tab + msg.get_content_type(), end='', file=fp) - if include_default: - print(' [%s]' % msg.get_default_type(), file=fp) - else: - print(file=fp) - if msg.is_multipart(): - for subpart in msg.get_payload(): - _structure(subpart, fp, level+1, include_default) diff --git a/contrib/python/future/future/backports/email/message.py b/contrib/python/future/future/backports/email/message.py deleted file mode 100644 index d8d9615d7d7..00000000000 --- a/contrib/python/future/future/backports/email/message.py +++ /dev/null @@ -1,882 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (C) 2001-2007 Python Software Foundation -# Author: Barry Warsaw -# Contact: [email protected] - -"""Basic message object for the email package object model.""" -from __future__ import absolute_import, division, 
unicode_literals -from future.builtins import list, range, str, zip - -__all__ = ['Message'] - -import re -import uu -import base64 -import binascii -from io import BytesIO, StringIO - -# Intrapackage imports -from future.utils import as_native_str -from future.backports.email import utils -from future.backports.email import errors -from future.backports.email._policybase import compat32 -from future.backports.email import charset as _charset -from future.backports.email._encoded_words import decode_b -Charset = _charset.Charset - -SEMISPACE = '; ' - -# Regular expression that matches `special' characters in parameters, the -# existence of which force quoting of the parameter value. -tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]') - - -def _splitparam(param): - # Split header parameters. BAW: this may be too simple. It isn't - # strictly RFC 2045 (section 5.1) compliant, but it catches most headers - # found in the wild. We may eventually need a full fledged parser. - # RDM: we might have a Header here; for now just stringify it. - a, sep, b = str(param).partition(';') - if not sep: - return a.strip(), None - return a.strip(), b.strip() - -def _formatparam(param, value=None, quote=True): - """Convenience function to format and return a key=value pair. - - This will quote the value if needed or if quote is true. If value is a - three tuple (charset, language, value), it will be encoded according - to RFC2231 rules. If it contains non-ascii characters it will likewise - be encoded according to RFC2231 rules, using the utf-8 charset and - a null language. - """ - if value is not None and len(value) > 0: - # A tuple is used for RFC 2231 encoded parameter values where items - # are (charset, language, value). charset is a string, not a Charset - # instance. RFC 2231 encoded values are never quoted, per RFC. 
- if isinstance(value, tuple): - # Encode as per RFC 2231 - param += '*' - value = utils.encode_rfc2231(value[2], value[0], value[1]) - return '%s=%s' % (param, value) - else: - try: - value.encode('ascii') - except UnicodeEncodeError: - param += '*' - value = utils.encode_rfc2231(value, 'utf-8', '') - return '%s=%s' % (param, value) - # BAW: Please check this. I think that if quote is set it should - # force quoting even if not necessary. - if quote or tspecials.search(value): - return '%s="%s"' % (param, utils.quote(value)) - else: - return '%s=%s' % (param, value) - else: - return param - -def _parseparam(s): - # RDM This might be a Header, so for now stringify it. - s = ';' + str(s) - plist = [] - while s[:1] == ';': - s = s[1:] - end = s.find(';') - while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: - end = s.find(';', end + 1) - if end < 0: - end = len(s) - f = s[:end] - if '=' in f: - i = f.index('=') - f = f[:i].strip().lower() + '=' + f[i+1:].strip() - plist.append(f.strip()) - s = s[end:] - return plist - - -def _unquotevalue(value): - # This is different than utils.collapse_rfc2231_value() because it doesn't - # try to convert the value to a unicode. Message.get_param() and - # Message.get_params() are both currently defined to return the tuple in - # the face of RFC 2231 parameters. - if isinstance(value, tuple): - return value[0], value[1], utils.unquote(value[2]) - else: - return utils.unquote(value) - - -class Message(object): - """Basic message object. - - A message object is defined as something that has a bunch of RFC 2822 - headers and a payload. It may optionally have an envelope header - (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a - multipart or a message/rfc822), then the payload is a list of Message - objects, otherwise it is a string. - - Message objects implement part of the `mapping' interface, which assumes - there is exactly one occurrence of the header per message. 
Some headers - do in fact appear multiple times (e.g. Received) and for those headers, - you must use the explicit API to set or get all the headers. Not all of - the mapping methods are implemented. - """ - def __init__(self, policy=compat32): - self.policy = policy - self._headers = list() - self._unixfrom = None - self._payload = None - self._charset = None - # Defaults for multipart messages - self.preamble = self.epilogue = None - self.defects = [] - # Default content type - self._default_type = 'text/plain' - - @as_native_str(encoding='utf-8') - def __str__(self): - """Return the entire formatted message as a string. - This includes the headers, body, and envelope header. - """ - return self.as_string() - - def as_string(self, unixfrom=False, maxheaderlen=0): - """Return the entire formatted message as a (unicode) string. - Optional `unixfrom' when True, means include the Unix From_ envelope - header. - - This is a convenience method and may not generate the message exactly - as you intend. For more flexibility, use the flatten() method of a - Generator instance. - """ - from future.backports.email.generator import Generator - fp = StringIO() - g = Generator(fp, mangle_from_=False, maxheaderlen=maxheaderlen) - g.flatten(self, unixfrom=unixfrom) - return fp.getvalue() - - def is_multipart(self): - """Return True if the message consists of multiple parts.""" - return isinstance(self._payload, list) - - # - # Unix From_ line - # - def set_unixfrom(self, unixfrom): - self._unixfrom = unixfrom - - def get_unixfrom(self): - return self._unixfrom - - # - # Payload manipulation. - # - def attach(self, payload): - """Add the given payload to the current payload. - - The current payload will always be a list of objects after this method - is called. If you want to set the payload to a scalar object, use - set_payload() instead. 
- """ - if self._payload is None: - self._payload = [payload] - else: - self._payload.append(payload) - - def get_payload(self, i=None, decode=False): - """Return a reference to the payload. - - The payload will either be a list object or a string. If you mutate - the list object, you modify the message's payload in place. Optional - i returns that index into the payload. - - Optional decode is a flag indicating whether the payload should be - decoded or not, according to the Content-Transfer-Encoding header - (default is False). - - When True and the message is not a multipart, the payload will be - decoded if this header's value is `quoted-printable' or `base64'. If - some other encoding is used, or the header is missing, or if the - payload has bogus data (i.e. bogus base64 or uuencoded data), the - payload is returned as-is. - - If the message is a multipart and the decode flag is True, then None - is returned. - """ - # Here is the logic table for this code, based on the email5.0.0 code: - # i decode is_multipart result - # ------ ------ ------------ ------------------------------ - # None True True None - # i True True None - # None False True _payload (a list) - # i False True _payload element i (a Message) - # i False False error (not a list) - # i True False error (not a list) - # None False False _payload - # None True False _payload decoded (bytes) - # Note that Barry planned to factor out the 'decode' case, but that - # isn't so easy now that we handle the 8 bit data, which needs to be - # converted in both the decode and non-decode path. - if self.is_multipart(): - if decode: - return None - if i is None: - return self._payload - else: - return self._payload[i] - # For backward compatibility, Use isinstance and this error message - # instead of the more logical is_multipart test. 
- if i is not None and not isinstance(self._payload, list): - raise TypeError('Expected list, got %s' % type(self._payload)) - payload = self._payload - # cte might be a Header, so for now stringify it. - cte = str(self.get('content-transfer-encoding', '')).lower() - # payload may be bytes here. - if isinstance(payload, str): - payload = str(payload) # for Python-Future, so surrogateescape works - if utils._has_surrogates(payload): - bpayload = payload.encode('ascii', 'surrogateescape') - if not decode: - try: - payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace') - except LookupError: - payload = bpayload.decode('ascii', 'replace') - elif decode: - try: - bpayload = payload.encode('ascii') - except UnicodeError: - # This won't happen for RFC compliant messages (messages - # containing only ASCII codepoints in the unicode input). - # If it does happen, turn the string into bytes in a way - # guaranteed not to fail. - bpayload = payload.encode('raw-unicode-escape') - if not decode: - return payload - if cte == 'quoted-printable': - return utils._qdecode(bpayload) - elif cte == 'base64': - # XXX: this is a bit of a hack; decode_b should probably be factored - # out somewhere, but I haven't figured out where yet. - value, defects = decode_b(b''.join(bpayload.splitlines())) - for defect in defects: - self.policy.handle_defect(self, defect) - return value - elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): - in_file = BytesIO(bpayload) - out_file = BytesIO() - try: - uu.decode(in_file, out_file, quiet=True) - return out_file.getvalue() - except uu.Error: - # Some decoding problem - return bpayload - if isinstance(payload, str): - return bpayload - return payload - - def set_payload(self, payload, charset=None): - """Set the payload to the given value. - - Optional charset sets the message's default character set. See - set_charset() for details. 
- """ - self._payload = payload - if charset is not None: - self.set_charset(charset) - - def set_charset(self, charset): - """Set the charset of the payload to a given character set. - - charset can be a Charset instance, a string naming a character set, or - None. If it is a string it will be converted to a Charset instance. - If charset is None, the charset parameter will be removed from the - Content-Type field. Anything else will generate a TypeError. - - The message will be assumed to be of type text/* encoded with - charset.input_charset. It will be converted to charset.output_charset - and encoded properly, if needed, when generating the plain text - representation of the message. MIME headers (MIME-Version, - Content-Type, Content-Transfer-Encoding) will be added as needed. - """ - if charset is None: - self.del_param('charset') - self._charset = None - return - if not isinstance(charset, Charset): - charset = Charset(charset) - self._charset = charset - if 'MIME-Version' not in self: - self.add_header('MIME-Version', '1.0') - if 'Content-Type' not in self: - self.add_header('Content-Type', 'text/plain', - charset=charset.get_output_charset()) - else: - self.set_param('charset', charset.get_output_charset()) - if charset != charset.get_output_charset(): - self._payload = charset.body_encode(self._payload) - if 'Content-Transfer-Encoding' not in self: - cte = charset.get_body_encoding() - try: - cte(self) - except TypeError: - self._payload = charset.body_encode(self._payload) - self.add_header('Content-Transfer-Encoding', cte) - - def get_charset(self): - """Return the Charset instance associated with the message's payload. - """ - return self._charset - - # - # MAPPING INTERFACE (partial) - # - def __len__(self): - """Return the total number of headers, including duplicates.""" - return len(self._headers) - - def __getitem__(self, name): - """Get a header value. - - Return None if the header is missing instead of raising an exception. 
- - Note that if the header appeared multiple times, exactly which - occurrence gets returned is undefined. Use get_all() to get all - the values matching a header field name. - """ - return self.get(name) - - def __setitem__(self, name, val): - """Set the value of a header. - - Note: this does not overwrite an existing header with the same field - name. Use __delitem__() first to delete any existing headers. - """ - max_count = self.policy.header_max_count(name) - if max_count: - lname = name.lower() - found = 0 - for k, v in self._headers: - if k.lower() == lname: - found += 1 - if found >= max_count: - raise ValueError("There may be at most {} {} headers " - "in a message".format(max_count, name)) - self._headers.append(self.policy.header_store_parse(name, val)) - - def __delitem__(self, name): - """Delete all occurrences of a header, if present. - - Does not raise an exception if the header is missing. - """ - name = name.lower() - newheaders = list() - for k, v in self._headers: - if k.lower() != name: - newheaders.append((k, v)) - self._headers = newheaders - - def __contains__(self, name): - return name.lower() in [k.lower() for k, v in self._headers] - - def __iter__(self): - for field, value in self._headers: - yield field - - def keys(self): - """Return a list of all the message's header field names. - - These will be sorted in the order they appeared in the original - message, or were added to the message, and may contain duplicates. - Any fields deleted and re-inserted are always appended to the header - list. - """ - return [k for k, v in self._headers] - - def values(self): - """Return a list of all the message's header values. - - These will be sorted in the order they appeared in the original - message, or were added to the message, and may contain duplicates. - Any fields deleted and re-inserted are always appended to the header - list. 
- """ - return [self.policy.header_fetch_parse(k, v) - for k, v in self._headers] - - def items(self): - """Get all the message's header fields and values. - - These will be sorted in the order they appeared in the original - message, or were added to the message, and may contain duplicates. - Any fields deleted and re-inserted are always appended to the header - list. - """ - return [(k, self.policy.header_fetch_parse(k, v)) - for k, v in self._headers] - - def get(self, name, failobj=None): - """Get a header value. - - Like __getitem__() but return failobj instead of None when the field - is missing. - """ - name = name.lower() - for k, v in self._headers: - if k.lower() == name: - return self.policy.header_fetch_parse(k, v) - return failobj - - # - # "Internal" methods (public API, but only intended for use by a parser - # or generator, not normal application code. - # - - def set_raw(self, name, value): - """Store name and value in the model without modification. - - This is an "internal" API, intended only for use by a parser. - """ - self._headers.append((name, value)) - - def raw_items(self): - """Return the (name, value) header pairs without modification. - - This is an "internal" API, intended only for use by a generator. - """ - return iter(self._headers.copy()) - - # - # Additional useful stuff - # - - def get_all(self, name, failobj=None): - """Return a list of all the values for the named field. - - These will be sorted in the order they appeared in the original - message, and may contain duplicates. Any fields deleted and - re-inserted are always appended to the header list. - - If no such fields exist, failobj is returned (defaults to None). - """ - values = [] - name = name.lower() - for k, v in self._headers: - if k.lower() == name: - values.append(self.policy.header_fetch_parse(k, v)) - if not values: - return failobj - return values - - def add_header(self, _name, _value, **_params): - """Extended header setting. 
- - name is the header field to add. keyword arguments can be used to set - additional parameters for the header field, with underscores converted - to dashes. Normally the parameter will be added as key="value" unless - value is None, in which case only the key will be added. If a - parameter value contains non-ASCII characters it can be specified as a - three-tuple of (charset, language, value), in which case it will be - encoded according to RFC2231 rules. Otherwise it will be encoded using - the utf-8 charset and a language of ''. - - Examples: - - msg.add_header('content-disposition', 'attachment', filename='bud.gif') - msg.add_header('content-disposition', 'attachment', - filename=('utf-8', '', 'Fußballer.ppt')) - msg.add_header('content-disposition', 'attachment', - filename='Fußballer.ppt')) - """ - parts = [] - for k, v in _params.items(): - if v is None: - parts.append(k.replace('_', '-')) - else: - parts.append(_formatparam(k.replace('_', '-'), v)) - if _value is not None: - parts.insert(0, _value) - self[_name] = SEMISPACE.join(parts) - - def replace_header(self, _name, _value): - """Replace a header. - - Replace the first matching header found in the message, retaining - header order and case. If no matching header was found, a KeyError is - raised. - """ - _name = _name.lower() - for i, (k, v) in zip(range(len(self._headers)), self._headers): - if k.lower() == _name: - self._headers[i] = self.policy.header_store_parse(k, _value) - break - else: - raise KeyError(_name) - - # - # Use these three methods instead of the three above. - # - - def get_content_type(self): - """Return the message's content type. - - The returned string is coerced to lower case of the form - `maintype/subtype'. If there was no Content-Type header in the - message, the default type as given by get_default_type() will be - returned. Since according to RFC 2045, messages always have a default - type this will always return a value. 
- - RFC 2045 defines a message's default type to be text/plain unless it - appears inside a multipart/digest container, in which case it would be - message/rfc822. - """ - missing = object() - value = self.get('content-type', missing) - if value is missing: - # This should have no parameters - return self.get_default_type() - ctype = _splitparam(value)[0].lower() - # RFC 2045, section 5.2 says if its invalid, use text/plain - if ctype.count('/') != 1: - return 'text/plain' - return ctype - - def get_content_maintype(self): - """Return the message's main content type. - - This is the `maintype' part of the string returned by - get_content_type(). - """ - ctype = self.get_content_type() - return ctype.split('/')[0] - - def get_content_subtype(self): - """Returns the message's sub-content type. - - This is the `subtype' part of the string returned by - get_content_type(). - """ - ctype = self.get_content_type() - return ctype.split('/')[1] - - def get_default_type(self): - """Return the `default' content type. - - Most messages have a default content type of text/plain, except for - messages that are subparts of multipart/digest containers. Such - subparts have a default content type of message/rfc822. - """ - return self._default_type - - def set_default_type(self, ctype): - """Set the `default' content type. - - ctype should be either "text/plain" or "message/rfc822", although this - is not enforced. The default content type is not stored in the - Content-Type header. - """ - self._default_type = ctype - - def _get_params_preserve(self, failobj, header): - # Like get_params() but preserves the quoting of values. BAW: - # should this be part of the public interface? 
- missing = object() - value = self.get(header, missing) - if value is missing: - return failobj - params = [] - for p in _parseparam(value): - try: - name, val = p.split('=', 1) - name = name.strip() - val = val.strip() - except ValueError: - # Must have been a bare attribute - name = p.strip() - val = '' - params.append((name, val)) - params = utils.decode_params(params) - return params - - def get_params(self, failobj=None, header='content-type', unquote=True): - """Return the message's Content-Type parameters, as a list. - - The elements of the returned list are 2-tuples of key/value pairs, as - split on the `=' sign. The left hand side of the `=' is the key, - while the right hand side is the value. If there is no `=' sign in - the parameter the value is the empty string. The value is as - described in the get_param() method. - - Optional failobj is the object to return if there is no Content-Type - header. Optional header is the header to search instead of - Content-Type. If unquote is True, the value is unquoted. - """ - missing = object() - params = self._get_params_preserve(missing, header) - if params is missing: - return failobj - if unquote: - return [(k, _unquotevalue(v)) for k, v in params] - else: - return params - - def get_param(self, param, failobj=None, header='content-type', - unquote=True): - """Return the parameter value if found in the Content-Type header. - - Optional failobj is the object to return if there is no Content-Type - header, or the Content-Type header has no such parameter. Optional - header is the header to search instead of Content-Type. - - Parameter keys are always compared case insensitively. The return - value can either be a string, or a 3-tuple if the parameter was RFC - 2231 encoded. When it's a 3-tuple, the elements of the value are of - the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and - LANGUAGE can be None, in which case you should consider VALUE to be - encoded in the us-ascii charset. 
You can usually ignore LANGUAGE. - The parameter value (either the returned string, or the VALUE item in - the 3-tuple) is always unquoted, unless unquote is set to False. - - If your application doesn't care whether the parameter was RFC 2231 - encoded, it can turn the return value into a string as follows: - - param = msg.get_param('foo') - param = email.utils.collapse_rfc2231_value(rawparam) - - """ - if header not in self: - return failobj - for k, v in self._get_params_preserve(failobj, header): - if k.lower() == param.lower(): - if unquote: - return _unquotevalue(v) - else: - return v - return failobj - - def set_param(self, param, value, header='Content-Type', requote=True, - charset=None, language=''): - """Set a parameter in the Content-Type header. - - If the parameter already exists in the header, its value will be - replaced with the new value. - - If header is Content-Type and has not yet been defined for this - message, it will be set to "text/plain" and the new parameter and - value will be appended as per RFC 2045. - - An alternate header can specified in the header argument, and all - parameters will be quoted as necessary unless requote is False. - - If charset is specified, the parameter will be encoded according to RFC - 2231. Optional language specifies the RFC 2231 language, defaulting - to the empty string. Both charset and language should be strings. 
- """ - if not isinstance(value, tuple) and charset: - value = (charset, language, value) - - if header not in self and header.lower() == 'content-type': - ctype = 'text/plain' - else: - ctype = self.get(header) - if not self.get_param(param, header=header): - if not ctype: - ctype = _formatparam(param, value, requote) - else: - ctype = SEMISPACE.join( - [ctype, _formatparam(param, value, requote)]) - else: - ctype = '' - for old_param, old_value in self.get_params(header=header, - unquote=requote): - append_param = '' - if old_param.lower() == param.lower(): - append_param = _formatparam(param, value, requote) - else: - append_param = _formatparam(old_param, old_value, requote) - if not ctype: - ctype = append_param - else: - ctype = SEMISPACE.join([ctype, append_param]) - if ctype != self.get(header): - del self[header] - self[header] = ctype - - def del_param(self, param, header='content-type', requote=True): - """Remove the given parameter completely from the Content-Type header. - - The header will be re-written in place without the parameter or its - value. All values will be quoted as necessary unless requote is - False. Optional header specifies an alternative to the Content-Type - header. - """ - if header not in self: - return - new_ctype = '' - for p, v in self.get_params(header=header, unquote=requote): - if p.lower() != param.lower(): - if not new_ctype: - new_ctype = _formatparam(p, v, requote) - else: - new_ctype = SEMISPACE.join([new_ctype, - _formatparam(p, v, requote)]) - if new_ctype != self.get(header): - del self[header] - self[header] = new_ctype - - def set_type(self, type, header='Content-Type', requote=True): - """Set the main type and subtype for the Content-Type header. - - type must be a string in the form "maintype/subtype", otherwise a - ValueError is raised. - - This method replaces the Content-Type header, keeping all the - parameters in place. If requote is False, this leaves the existing - header's quoting as is. 
Otherwise, the parameters will be quoted (the - default). - - An alternative header can be specified in the header argument. When - the Content-Type header is set, we'll always also add a MIME-Version - header. - """ - # BAW: should we be strict? - if not type.count('/') == 1: - raise ValueError - # Set the Content-Type, you get a MIME-Version - if header.lower() == 'content-type': - del self['mime-version'] - self['MIME-Version'] = '1.0' - if header not in self: - self[header] = type - return - params = self.get_params(header=header, unquote=requote) - del self[header] - self[header] = type - # Skip the first param; it's the old type. - for p, v in params[1:]: - self.set_param(p, v, header, requote) - - def get_filename(self, failobj=None): - """Return the filename associated with the payload if present. - - The filename is extracted from the Content-Disposition header's - `filename' parameter, and it is unquoted. If that header is missing - the `filename' parameter, this method falls back to looking for the - `name' parameter. - """ - missing = object() - filename = self.get_param('filename', missing, 'content-disposition') - if filename is missing: - filename = self.get_param('name', missing, 'content-type') - if filename is missing: - return failobj - return utils.collapse_rfc2231_value(filename).strip() - - def get_boundary(self, failobj=None): - """Return the boundary associated with the payload if present. - - The boundary is extracted from the Content-Type header's `boundary' - parameter, and it is unquoted. - """ - missing = object() - boundary = self.get_param('boundary', missing) - if boundary is missing: - return failobj - # RFC 2046 says that boundaries may begin but not end in w/s - return utils.collapse_rfc2231_value(boundary).rstrip() - - def set_boundary(self, boundary): - """Set the boundary parameter in Content-Type to 'boundary'. 
- - This is subtly different than deleting the Content-Type header and - adding a new one with a new boundary parameter via add_header(). The - main difference is that using the set_boundary() method preserves the - order of the Content-Type header in the original message. - - HeaderParseError is raised if the message has no Content-Type header. - """ - missing = object() - params = self._get_params_preserve(missing, 'content-type') - if params is missing: - # There was no Content-Type header, and we don't know what type - # to set it to, so raise an exception. - raise errors.HeaderParseError('No Content-Type header found') - newparams = list() - foundp = False - for pk, pv in params: - if pk.lower() == 'boundary': - newparams.append(('boundary', '"%s"' % boundary)) - foundp = True - else: - newparams.append((pk, pv)) - if not foundp: - # The original Content-Type header had no boundary attribute. - # Tack one on the end. BAW: should we raise an exception - # instead??? - newparams.append(('boundary', '"%s"' % boundary)) - # Replace the existing Content-Type header with the new value - newheaders = list() - for h, v in self._headers: - if h.lower() == 'content-type': - parts = list() - for k, v in newparams: - if v == '': - parts.append(k) - else: - parts.append('%s=%s' % (k, v)) - val = SEMISPACE.join(parts) - newheaders.append(self.policy.header_store_parse(h, val)) - - else: - newheaders.append((h, v)) - self._headers = newheaders - - def get_content_charset(self, failobj=None): - """Return the charset parameter of the Content-Type header. - - The returned string is always coerced to lower case. If there is no - Content-Type header, or if that header has no charset parameter, - failobj is returned. - """ - missing = object() - charset = self.get_param('charset', missing) - if charset is missing: - return failobj - if isinstance(charset, tuple): - # RFC 2231 encoded, so decode it, and it better end up as ascii. 
- pcharset = charset[0] or 'us-ascii' - try: - # LookupError will be raised if the charset isn't known to - # Python. UnicodeError will be raised if the encoded text - # contains a character not in the charset. - as_bytes = charset[2].encode('raw-unicode-escape') - charset = str(as_bytes, pcharset) - except (LookupError, UnicodeError): - charset = charset[2] - # charset characters must be in us-ascii range - try: - charset.encode('us-ascii') - except UnicodeError: - return failobj - # RFC 2046, $4.1.2 says charsets are not case sensitive - return charset.lower() - - def get_charsets(self, failobj=None): - """Return a list containing the charset(s) used in this message. - - The returned list of items describes the Content-Type headers' - charset parameter for this message and all the subparts in its - payload. - - Each item will either be a string (the value of the charset parameter - in the Content-Type header of that part) or the value of the - 'failobj' parameter (defaults to None), if the part does not have a - main MIME type of "text", or the charset is not defined. - - The list will contain one string for each part of the message, plus - one for the container message (i.e. self), so that a non-multipart - message will still return a list of length 1. - """ - return [part.get_content_charset(failobj) for part in self.walk()] - - # I.e. def walk(self): ... 
- from future.backports.email.iterators import walk diff --git a/contrib/python/future/future/backports/email/mime/__init__.py b/contrib/python/future/future/backports/email/mime/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 --- a/contrib/python/future/future/backports/email/mime/__init__.py +++ /dev/null diff --git a/contrib/python/future/future/backports/email/mime/application.py b/contrib/python/future/future/backports/email/mime/application.py deleted file mode 100644 index 5cbfb174af5..00000000000 --- a/contrib/python/future/future/backports/email/mime/application.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Keith Dart -# Contact: [email protected] - -"""Class representing application/* type MIME documents.""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import - -from future.backports.email import encoders -from future.backports.email.mime.nonmultipart import MIMENonMultipart - -__all__ = ["MIMEApplication"] - - -class MIMEApplication(MIMENonMultipart): - """Class for generating application/* MIME documents.""" - - def __init__(self, _data, _subtype='octet-stream', - _encoder=encoders.encode_base64, **_params): - """Create an application/* type MIME document. - - _data is a string containing the raw application data. - - _subtype is the MIME content type subtype, defaulting to - 'octet-stream'. - - _encoder is a function which will perform the actual encoding for - transport of the application data, defaulting to base64 encoding. - - Any additional keyword arguments are passed to the base class - constructor, which turns them into parameters on the Content-Type - header. 
- """ - if _subtype is None: - raise TypeError('Invalid application MIME subtype') - MIMENonMultipart.__init__(self, 'application', _subtype, **_params) - self.set_payload(_data) - _encoder(self) diff --git a/contrib/python/future/future/backports/email/mime/audio.py b/contrib/python/future/future/backports/email/mime/audio.py deleted file mode 100644 index 4989c114207..00000000000 --- a/contrib/python/future/future/backports/email/mime/audio.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (C) 2001-2007 Python Software Foundation -# Author: Anthony Baxter -# Contact: [email protected] - -"""Class representing audio/* type MIME documents.""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import - -__all__ = ['MIMEAudio'] - -import sndhdr - -from io import BytesIO -from future.backports.email import encoders -from future.backports.email.mime.nonmultipart import MIMENonMultipart - - -_sndhdr_MIMEmap = {'au' : 'basic', - 'wav' :'x-wav', - 'aiff':'x-aiff', - 'aifc':'x-aiff', - } - -# There are others in sndhdr that don't have MIME types. :( -# Additional ones to be added to sndhdr? midi, mp3, realaudio, wma?? -def _whatsnd(data): - """Try to identify a sound file type. - - sndhdr.what() has a pretty cruddy interface, unfortunately. This is why - we re-do it here. It would be easier to reverse engineer the Unix 'file' - command and use the standard 'magic' file, as shipped with a modern Unix. - """ - hdr = data[:512] - fakefile = BytesIO(hdr) - for testfn in sndhdr.tests: - res = testfn(hdr, fakefile) - if res is not None: - return _sndhdr_MIMEmap.get(res[0]) - return None - - -class MIMEAudio(MIMENonMultipart): - """Class for generating audio/* MIME documents.""" - - def __init__(self, _audiodata, _subtype=None, - _encoder=encoders.encode_base64, **_params): - """Create an audio/* type MIME document. - - _audiodata is a string containing the raw audio data. 
If this data - can be decoded by the standard Python `sndhdr' module, then the - subtype will be automatically included in the Content-Type header. - Otherwise, you can specify the specific audio subtype via the - _subtype parameter. If _subtype is not given, and no subtype can be - guessed, a TypeError is raised. - - _encoder is a function which will perform the actual encoding for - transport of the image data. It takes one argument, which is this - Image instance. It should use get_payload() and set_payload() to - change the payload to the encoded form. It should also add any - Content-Transfer-Encoding or other headers to the message as - necessary. The default encoding is Base64. - - Any additional keyword arguments are passed to the base class - constructor, which turns them into parameters on the Content-Type - header. - """ - if _subtype is None: - _subtype = _whatsnd(_audiodata) - if _subtype is None: - raise TypeError('Could not find audio MIME subtype') - MIMENonMultipart.__init__(self, 'audio', _subtype, **_params) - self.set_payload(_audiodata) - _encoder(self) diff --git a/contrib/python/future/future/backports/email/mime/base.py b/contrib/python/future/future/backports/email/mime/base.py deleted file mode 100644 index e77f3ca4ae5..00000000000 --- a/contrib/python/future/future/backports/email/mime/base.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: [email protected] - -"""Base class for MIME specializations.""" -from __future__ import absolute_import, division, unicode_literals -from future.backports.email import message - -__all__ = ['MIMEBase'] - - -class MIMEBase(message.Message): - """Base class for MIME specializations.""" - - def __init__(self, _maintype, _subtype, **_params): - """This constructor adds a Content-Type: and a MIME-Version: header. - - The Content-Type: header is taken from the _maintype and _subtype - arguments. 
Additional parameters for this header are taken from the - keyword arguments. - """ - message.Message.__init__(self) - ctype = '%s/%s' % (_maintype, _subtype) - self.add_header('Content-Type', ctype, **_params) - self['MIME-Version'] = '1.0' diff --git a/contrib/python/future/future/backports/email/mime/image.py b/contrib/python/future/future/backports/email/mime/image.py deleted file mode 100644 index a03602464aa..00000000000 --- a/contrib/python/future/future/backports/email/mime/image.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: [email protected] - -"""Class representing image/* type MIME documents.""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import - -__all__ = ['MIMEImage'] - -import imghdr - -from future.backports.email import encoders -from future.backports.email.mime.nonmultipart import MIMENonMultipart - - -class MIMEImage(MIMENonMultipart): - """Class for generating image/* type MIME documents.""" - - def __init__(self, _imagedata, _subtype=None, - _encoder=encoders.encode_base64, **_params): - """Create an image/* type MIME document. - - _imagedata is a string containing the raw image data. If this data - can be decoded by the standard Python `imghdr' module, then the - subtype will be automatically included in the Content-Type header. - Otherwise, you can specify the specific image subtype via the _subtype - parameter. - - _encoder is a function which will perform the actual encoding for - transport of the image data. It takes one argument, which is this - Image instance. It should use get_payload() and set_payload() to - change the payload to the encoded form. It should also add any - Content-Transfer-Encoding or other headers to the message as - necessary. The default encoding is Base64. 
- - Any additional keyword arguments are passed to the base class - constructor, which turns them into parameters on the Content-Type - header. - """ - if _subtype is None: - _subtype = imghdr.what(None, _imagedata) - if _subtype is None: - raise TypeError('Could not guess image MIME subtype') - MIMENonMultipart.__init__(self, 'image', _subtype, **_params) - self.set_payload(_imagedata) - _encoder(self) diff --git a/contrib/python/future/future/backports/email/mime/message.py b/contrib/python/future/future/backports/email/mime/message.py deleted file mode 100644 index 7f92075150e..00000000000 --- a/contrib/python/future/future/backports/email/mime/message.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: [email protected] - -"""Class representing message/* MIME documents.""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import - -__all__ = ['MIMEMessage'] - -from future.backports.email import message -from future.backports.email.mime.nonmultipart import MIMENonMultipart - - -class MIMEMessage(MIMENonMultipart): - """Class representing message/* MIME documents.""" - - def __init__(self, _msg, _subtype='rfc822'): - """Create a message/* type MIME document. - - _msg is a message object and must be an instance of Message, or a - derived class of Message, otherwise a TypeError is raised. - - Optional _subtype defines the subtype of the contained message. The - default is "rfc822" (this is defined by the MIME standard, even though - the term "rfc822" is technically outdated by RFC 2822). - """ - MIMENonMultipart.__init__(self, 'message', _subtype) - if not isinstance(_msg, message.Message): - raise TypeError('Argument is not an instance of Message') - # It's convenient to use this base class method. 
We need to do it - # this way or we'll get an exception - message.Message.attach(self, _msg) - # And be sure our default type is set correctly - self.set_default_type('message/rfc822') diff --git a/contrib/python/future/future/backports/email/mime/multipart.py b/contrib/python/future/future/backports/email/mime/multipart.py deleted file mode 100644 index 6d7ed3dcb9e..00000000000 --- a/contrib/python/future/future/backports/email/mime/multipart.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright (C) 2002-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: [email protected] - -"""Base class for MIME multipart/* type messages.""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import - -__all__ = ['MIMEMultipart'] - -from future.backports.email.mime.base import MIMEBase - - -class MIMEMultipart(MIMEBase): - """Base class for MIME multipart/* type messages.""" - - def __init__(self, _subtype='mixed', boundary=None, _subparts=None, - **_params): - """Creates a multipart/* type message. - - By default, creates a multipart/mixed message, with proper - Content-Type and MIME-Version headers. - - _subtype is the subtype of the multipart content type, defaulting to - `mixed'. - - boundary is the multipart boundary string. By default it is - calculated as needed. - - _subparts is a sequence of initial subparts for the payload. It - must be an iterable object, such as a list. You can always - attach new subparts to the message by using the attach() method. - - Additional parameters for the Content-Type header are taken from the - keyword arguments (or passed into the _params argument). - """ - MIMEBase.__init__(self, 'multipart', _subtype, **_params) - - # Initialise _payload to an empty list as the Message superclass's - # implementation of is_multipart assumes that _payload is a list for - # multipart messages. 
- self._payload = [] - - if _subparts: - for p in _subparts: - self.attach(p) - if boundary: - self.set_boundary(boundary) diff --git a/contrib/python/future/future/backports/email/mime/nonmultipart.py b/contrib/python/future/future/backports/email/mime/nonmultipart.py deleted file mode 100644 index 08c37c36d12..00000000000 --- a/contrib/python/future/future/backports/email/mime/nonmultipart.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (C) 2002-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: [email protected] - -"""Base class for MIME type messages that are not multipart.""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import - -__all__ = ['MIMENonMultipart'] - -from future.backports.email import errors -from future.backports.email.mime.base import MIMEBase - - -class MIMENonMultipart(MIMEBase): - """Base class for MIME multipart/* type messages.""" - - def attach(self, payload): - # The public API prohibits attaching multiple subparts to MIMEBase - # derived subtypes since none of them are, by definition, of content - # type multipart/* - raise errors.MultipartConversionError( - 'Cannot attach additional subparts to non-multipart/*') diff --git a/contrib/python/future/future/backports/email/mime/text.py b/contrib/python/future/future/backports/email/mime/text.py deleted file mode 100644 index 6269f4a68a7..00000000000 --- a/contrib/python/future/future/backports/email/mime/text.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: [email protected] - -"""Class representing text/* type MIME documents.""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import - -__all__ = ['MIMEText'] - -from future.backports.email.encoders import encode_7or8bit -from future.backports.email.mime.nonmultipart import MIMENonMultipart - - -class MIMEText(MIMENonMultipart): - 
"""Class for generating text/* type MIME documents.""" - - def __init__(self, _text, _subtype='plain', _charset=None): - """Create a text/* type MIME document. - - _text is the string for this message object. - - _subtype is the MIME sub content type, defaulting to "plain". - - _charset is the character set parameter added to the Content-Type - header. This defaults to "us-ascii". Note that as a side-effect, the - Content-Transfer-Encoding header will also be set. - """ - - # If no _charset was specified, check to see if there are non-ascii - # characters present. If not, use 'us-ascii', otherwise use utf-8. - # XXX: This can be removed once #7304 is fixed. - if _charset is None: - try: - _text.encode('us-ascii') - _charset = 'us-ascii' - except UnicodeEncodeError: - _charset = 'utf-8' - - MIMENonMultipart.__init__(self, 'text', _subtype, - **{'charset': _charset}) - - self.set_payload(_text, _charset) diff --git a/contrib/python/future/future/backports/email/parser.py b/contrib/python/future/future/backports/email/parser.py deleted file mode 100644 index df1c6e28689..00000000000 --- a/contrib/python/future/future/backports/email/parser.py +++ /dev/null @@ -1,135 +0,0 @@ -# Copyright (C) 2001-2007 Python Software Foundation -# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter -# Contact: [email protected] - -"""A parser of RFC 2822 and MIME email messages.""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import - -__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser'] - -import warnings -from io import StringIO, TextIOWrapper - -from future.backports.email.feedparser import FeedParser, BytesFeedParser -from future.backports.email.message import Message -from future.backports.email._policybase import compat32 - - -class Parser(object): - def __init__(self, _class=Message, **_3to2kwargs): - """Parser of RFC 2822 and MIME email messages. 
- - Creates an in-memory object tree representing the email message, which - can then be manipulated and turned over to a Generator to return the - textual representation of the message. - - The string must be formatted as a block of RFC 2822 headers and header - continuation lines, optionally preceeded by a `Unix-from' header. The - header block is terminated either by the end of the string or by a - blank line. - - _class is the class to instantiate for new message objects when they - must be created. This class must have a constructor that can take - zero arguments. Default is Message.Message. - - The policy keyword specifies a policy object that controls a number of - aspects of the parser's operation. The default policy maintains - backward compatibility. - - """ - if 'policy' in _3to2kwargs: policy = _3to2kwargs['policy']; del _3to2kwargs['policy'] - else: policy = compat32 - self._class = _class - self.policy = policy - - def parse(self, fp, headersonly=False): - """Create a message structure from the data in a file. - - Reads all the data from the file and returns the root of the message - structure. Optional headersonly is a flag specifying whether to stop - parsing after reading the headers or not. The default is False, - meaning it parses the entire contents of the file. - """ - feedparser = FeedParser(self._class, policy=self.policy) - if headersonly: - feedparser._set_headersonly() - while True: - data = fp.read(8192) - if not data: - break - feedparser.feed(data) - return feedparser.close() - - def parsestr(self, text, headersonly=False): - """Create a message structure from a string. - - Returns the root of the message structure. Optional headersonly is a - flag specifying whether to stop parsing after reading the headers or - not. The default is False, meaning it parses the entire contents of - the file. 
- """ - return self.parse(StringIO(text), headersonly=headersonly) - - - -class HeaderParser(Parser): - def parse(self, fp, headersonly=True): - return Parser.parse(self, fp, True) - - def parsestr(self, text, headersonly=True): - return Parser.parsestr(self, text, True) - - -class BytesParser(object): - - def __init__(self, *args, **kw): - """Parser of binary RFC 2822 and MIME email messages. - - Creates an in-memory object tree representing the email message, which - can then be manipulated and turned over to a Generator to return the - textual representation of the message. - - The input must be formatted as a block of RFC 2822 headers and header - continuation lines, optionally preceeded by a `Unix-from' header. The - header block is terminated either by the end of the input or by a - blank line. - - _class is the class to instantiate for new message objects when they - must be created. This class must have a constructor that can take - zero arguments. Default is Message.Message. - """ - self.parser = Parser(*args, **kw) - - def parse(self, fp, headersonly=False): - """Create a message structure from the data in a binary file. - - Reads all the data from the file and returns the root of the message - structure. Optional headersonly is a flag specifying whether to stop - parsing after reading the headers or not. The default is False, - meaning it parses the entire contents of the file. - """ - fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape') - with fp: - return self.parser.parse(fp, headersonly) - - - def parsebytes(self, text, headersonly=False): - """Create a message structure from a byte string. - - Returns the root of the message structure. Optional headersonly is a - flag specifying whether to stop parsing after reading the headers or - not. The default is False, meaning it parses the entire contents of - the file. 
- """ - text = text.decode('ASCII', errors='surrogateescape') - return self.parser.parsestr(text, headersonly) - - -class BytesHeaderParser(BytesParser): - def parse(self, fp, headersonly=True): - return BytesParser.parse(self, fp, headersonly=True) - - def parsebytes(self, text, headersonly=True): - return BytesParser.parsebytes(self, text, headersonly=True) diff --git a/contrib/python/future/future/backports/email/policy.py b/contrib/python/future/future/backports/email/policy.py deleted file mode 100644 index 2f609a23aeb..00000000000 --- a/contrib/python/future/future/backports/email/policy.py +++ /dev/null @@ -1,193 +0,0 @@ -"""This will be the home for the policy that hooks in the new -code that adds all the email6 features. -""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import -from future.builtins import super - -from future.standard_library.email._policybase import (Policy, Compat32, - compat32, _extend_docstrings) -from future.standard_library.email.utils import _has_surrogates -from future.standard_library.email.headerregistry import HeaderRegistry as HeaderRegistry - -__all__ = [ - 'Compat32', - 'compat32', - 'Policy', - 'EmailPolicy', - 'default', - 'strict', - 'SMTP', - 'HTTP', - ] - -@_extend_docstrings -class EmailPolicy(Policy): - - """+ - PROVISIONAL - - The API extensions enabled by this policy are currently provisional. - Refer to the documentation for details. - - This policy adds new header parsing and folding algorithms. Instead of - simple strings, headers are custom objects with custom attributes - depending on the type of the field. The folding algorithm fully - implements RFCs 2047 and 5322. 
- - In addition to the settable attributes listed above that apply to - all Policies, this policy adds the following additional attributes: - - refold_source -- if the value for a header in the Message object - came from the parsing of some source, this attribute - indicates whether or not a generator should refold - that value when transforming the message back into - stream form. The possible values are: - - none -- all source values use original folding - long -- source values that have any line that is - longer than max_line_length will be - refolded - all -- all values are refolded. - - The default is 'long'. - - header_factory -- a callable that takes two arguments, 'name' and - 'value', where 'name' is a header field name and - 'value' is an unfolded header field value, and - returns a string-like object that represents that - header. A default header_factory is provided that - understands some of the RFC5322 header field types. - (Currently address fields and date fields have - special treatment, while all other fields are - treated as unstructured. This list will be - completed before the extension is marked stable.) - """ - - refold_source = 'long' - header_factory = HeaderRegistry() - - def __init__(self, **kw): - # Ensure that each new instance gets a unique header factory - # (as opposed to clones, which share the factory). - if 'header_factory' not in kw: - object.__setattr__(self, 'header_factory', HeaderRegistry()) - super().__init__(**kw) - - def header_max_count(self, name): - """+ - The implementation for this class returns the max_count attribute from - the specialized header class that would be used to construct a header - of type 'name'. - """ - return self.header_factory[name].max_count - - # The logic of the next three methods is chosen such that it is possible to - # switch a Message object between a Compat32 policy and a policy derived - # from this class and have the results stay consistent. 
This allows a - # Message object constructed with this policy to be passed to a library - # that only handles Compat32 objects, or to receive such an object and - # convert it to use the newer style by just changing its policy. It is - # also chosen because it postpones the relatively expensive full rfc5322 - # parse until as late as possible when parsing from source, since in many - # applications only a few headers will actually be inspected. - - def header_source_parse(self, sourcelines): - """+ - The name is parsed as everything up to the ':' and returned unmodified. - The value is determined by stripping leading whitespace off the - remainder of the first line, joining all subsequent lines together, and - stripping any trailing carriage return or linefeed characters. (This - is the same as Compat32). - - """ - name, value = sourcelines[0].split(':', 1) - value = value.lstrip(' \t') + ''.join(sourcelines[1:]) - return (name, value.rstrip('\r\n')) - - def header_store_parse(self, name, value): - """+ - The name is returned unchanged. If the input value has a 'name' - attribute and it matches the name ignoring case, the value is returned - unchanged. Otherwise the name and value are passed to header_factory - method, and the resulting custom header object is returned as the - value. In this case a ValueError is raised if the input value contains - CR or LF characters. - - """ - if hasattr(value, 'name') and value.name.lower() == name.lower(): - return (name, value) - if isinstance(value, str) and len(value.splitlines())>1: - raise ValueError("Header values may not contain linefeed " - "or carriage return characters") - return (name, self.header_factory(name, value)) - - def header_fetch_parse(self, name, value): - """+ - If the value has a 'name' attribute, it is returned to unmodified. - Otherwise the name and the value with any linesep characters removed - are passed to the header_factory method, and the resulting custom - header object is returned. 
Any surrogateescaped bytes get turned - into the unicode unknown-character glyph. - - """ - if hasattr(value, 'name'): - return value - return self.header_factory(name, ''.join(value.splitlines())) - - def fold(self, name, value): - """+ - Header folding is controlled by the refold_source policy setting. A - value is considered to be a 'source value' if and only if it does not - have a 'name' attribute (having a 'name' attribute means it is a header - object of some sort). If a source value needs to be refolded according - to the policy, it is converted into a custom header object by passing - the name and the value with any linesep characters removed to the - header_factory method. Folding of a custom header object is done by - calling its fold method with the current policy. - - Source values are split into lines using splitlines. If the value is - not to be refolded, the lines are rejoined using the linesep from the - policy and returned. The exception is lines containing non-ascii - binary data. In that case the value is refolded regardless of the - refold_source setting, which causes the binary data to be CTE encoded - using the unknown-8bit charset. - - """ - return self._fold(name, value, refold_binary=True) - - def fold_binary(self, name, value): - """+ - The same as fold if cte_type is 7bit, except that the returned value is - bytes. - - If cte_type is 8bit, non-ASCII binary data is converted back into - bytes. Headers with binary data are not refolded, regardless of the - refold_header setting, since there is no way to know whether the binary - data consists of single byte characters or multibyte characters. 
- - """ - folded = self._fold(name, value, refold_binary=self.cte_type=='7bit') - return folded.encode('ascii', 'surrogateescape') - - def _fold(self, name, value, refold_binary=False): - if hasattr(value, 'name'): - return value.fold(policy=self) - maxlen = self.max_line_length if self.max_line_length else float('inf') - lines = value.splitlines() - refold = (self.refold_source == 'all' or - self.refold_source == 'long' and - (lines and len(lines[0])+len(name)+2 > maxlen or - any(len(x) > maxlen for x in lines[1:]))) - if refold or refold_binary and _has_surrogates(value): - return self.header_factory(name, ''.join(lines)).fold(policy=self) - return name + ': ' + self.linesep.join(lines) + self.linesep - - -default = EmailPolicy() -# Make the default policy use the class default header_factory -del default.header_factory -strict = default.clone(raise_on_defect=True) -SMTP = default.clone(linesep='\r\n') -HTTP = default.clone(linesep='\r\n', max_line_length=None) diff --git a/contrib/python/future/future/backports/email/quoprimime.py b/contrib/python/future/future/backports/email/quoprimime.py deleted file mode 100644 index b69d158bc4c..00000000000 --- a/contrib/python/future/future/backports/email/quoprimime.py +++ /dev/null @@ -1,326 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Ben Gertzfield -# Contact: [email protected] - -"""Quoted-printable content transfer encoding per RFCs 2045-2047. - -This module handles the content transfer encoding method defined in RFC 2045 -to encode US ASCII-like 8-bit data called `quoted-printable'. It is used to -safely encode text that is in a character set similar to the 7-bit US ASCII -character set, but that includes some 8-bit characters that are normally not -allowed in email bodies or headers. - -Quoted-printable is very space-inefficient for encoding binary files; use the -email.base64mime module for that instead. 
- -This module provides an interface to encode and decode both headers and bodies -with quoted-printable encoding. - -RFC 2045 defines a method for including character set information in an -`encoded-word' in a header. This method is commonly used for 8-bit real names -in To:/From:/Cc: etc. fields, as well as Subject: lines. - -This module does not do the line wrapping or end-of-line character -conversion necessary for proper internationalized headers; it only -does dumb encoding and decoding. To deal with the various line -wrapping issues, use the email.header module. -""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import -from future.builtins import bytes, chr, dict, int, range, super - -__all__ = [ - 'body_decode', - 'body_encode', - 'body_length', - 'decode', - 'decodestring', - 'header_decode', - 'header_encode', - 'header_length', - 'quote', - 'unquote', - ] - -import re -import io - -from string import ascii_letters, digits, hexdigits - -CRLF = '\r\n' -NL = '\n' -EMPTYSTRING = '' - -# Build a mapping of octets to the expansion of that octet. Since we're only -# going to have 256 of these things, this isn't terribly inefficient -# space-wise. Remember that headers and bodies have different sets of safe -# characters. Initialize both maps with the full expansion, and then override -# the safe bytes with the more compact form. -_QUOPRI_HEADER_MAP = dict((c, '=%02X' % c) for c in range(256)) -_QUOPRI_BODY_MAP = _QUOPRI_HEADER_MAP.copy() - -# Safe header bytes which need no encoding. -for c in bytes(b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii')): - _QUOPRI_HEADER_MAP[c] = chr(c) -# Headers have one other special encoding; spaces become underscores. -_QUOPRI_HEADER_MAP[ord(' ')] = '_' - -# Safe body bytes which need no encoding. 
-for c in bytes(b' !"#$%&\'()*+,-./0123456789:;<>' - b'?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`' - b'abcdefghijklmnopqrstuvwxyz{|}~\t'): - _QUOPRI_BODY_MAP[c] = chr(c) - - - -# Helpers -def header_check(octet): - """Return True if the octet should be escaped with header quopri.""" - return chr(octet) != _QUOPRI_HEADER_MAP[octet] - - -def body_check(octet): - """Return True if the octet should be escaped with body quopri.""" - return chr(octet) != _QUOPRI_BODY_MAP[octet] - - -def header_length(bytearray): - """Return a header quoted-printable encoding length. - - Note that this does not include any RFC 2047 chrome added by - `header_encode()`. - - :param bytearray: An array of bytes (a.k.a. octets). - :return: The length in bytes of the byte array when it is encoded with - quoted-printable for headers. - """ - return sum(len(_QUOPRI_HEADER_MAP[octet]) for octet in bytearray) - - -def body_length(bytearray): - """Return a body quoted-printable encoding length. - - :param bytearray: An array of bytes (a.k.a. octets). - :return: The length in bytes of the byte array when it is encoded with - quoted-printable for bodies. - """ - return sum(len(_QUOPRI_BODY_MAP[octet]) for octet in bytearray) - - -def _max_append(L, s, maxlen, extra=''): - if not isinstance(s, str): - s = chr(s) - if not L: - L.append(s.lstrip()) - elif len(L[-1]) + len(s) <= maxlen: - L[-1] += extra + s - else: - L.append(s.lstrip()) - - -def unquote(s): - """Turn a string in the form =AB to the ASCII character with value 0xab""" - return chr(int(s[1:3], 16)) - - -def quote(c): - return '=%02X' % ord(c) - - - -def header_encode(header_bytes, charset='iso-8859-1'): - """Encode a single header line with quoted-printable (like) encoding. - - Defined in RFC 2045, this `Q' encoding is similar to quoted-printable, but - used specifically for email header fields to allow charsets with mostly 7 - bit characters (and some 8 bit) to remain more or less readable in non-RFC - 2045 aware mail clients. 
- - charset names the character set to use in the RFC 2046 header. It - defaults to iso-8859-1. - """ - # Return empty headers as an empty string. - if not header_bytes: - return '' - # Iterate over every byte, encoding if necessary. - encoded = [] - for octet in header_bytes: - encoded.append(_QUOPRI_HEADER_MAP[octet]) - # Now add the RFC chrome to each encoded chunk and glue the chunks - # together. - return '=?%s?q?%s?=' % (charset, EMPTYSTRING.join(encoded)) - - -class _body_accumulator(io.StringIO): - - def __init__(self, maxlinelen, eol, *args, **kw): - super().__init__(*args, **kw) - self.eol = eol - self.maxlinelen = self.room = maxlinelen - - def write_str(self, s): - """Add string s to the accumulated body.""" - self.write(s) - self.room -= len(s) - - def newline(self): - """Write eol, then start new line.""" - self.write_str(self.eol) - self.room = self.maxlinelen - - def write_soft_break(self): - """Write a soft break, then start a new line.""" - self.write_str('=') - self.newline() - - def write_wrapped(self, s, extra_room=0): - """Add a soft line break if needed, then write s.""" - if self.room < len(s) + extra_room: - self.write_soft_break() - self.write_str(s) - - def write_char(self, c, is_last_char): - if not is_last_char: - # Another character follows on this line, so we must leave - # extra room, either for it or a soft break, and whitespace - # need not be quoted. - self.write_wrapped(c, extra_room=1) - elif c not in ' \t': - # For this and remaining cases, no more characters follow, - # so there is no need to reserve extra room (since a hard - # break will immediately follow). - self.write_wrapped(c) - elif self.room >= 3: - # It's a whitespace character at end-of-line, and we have room - # for the three-character quoted encoding. - self.write(quote(c)) - elif self.room == 2: - # There's room for the whitespace character and a soft break. - self.write(c) - self.write_soft_break() - else: - # There's room only for a soft break. 
The quoted whitespace - # will be the only content on the subsequent line. - self.write_soft_break() - self.write(quote(c)) - - -def body_encode(body, maxlinelen=76, eol=NL): - """Encode with quoted-printable, wrapping at maxlinelen characters. - - Each line of encoded text will end with eol, which defaults to "\\n". Set - this to "\\r\\n" if you will be using the result of this function directly - in an email. - - Each line will be wrapped at, at most, maxlinelen characters before the - eol string (maxlinelen defaults to 76 characters, the maximum value - permitted by RFC 2045). Long lines will have the 'soft line break' - quoted-printable character "=" appended to them, so the decoded text will - be identical to the original text. - - The minimum maxlinelen is 4 to have room for a quoted character ("=XX") - followed by a soft line break. Smaller values will generate a - ValueError. - - """ - - if maxlinelen < 4: - raise ValueError("maxlinelen must be at least 4") - if not body: - return body - - # The last line may or may not end in eol, but all other lines do. - last_has_eol = (body[-1] in '\r\n') - - # This accumulator will make it easier to build the encoded body. - encoded_body = _body_accumulator(maxlinelen, eol) - - lines = body.splitlines() - last_line_no = len(lines) - 1 - for line_no, line in enumerate(lines): - last_char_index = len(line) - 1 - for i, c in enumerate(line): - if body_check(ord(c)): - c = quote(c) - encoded_body.write_char(c, i==last_char_index) - # Add an eol if input line had eol. All input lines have eol except - # possibly the last one. - if line_no < last_line_no or last_has_eol: - encoded_body.newline() - - return encoded_body.getvalue() - - - -# BAW: I'm not sure if the intent was for the signature of this function to be -# the same as base64MIME.decode() or not... -def decode(encoded, eol=NL): - """Decode a quoted-printable string. - - Lines are separated with eol, which defaults to \\n. 
- """ - if not encoded: - return encoded - # BAW: see comment in encode() above. Again, we're building up the - # decoded string with string concatenation, which could be done much more - # efficiently. - decoded = '' - - for line in encoded.splitlines(): - line = line.rstrip() - if not line: - decoded += eol - continue - - i = 0 - n = len(line) - while i < n: - c = line[i] - if c != '=': - decoded += c - i += 1 - # Otherwise, c == "=". Are we at the end of the line? If so, add - # a soft line break. - elif i+1 == n: - i += 1 - continue - # Decode if in form =AB - elif i+2 < n and line[i+1] in hexdigits and line[i+2] in hexdigits: - decoded += unquote(line[i:i+3]) - i += 3 - # Otherwise, not in form =AB, pass literally - else: - decoded += c - i += 1 - - if i == n: - decoded += eol - # Special case if original string did not end with eol - if encoded[-1] not in '\r\n' and decoded.endswith(eol): - decoded = decoded[:-1] - return decoded - - -# For convenience and backwards compatibility w/ standard base64 module -body_decode = decode -decodestring = decode - - - -def _unquote_match(match): - """Turn a match in the form =AB to the ASCII character with value 0xab""" - s = match.group(0) - return unquote(s) - - -# Header decoding is done a bit differently -def header_decode(s): - """Decode a string encoded with RFC 2045 MIME header `Q' encoding. - - This function does not parse a full MIME header value encoded with - quoted-printable (like =?iso-8895-1?q?Hello_World?=) -- please use - the high level email.header class for that functionality. 
- """ - s = s.replace('_', ' ') - return re.sub(r'=[a-fA-F0-9]{2}', _unquote_match, s, re.ASCII) diff --git a/contrib/python/future/future/backports/email/utils.py b/contrib/python/future/future/backports/email/utils.py deleted file mode 100644 index 4abebf7cb63..00000000000 --- a/contrib/python/future/future/backports/email/utils.py +++ /dev/null @@ -1,400 +0,0 @@ -# Copyright (C) 2001-2010 Python Software Foundation -# Author: Barry Warsaw -# Contact: [email protected] - -"""Miscellaneous utilities.""" - -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import -from future import utils -from future.builtins import bytes, int, str - -__all__ = [ - 'collapse_rfc2231_value', - 'decode_params', - 'decode_rfc2231', - 'encode_rfc2231', - 'formataddr', - 'formatdate', - 'format_datetime', - 'getaddresses', - 'make_msgid', - 'mktime_tz', - 'parseaddr', - 'parsedate', - 'parsedate_tz', - 'parsedate_to_datetime', - 'unquote', - ] - -import os -import re -if utils.PY2: - re.ASCII = 0 -import time -import base64 -import random -import socket -from future.backports import datetime -from future.backports.urllib.parse import quote as url_quote, unquote as url_unquote -import warnings -from io import StringIO - -from future.backports.email._parseaddr import quote -from future.backports.email._parseaddr import AddressList as _AddressList -from future.backports.email._parseaddr import mktime_tz - -from future.backports.email._parseaddr import parsedate, parsedate_tz, _parsedate_tz - -from quopri import decodestring as _qdecode - -# Intrapackage imports -from future.backports.email.encoders import _bencode, _qencode -from future.backports.email.charset import Charset - -COMMASPACE = ', ' -EMPTYSTRING = '' -UEMPTYSTRING = '' -CRLF = '\r\n' -TICK = "'" - -specialsre = re.compile(r'[][\\()<>@,:;".]') -escapesre = re.compile(r'[\\"]') - -# How to figure out if we are processing strings that come from a byte -# source with 
undecodable characters. -_has_surrogates = re.compile( - '([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)').search - -# How to deal with a string containing bytes before handing it to the -# application through the 'normal' interface. -def _sanitize(string): - # Turn any escaped bytes into unicode 'unknown' char. - original_bytes = string.encode('ascii', 'surrogateescape') - return original_bytes.decode('ascii', 'replace') - - -# Helpers - -def formataddr(pair, charset='utf-8'): - """The inverse of parseaddr(), this takes a 2-tuple of the form - (realname, email_address) and returns the string value suitable - for an RFC 2822 From, To or Cc header. - - If the first element of pair is false, then the second element is - returned unmodified. - - Optional charset if given is the character set that is used to encode - realname in case realname is not ASCII safe. Can be an instance of str or - a Charset-like object which has a header_encode method. Default is - 'utf-8'. - """ - name, address = pair - # The address MUST (per RFC) be ascii, so raise an UnicodeError if it isn't. - address.encode('ascii') - if name: - try: - name.encode('ascii') - except UnicodeEncodeError: - if isinstance(charset, str): - charset = Charset(charset) - encoded_name = charset.header_encode(name) - return "%s <%s>" % (encoded_name, address) - else: - quotes = '' - if specialsre.search(name): - quotes = '"' - name = escapesre.sub(r'\\\g<0>', name) - return '%s%s%s <%s>' % (quotes, name, quotes, address) - return address - - - -def getaddresses(fieldvalues): - """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" - all = COMMASPACE.join(fieldvalues) - a = _AddressList(all) - return a.addresslist - - - -ecre = re.compile(r''' - =\? # literal =? - (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset - \? # literal ? - (?P<encoding>[qb]) # either a "q" or a "b", case insensitive - \? # literal ? - (?P<atom>.*?) 
# non-greedy up to the next ?= is the atom - \?= # literal ?= - ''', re.VERBOSE | re.IGNORECASE) - - -def _format_timetuple_and_zone(timetuple, zone): - return '%s, %02d %s %04d %02d:%02d:%02d %s' % ( - ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][timetuple[6]], - timetuple[2], - ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', - 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][timetuple[1] - 1], - timetuple[0], timetuple[3], timetuple[4], timetuple[5], - zone) - -def formatdate(timeval=None, localtime=False, usegmt=False): - """Returns a date string as specified by RFC 2822, e.g.: - - Fri, 09 Nov 2001 01:08:47 -0000 - - Optional timeval if given is a floating point time value as accepted by - gmtime() and localtime(), otherwise the current time is used. - - Optional localtime is a flag that when True, interprets timeval, and - returns a date relative to the local timezone instead of UTC, properly - taking daylight savings time into account. - - Optional argument usegmt means that the timezone is written out as - an ascii string, not numeric one (so "GMT" instead of "+0000"). This - is needed for HTTP, and is only used when localtime==False. - """ - # Note: we cannot use strftime() because that honors the locale and RFC - # 2822 requires that day and month names be the English abbreviations. - if timeval is None: - timeval = time.time() - if localtime: - now = time.localtime(timeval) - # Calculate timezone offset, based on whether the local zone has - # daylight savings time, and whether DST is in effect. - if time.daylight and now[-1]: - offset = time.altzone - else: - offset = time.timezone - hours, minutes = divmod(abs(offset), 3600) - # Remember offset is in seconds west of UTC, but the timezone is in - # minutes east of UTC, so the signs differ. 
- if offset > 0: - sign = '-' - else: - sign = '+' - zone = '%s%02d%02d' % (sign, hours, minutes // 60) - else: - now = time.gmtime(timeval) - # Timezone offset is always -0000 - if usegmt: - zone = 'GMT' - else: - zone = '-0000' - return _format_timetuple_and_zone(now, zone) - -def format_datetime(dt, usegmt=False): - """Turn a datetime into a date string as specified in RFC 2822. - - If usegmt is True, dt must be an aware datetime with an offset of zero. In - this case 'GMT' will be rendered instead of the normal +0000 required by - RFC2822. This is to support HTTP headers involving date stamps. - """ - now = dt.timetuple() - if usegmt: - if dt.tzinfo is None or dt.tzinfo != datetime.timezone.utc: - raise ValueError("usegmt option requires a UTC datetime") - zone = 'GMT' - elif dt.tzinfo is None: - zone = '-0000' - else: - zone = dt.strftime("%z") - return _format_timetuple_and_zone(now, zone) - - -def make_msgid(idstring=None, domain=None): - """Returns a string suitable for RFC 2822 compliant Message-ID, e.g: - - - Optional idstring if given is a string used to strengthen the - uniqueness of the message id. Optional domain if given provides the - portion of the message id after the '@'. It defaults to the locally - defined hostname. - """ - timeval = time.time() - utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(timeval)) - pid = os.getpid() - randint = random.randrange(100000) - if idstring is None: - idstring = '' - else: - idstring = '.' 
+ idstring - if domain is None: - domain = socket.getfqdn() - msgid = '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, domain) - return msgid - - -def parsedate_to_datetime(data): - _3to2list = list(_parsedate_tz(data)) - dtuple, tz, = [_3to2list[:-1]] + _3to2list[-1:] - if tz is None: - return datetime.datetime(*dtuple[:6]) - return datetime.datetime(*dtuple[:6], - tzinfo=datetime.timezone(datetime.timedelta(seconds=tz))) - - -def parseaddr(addr): - addrs = _AddressList(addr).addresslist - if not addrs: - return '', '' - return addrs[0] - - -# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3. -def unquote(str): - """Remove quotes from a string.""" - if len(str) > 1: - if str.startswith('"') and str.endswith('"'): - return str[1:-1].replace('\\\\', '\\').replace('\\"', '"') - if str.startswith('<') and str.endswith('>'): - return str[1:-1] - return str - - - -# RFC2231-related functions - parameter encoding and decoding -def decode_rfc2231(s): - """Decode string according to RFC 2231""" - parts = s.split(TICK, 2) - if len(parts) <= 2: - return None, None, s - return parts - - -def encode_rfc2231(s, charset=None, language=None): - """Encode string according to RFC 2231. - - If neither charset nor language is given, then s is returned as-is. If - charset is given but not language, the string is encoded using the empty - string for language. - """ - s = url_quote(s, safe='', encoding=charset or 'ascii') - if charset is None and language is None: - return s - if language is None: - language = '' - return "%s'%s'%s" % (charset, language, s) - - -rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$', - re.ASCII) - -def decode_params(params): - """Decode parameters list according to RFC 2231. - - params is a sequence of 2-tuples containing (param name, string value). - """ - # Copy params so we don't mess with the original - params = params[:] - new_params = [] - # Map parameter's name to a list of continuations. 
The values are a - # 3-tuple of the continuation number, the string value, and a flag - # specifying whether a particular segment is %-encoded. - rfc2231_params = {} - name, value = params.pop(0) - new_params.append((name, value)) - while params: - name, value = params.pop(0) - if name.endswith('*'): - encoded = True - else: - encoded = False - value = unquote(value) - mo = rfc2231_continuation.match(name) - if mo: - name, num = mo.group('name', 'num') - if num is not None: - num = int(num) - rfc2231_params.setdefault(name, []).append((num, value, encoded)) - else: - new_params.append((name, '"%s"' % quote(value))) - if rfc2231_params: - for name, continuations in rfc2231_params.items(): - value = [] - extended = False - # Sort by number - continuations.sort() - # And now append all values in numerical order, converting - # %-encodings for the encoded segments. If any of the - # continuation names ends in a *, then the entire string, after - # decoding segments and concatenating, must have the charset and - # language specifiers at the beginning of the string. - for num, s, encoded in continuations: - if encoded: - # Decode as "latin-1", so the characters in s directly - # represent the percent-encoded octet values. - # collapse_rfc2231_value treats this as an octet sequence. - s = url_unquote(s, encoding="latin-1") - extended = True - value.append(s) - value = quote(EMPTYSTRING.join(value)) - if extended: - charset, language, value = decode_rfc2231(value) - new_params.append((name, (charset, language, '"%s"' % value))) - else: - new_params.append((name, '"%s"' % value)) - return new_params - -def collapse_rfc2231_value(value, errors='replace', - fallback_charset='us-ascii'): - if not isinstance(value, tuple) or len(value) != 3: - return unquote(value) - # While value comes to us as a unicode string, we need it to be a bytes - # object. We do not want bytes() normal utf-8 decoder, we want a straight - # interpretation of the string as character bytes. 
- charset, language, text = value - rawbytes = bytes(text, 'raw-unicode-escape') - try: - return str(rawbytes, charset, errors) - except LookupError: - # charset is not a known codec. - return unquote(text) - - -# -# datetime doesn't provide a localtime function yet, so provide one. Code -# adapted from the patch in issue 9527. This may not be perfect, but it is -# better than not having it. -# - -def localtime(dt=None, isdst=-1): - """Return local time as an aware datetime object. - - If called without arguments, return current time. Otherwise *dt* - argument should be a datetime instance, and it is converted to the - local time zone according to the system time zone database. If *dt* is - naive (that is, dt.tzinfo is None), it is assumed to be in local time. - In this case, a positive or zero value for *isdst* causes localtime to - presume initially that summer time (for example, Daylight Saving Time) - is or is not (respectively) in effect for the specified time. A - negative value for *isdst* causes the localtime() function to attempt - to divine whether summer time is in effect for the specified time. - - """ - if dt is None: - return datetime.datetime.now(datetime.timezone.utc).astimezone() - if dt.tzinfo is not None: - return dt.astimezone() - # We have a naive datetime. Convert to a (localtime) timetuple and pass to - # system mktime together with the isdst hint. System mktime will return - # seconds since epoch. - tm = dt.timetuple()[:-1] + (isdst,) - seconds = time.mktime(tm) - localtm = time.localtime(seconds) - try: - delta = datetime.timedelta(seconds=localtm.tm_gmtoff) - tz = datetime.timezone(delta, localtm.tm_zone) - except AttributeError: - # Compute UTC offset and compare with the value implied by tm_isdst. - # If the values match, use the zone name implied by tm_isdst. 
- delta = dt - datetime.datetime(*time.gmtime(seconds)[:6]) - dst = time.daylight and localtm.tm_isdst > 0 - gmtoff = -(time.altzone if dst else time.timezone) - if delta == datetime.timedelta(seconds=gmtoff): - tz = datetime.timezone(delta, time.tzname[dst]) - else: - tz = datetime.timezone(delta) - return dt.replace(tzinfo=tz) diff --git a/contrib/python/future/future/backports/html/__init__.py b/contrib/python/future/future/backports/html/__init__.py deleted file mode 100644 index 58e133fd4b4..00000000000 --- a/contrib/python/future/future/backports/html/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -""" -General functions for HTML manipulation, backported from Py3. - -Note that this uses Python 2.7 code with the corresponding Python 3 -module names and locations. -""" - -from __future__ import unicode_literals - - -_escape_map = {ord('&'): '&', ord('<'): '<', ord('>'): '>'} -_escape_map_full = {ord('&'): '&', ord('<'): '<', ord('>'): '>', - ord('"'): '"', ord('\''): '''} - -# NB: this is a candidate for a bytes/string polymorphic interface - -def escape(s, quote=True): - """ - Replace special characters "&", "<" and ">" to HTML-safe sequences. - If the optional flag quote is true (the default), the quotation mark - characters, both double quote (") and single quote (') characters are also - translated. - """ - assert not isinstance(s, bytes), 'Pass a unicode string' - if quote: - return s.translate(_escape_map_full) - return s.translate(_escape_map) diff --git a/contrib/python/future/future/backports/html/entities.py b/contrib/python/future/future/backports/html/entities.py deleted file mode 100644 index 5c73f6923a9..00000000000 --- a/contrib/python/future/future/backports/html/entities.py +++ /dev/null @@ -1,2514 +0,0 @@ -"""HTML character entity references. 
- -Backported for python-future from Python 3.3 -""" - -from __future__ import (absolute_import, division, - print_function, unicode_literals) -from future.builtins import * - - -# maps the HTML entity name to the Unicode codepoint -name2codepoint = { - 'AElig': 0x00c6, # latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1 - 'Aacute': 0x00c1, # latin capital letter A with acute, U+00C1 ISOlat1 - 'Acirc': 0x00c2, # latin capital letter A with circumflex, U+00C2 ISOlat1 - 'Agrave': 0x00c0, # latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1 - 'Alpha': 0x0391, # greek capital letter alpha, U+0391 - 'Aring': 0x00c5, # latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1 - 'Atilde': 0x00c3, # latin capital letter A with tilde, U+00C3 ISOlat1 - 'Auml': 0x00c4, # latin capital letter A with diaeresis, U+00C4 ISOlat1 - 'Beta': 0x0392, # greek capital letter beta, U+0392 - 'Ccedil': 0x00c7, # latin capital letter C with cedilla, U+00C7 ISOlat1 - 'Chi': 0x03a7, # greek capital letter chi, U+03A7 - 'Dagger': 0x2021, # double dagger, U+2021 ISOpub - 'Delta': 0x0394, # greek capital letter delta, U+0394 ISOgrk3 - 'ETH': 0x00d0, # latin capital letter ETH, U+00D0 ISOlat1 - 'Eacute': 0x00c9, # latin capital letter E with acute, U+00C9 ISOlat1 - 'Ecirc': 0x00ca, # latin capital letter E with circumflex, U+00CA ISOlat1 - 'Egrave': 0x00c8, # latin capital letter E with grave, U+00C8 ISOlat1 - 'Epsilon': 0x0395, # greek capital letter epsilon, U+0395 - 'Eta': 0x0397, # greek capital letter eta, U+0397 - 'Euml': 0x00cb, # latin capital letter E with diaeresis, U+00CB ISOlat1 - 'Gamma': 0x0393, # greek capital letter gamma, U+0393 ISOgrk3 - 'Iacute': 0x00cd, # latin capital letter I with acute, U+00CD ISOlat1 - 'Icirc': 0x00ce, # latin capital letter I with circumflex, U+00CE ISOlat1 - 'Igrave': 0x00cc, # latin capital letter I with grave, U+00CC ISOlat1 - 'Iota': 0x0399, # greek capital letter iota, U+0399 
- 'Iuml': 0x00cf, # latin capital letter I with diaeresis, U+00CF ISOlat1 - 'Kappa': 0x039a, # greek capital letter kappa, U+039A - 'Lambda': 0x039b, # greek capital letter lambda, U+039B ISOgrk3 - 'Mu': 0x039c, # greek capital letter mu, U+039C - 'Ntilde': 0x00d1, # latin capital letter N with tilde, U+00D1 ISOlat1 - 'Nu': 0x039d, # greek capital letter nu, U+039D - 'OElig': 0x0152, # latin capital ligature OE, U+0152 ISOlat2 - 'Oacute': 0x00d3, # latin capital letter O with acute, U+00D3 ISOlat1 - 'Ocirc': 0x00d4, # latin capital letter O with circumflex, U+00D4 ISOlat1 - 'Ograve': 0x00d2, # latin capital letter O with grave, U+00D2 ISOlat1 - 'Omega': 0x03a9, # greek capital letter omega, U+03A9 ISOgrk3 - 'Omicron': 0x039f, # greek capital letter omicron, U+039F - 'Oslash': 0x00d8, # latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1 - 'Otilde': 0x00d5, # latin capital letter O with tilde, U+00D5 ISOlat1 - 'Ouml': 0x00d6, # latin capital letter O with diaeresis, U+00D6 ISOlat1 - 'Phi': 0x03a6, # greek capital letter phi, U+03A6 ISOgrk3 - 'Pi': 0x03a0, # greek capital letter pi, U+03A0 ISOgrk3 - 'Prime': 0x2033, # double prime = seconds = inches, U+2033 ISOtech - 'Psi': 0x03a8, # greek capital letter psi, U+03A8 ISOgrk3 - 'Rho': 0x03a1, # greek capital letter rho, U+03A1 - 'Scaron': 0x0160, # latin capital letter S with caron, U+0160 ISOlat2 - 'Sigma': 0x03a3, # greek capital letter sigma, U+03A3 ISOgrk3 - 'THORN': 0x00de, # latin capital letter THORN, U+00DE ISOlat1 - 'Tau': 0x03a4, # greek capital letter tau, U+03A4 - 'Theta': 0x0398, # greek capital letter theta, U+0398 ISOgrk3 - 'Uacute': 0x00da, # latin capital letter U with acute, U+00DA ISOlat1 - 'Ucirc': 0x00db, # latin capital letter U with circumflex, U+00DB ISOlat1 - 'Ugrave': 0x00d9, # latin capital letter U with grave, U+00D9 ISOlat1 - 'Upsilon': 0x03a5, # greek capital letter upsilon, U+03A5 ISOgrk3 - 'Uuml': 0x00dc, # latin capital letter U with diaeresis, U+00DC 
ISOlat1 - 'Xi': 0x039e, # greek capital letter xi, U+039E ISOgrk3 - 'Yacute': 0x00dd, # latin capital letter Y with acute, U+00DD ISOlat1 - 'Yuml': 0x0178, # latin capital letter Y with diaeresis, U+0178 ISOlat2 - 'Zeta': 0x0396, # greek capital letter zeta, U+0396 - 'aacute': 0x00e1, # latin small letter a with acute, U+00E1 ISOlat1 - 'acirc': 0x00e2, # latin small letter a with circumflex, U+00E2 ISOlat1 - 'acute': 0x00b4, # acute accent = spacing acute, U+00B4 ISOdia - 'aelig': 0x00e6, # latin small letter ae = latin small ligature ae, U+00E6 ISOlat1 - 'agrave': 0x00e0, # latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1 - 'alefsym': 0x2135, # alef symbol = first transfinite cardinal, U+2135 NEW - 'alpha': 0x03b1, # greek small letter alpha, U+03B1 ISOgrk3 - 'amp': 0x0026, # ampersand, U+0026 ISOnum - 'and': 0x2227, # logical and = wedge, U+2227 ISOtech - 'ang': 0x2220, # angle, U+2220 ISOamso - 'aring': 0x00e5, # latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1 - 'asymp': 0x2248, # almost equal to = asymptotic to, U+2248 ISOamsr - 'atilde': 0x00e3, # latin small letter a with tilde, U+00E3 ISOlat1 - 'auml': 0x00e4, # latin small letter a with diaeresis, U+00E4 ISOlat1 - 'bdquo': 0x201e, # double low-9 quotation mark, U+201E NEW - 'beta': 0x03b2, # greek small letter beta, U+03B2 ISOgrk3 - 'brvbar': 0x00a6, # broken bar = broken vertical bar, U+00A6 ISOnum - 'bull': 0x2022, # bullet = black small circle, U+2022 ISOpub - 'cap': 0x2229, # intersection = cap, U+2229 ISOtech - 'ccedil': 0x00e7, # latin small letter c with cedilla, U+00E7 ISOlat1 - 'cedil': 0x00b8, # cedilla = spacing cedilla, U+00B8 ISOdia - 'cent': 0x00a2, # cent sign, U+00A2 ISOnum - 'chi': 0x03c7, # greek small letter chi, U+03C7 ISOgrk3 - 'circ': 0x02c6, # modifier letter circumflex accent, U+02C6 ISOpub - 'clubs': 0x2663, # black club suit = shamrock, U+2663 ISOpub - 'cong': 0x2245, # approximately equal to, U+2245 ISOtech - 'copy': 
0x00a9, # copyright sign, U+00A9 ISOnum - 'crarr': 0x21b5, # downwards arrow with corner leftwards = carriage return, U+21B5 NEW - 'cup': 0x222a, # union = cup, U+222A ISOtech - 'curren': 0x00a4, # currency sign, U+00A4 ISOnum - 'dArr': 0x21d3, # downwards double arrow, U+21D3 ISOamsa - 'dagger': 0x2020, # dagger, U+2020 ISOpub - 'darr': 0x2193, # downwards arrow, U+2193 ISOnum - 'deg': 0x00b0, # degree sign, U+00B0 ISOnum - 'delta': 0x03b4, # greek small letter delta, U+03B4 ISOgrk3 - 'diams': 0x2666, # black diamond suit, U+2666 ISOpub - 'divide': 0x00f7, # division sign, U+00F7 ISOnum - 'eacute': 0x00e9, # latin small letter e with acute, U+00E9 ISOlat1 - 'ecirc': 0x00ea, # latin small letter e with circumflex, U+00EA ISOlat1 - 'egrave': 0x00e8, # latin small letter e with grave, U+00E8 ISOlat1 - 'empty': 0x2205, # empty set = null set = diameter, U+2205 ISOamso - 'emsp': 0x2003, # em space, U+2003 ISOpub - 'ensp': 0x2002, # en space, U+2002 ISOpub - 'epsilon': 0x03b5, # greek small letter epsilon, U+03B5 ISOgrk3 - 'equiv': 0x2261, # identical to, U+2261 ISOtech - 'eta': 0x03b7, # greek small letter eta, U+03B7 ISOgrk3 - 'eth': 0x00f0, # latin small letter eth, U+00F0 ISOlat1 - 'euml': 0x00eb, # latin small letter e with diaeresis, U+00EB ISOlat1 - 'euro': 0x20ac, # euro sign, U+20AC NEW - 'exist': 0x2203, # there exists, U+2203 ISOtech - 'fnof': 0x0192, # latin small f with hook = function = florin, U+0192 ISOtech - 'forall': 0x2200, # for all, U+2200 ISOtech - 'frac12': 0x00bd, # vulgar fraction one half = fraction one half, U+00BD ISOnum - 'frac14': 0x00bc, # vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum - 'frac34': 0x00be, # vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum - 'frasl': 0x2044, # fraction slash, U+2044 NEW - 'gamma': 0x03b3, # greek small letter gamma, U+03B3 ISOgrk3 - 'ge': 0x2265, # greater-than or equal to, U+2265 ISOtech - 'gt': 0x003e, # greater-than sign, U+003E ISOnum - 'hArr': 0x21d4, # left 
right double arrow, U+21D4 ISOamsa - 'harr': 0x2194, # left right arrow, U+2194 ISOamsa - 'hearts': 0x2665, # black heart suit = valentine, U+2665 ISOpub - 'hellip': 0x2026, # horizontal ellipsis = three dot leader, U+2026 ISOpub - 'iacute': 0x00ed, # latin small letter i with acute, U+00ED ISOlat1 - 'icirc': 0x00ee, # latin small letter i with circumflex, U+00EE ISOlat1 - 'iexcl': 0x00a1, # inverted exclamation mark, U+00A1 ISOnum - 'igrave': 0x00ec, # latin small letter i with grave, U+00EC ISOlat1 - 'image': 0x2111, # blackletter capital I = imaginary part, U+2111 ISOamso - 'infin': 0x221e, # infinity, U+221E ISOtech - 'int': 0x222b, # integral, U+222B ISOtech - 'iota': 0x03b9, # greek small letter iota, U+03B9 ISOgrk3 - 'iquest': 0x00bf, # inverted question mark = turned question mark, U+00BF ISOnum - 'isin': 0x2208, # element of, U+2208 ISOtech - 'iuml': 0x00ef, # latin small letter i with diaeresis, U+00EF ISOlat1 - 'kappa': 0x03ba, # greek small letter kappa, U+03BA ISOgrk3 - 'lArr': 0x21d0, # leftwards double arrow, U+21D0 ISOtech - 'lambda': 0x03bb, # greek small letter lambda, U+03BB ISOgrk3 - 'lang': 0x2329, # left-pointing angle bracket = bra, U+2329 ISOtech - 'laquo': 0x00ab, # left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum - 'larr': 0x2190, # leftwards arrow, U+2190 ISOnum - 'lceil': 0x2308, # left ceiling = apl upstile, U+2308 ISOamsc - 'ldquo': 0x201c, # left double quotation mark, U+201C ISOnum - 'le': 0x2264, # less-than or equal to, U+2264 ISOtech - 'lfloor': 0x230a, # left floor = apl downstile, U+230A ISOamsc - 'lowast': 0x2217, # asterisk operator, U+2217 ISOtech - 'loz': 0x25ca, # lozenge, U+25CA ISOpub - 'lrm': 0x200e, # left-to-right mark, U+200E NEW RFC 2070 - 'lsaquo': 0x2039, # single left-pointing angle quotation mark, U+2039 ISO proposed - 'lsquo': 0x2018, # left single quotation mark, U+2018 ISOnum - 'lt': 0x003c, # less-than sign, U+003C ISOnum - 'macr': 0x00af, # macron = spacing macron = overline 
= APL overbar, U+00AF ISOdia - 'mdash': 0x2014, # em dash, U+2014 ISOpub - 'micro': 0x00b5, # micro sign, U+00B5 ISOnum - 'middot': 0x00b7, # middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum - 'minus': 0x2212, # minus sign, U+2212 ISOtech - 'mu': 0x03bc, # greek small letter mu, U+03BC ISOgrk3 - 'nabla': 0x2207, # nabla = backward difference, U+2207 ISOtech - 'nbsp': 0x00a0, # no-break space = non-breaking space, U+00A0 ISOnum - 'ndash': 0x2013, # en dash, U+2013 ISOpub - 'ne': 0x2260, # not equal to, U+2260 ISOtech - 'ni': 0x220b, # contains as member, U+220B ISOtech - 'not': 0x00ac, # not sign, U+00AC ISOnum - 'notin': 0x2209, # not an element of, U+2209 ISOtech - 'nsub': 0x2284, # not a subset of, U+2284 ISOamsn - 'ntilde': 0x00f1, # latin small letter n with tilde, U+00F1 ISOlat1 - 'nu': 0x03bd, # greek small letter nu, U+03BD ISOgrk3 - 'oacute': 0x00f3, # latin small letter o with acute, U+00F3 ISOlat1 - 'ocirc': 0x00f4, # latin small letter o with circumflex, U+00F4 ISOlat1 - 'oelig': 0x0153, # latin small ligature oe, U+0153 ISOlat2 - 'ograve': 0x00f2, # latin small letter o with grave, U+00F2 ISOlat1 - 'oline': 0x203e, # overline = spacing overscore, U+203E NEW - 'omega': 0x03c9, # greek small letter omega, U+03C9 ISOgrk3 - 'omicron': 0x03bf, # greek small letter omicron, U+03BF NEW - 'oplus': 0x2295, # circled plus = direct sum, U+2295 ISOamsb - 'or': 0x2228, # logical or = vee, U+2228 ISOtech - 'ordf': 0x00aa, # feminine ordinal indicator, U+00AA ISOnum - 'ordm': 0x00ba, # masculine ordinal indicator, U+00BA ISOnum - 'oslash': 0x00f8, # latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1 - 'otilde': 0x00f5, # latin small letter o with tilde, U+00F5 ISOlat1 - 'otimes': 0x2297, # circled times = vector product, U+2297 ISOamsb - 'ouml': 0x00f6, # latin small letter o with diaeresis, U+00F6 ISOlat1 - 'para': 0x00b6, # pilcrow sign = paragraph sign, U+00B6 ISOnum - 'part': 0x2202, # partial differential, U+2202 ISOtech - 
'permil': 0x2030, # per mille sign, U+2030 ISOtech - 'perp': 0x22a5, # up tack = orthogonal to = perpendicular, U+22A5 ISOtech - 'phi': 0x03c6, # greek small letter phi, U+03C6 ISOgrk3 - 'pi': 0x03c0, # greek small letter pi, U+03C0 ISOgrk3 - 'piv': 0x03d6, # greek pi symbol, U+03D6 ISOgrk3 - 'plusmn': 0x00b1, # plus-minus sign = plus-or-minus sign, U+00B1 ISOnum - 'pound': 0x00a3, # pound sign, U+00A3 ISOnum - 'prime': 0x2032, # prime = minutes = feet, U+2032 ISOtech - 'prod': 0x220f, # n-ary product = product sign, U+220F ISOamsb - 'prop': 0x221d, # proportional to, U+221D ISOtech - 'psi': 0x03c8, # greek small letter psi, U+03C8 ISOgrk3 - 'quot': 0x0022, # quotation mark = APL quote, U+0022 ISOnum - 'rArr': 0x21d2, # rightwards double arrow, U+21D2 ISOtech - 'radic': 0x221a, # square root = radical sign, U+221A ISOtech - 'rang': 0x232a, # right-pointing angle bracket = ket, U+232A ISOtech - 'raquo': 0x00bb, # right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum - 'rarr': 0x2192, # rightwards arrow, U+2192 ISOnum - 'rceil': 0x2309, # right ceiling, U+2309 ISOamsc - 'rdquo': 0x201d, # right double quotation mark, U+201D ISOnum - 'real': 0x211c, # blackletter capital R = real part symbol, U+211C ISOamso - 'reg': 0x00ae, # registered sign = registered trade mark sign, U+00AE ISOnum - 'rfloor': 0x230b, # right floor, U+230B ISOamsc - 'rho': 0x03c1, # greek small letter rho, U+03C1 ISOgrk3 - 'rlm': 0x200f, # right-to-left mark, U+200F NEW RFC 2070 - 'rsaquo': 0x203a, # single right-pointing angle quotation mark, U+203A ISO proposed - 'rsquo': 0x2019, # right single quotation mark, U+2019 ISOnum - 'sbquo': 0x201a, # single low-9 quotation mark, U+201A NEW - 'scaron': 0x0161, # latin small letter s with caron, U+0161 ISOlat2 - 'sdot': 0x22c5, # dot operator, U+22C5 ISOamsb - 'sect': 0x00a7, # section sign, U+00A7 ISOnum - 'shy': 0x00ad, # soft hyphen = discretionary hyphen, U+00AD ISOnum - 'sigma': 0x03c3, # greek small letter sigma, 
U+03C3 ISOgrk3 - 'sigmaf': 0x03c2, # greek small letter final sigma, U+03C2 ISOgrk3 - 'sim': 0x223c, # tilde operator = varies with = similar to, U+223C ISOtech - 'spades': 0x2660, # black spade suit, U+2660 ISOpub - 'sub': 0x2282, # subset of, U+2282 ISOtech - 'sube': 0x2286, # subset of or equal to, U+2286 ISOtech - 'sum': 0x2211, # n-ary sumation, U+2211 ISOamsb - 'sup': 0x2283, # superset of, U+2283 ISOtech - 'sup1': 0x00b9, # superscript one = superscript digit one, U+00B9 ISOnum - 'sup2': 0x00b2, # superscript two = superscript digit two = squared, U+00B2 ISOnum - 'sup3': 0x00b3, # superscript three = superscript digit three = cubed, U+00B3 ISOnum - 'supe': 0x2287, # superset of or equal to, U+2287 ISOtech - 'szlig': 0x00df, # latin small letter sharp s = ess-zed, U+00DF ISOlat1 - 'tau': 0x03c4, # greek small letter tau, U+03C4 ISOgrk3 - 'there4': 0x2234, # therefore, U+2234 ISOtech - 'theta': 0x03b8, # greek small letter theta, U+03B8 ISOgrk3 - 'thetasym': 0x03d1, # greek small letter theta symbol, U+03D1 NEW - 'thinsp': 0x2009, # thin space, U+2009 ISOpub - 'thorn': 0x00fe, # latin small letter thorn with, U+00FE ISOlat1 - 'tilde': 0x02dc, # small tilde, U+02DC ISOdia - 'times': 0x00d7, # multiplication sign, U+00D7 ISOnum - 'trade': 0x2122, # trade mark sign, U+2122 ISOnum - 'uArr': 0x21d1, # upwards double arrow, U+21D1 ISOamsa - 'uacute': 0x00fa, # latin small letter u with acute, U+00FA ISOlat1 - 'uarr': 0x2191, # upwards arrow, U+2191 ISOnum - 'ucirc': 0x00fb, # latin small letter u with circumflex, U+00FB ISOlat1 - 'ugrave': 0x00f9, # latin small letter u with grave, U+00F9 ISOlat1 - 'uml': 0x00a8, # diaeresis = spacing diaeresis, U+00A8 ISOdia - 'upsih': 0x03d2, # greek upsilon with hook symbol, U+03D2 NEW - 'upsilon': 0x03c5, # greek small letter upsilon, U+03C5 ISOgrk3 - 'uuml': 0x00fc, # latin small letter u with diaeresis, U+00FC ISOlat1 - 'weierp': 0x2118, # script capital P = power set = Weierstrass p, U+2118 ISOamso - 'xi': 0x03be, # greek 
small letter xi, U+03BE ISOgrk3 - 'yacute': 0x00fd, # latin small letter y with acute, U+00FD ISOlat1 - 'yen': 0x00a5, # yen sign = yuan sign, U+00A5 ISOnum - 'yuml': 0x00ff, # latin small letter y with diaeresis, U+00FF ISOlat1 - 'zeta': 0x03b6, # greek small letter zeta, U+03B6 ISOgrk3 - 'zwj': 0x200d, # zero width joiner, U+200D NEW RFC 2070 - 'zwnj': 0x200c, # zero width non-joiner, U+200C NEW RFC 2070 -} - - -# maps the HTML5 named character references to the equivalent Unicode character(s) -html5 = { - 'Aacute': '\xc1', - 'aacute': '\xe1', - 'Aacute;': '\xc1', - 'aacute;': '\xe1', - 'Abreve;': '\u0102', - 'abreve;': '\u0103', - 'ac;': '\u223e', - 'acd;': '\u223f', - 'acE;': '\u223e\u0333', - 'Acirc': '\xc2', - 'acirc': '\xe2', - 'Acirc;': '\xc2', - 'acirc;': '\xe2', - 'acute': '\xb4', - 'acute;': '\xb4', - 'Acy;': '\u0410', - 'acy;': '\u0430', - 'AElig': '\xc6', - 'aelig': '\xe6', - 'AElig;': '\xc6', - 'aelig;': '\xe6', - 'af;': '\u2061', - 'Afr;': '\U0001d504', - 'afr;': '\U0001d51e', - 'Agrave': '\xc0', - 'agrave': '\xe0', - 'Agrave;': '\xc0', - 'agrave;': '\xe0', - 'alefsym;': '\u2135', - 'aleph;': '\u2135', - 'Alpha;': '\u0391', - 'alpha;': '\u03b1', - 'Amacr;': '\u0100', - 'amacr;': '\u0101', - 'amalg;': '\u2a3f', - 'AMP': '&', - 'amp': '&', - 'AMP;': '&', - 'amp;': '&', - 'And;': '\u2a53', - 'and;': '\u2227', - 'andand;': '\u2a55', - 'andd;': '\u2a5c', - 'andslope;': '\u2a58', - 'andv;': '\u2a5a', - 'ang;': '\u2220', - 'ange;': '\u29a4', - 'angle;': '\u2220', - 'angmsd;': '\u2221', - 'angmsdaa;': '\u29a8', - 'angmsdab;': '\u29a9', - 'angmsdac;': '\u29aa', - 'angmsdad;': '\u29ab', - 'angmsdae;': '\u29ac', - 'angmsdaf;': '\u29ad', - 'angmsdag;': '\u29ae', - 'angmsdah;': '\u29af', - 'angrt;': '\u221f', - 'angrtvb;': '\u22be', - 'angrtvbd;': '\u299d', - 'angsph;': '\u2222', - 'angst;': '\xc5', - 'angzarr;': '\u237c', - 'Aogon;': '\u0104', - 'aogon;': '\u0105', - 'Aopf;': '\U0001d538', - 'aopf;': '\U0001d552', - 'ap;': '\u2248', - 'apacir;': '\u2a6f', - 
'apE;': '\u2a70', - 'ape;': '\u224a', - 'apid;': '\u224b', - 'apos;': "'", - 'ApplyFunction;': '\u2061', - 'approx;': '\u2248', - 'approxeq;': '\u224a', - 'Aring': '\xc5', - 'aring': '\xe5', - 'Aring;': '\xc5', - 'aring;': '\xe5', - 'Ascr;': '\U0001d49c', - 'ascr;': '\U0001d4b6', - 'Assign;': '\u2254', - 'ast;': '*', - 'asymp;': '\u2248', - 'asympeq;': '\u224d', - 'Atilde': '\xc3', - 'atilde': '\xe3', - 'Atilde;': '\xc3', - 'atilde;': '\xe3', - 'Auml': '\xc4', - 'auml': '\xe4', - 'Auml;': '\xc4', - 'auml;': '\xe4', - 'awconint;': '\u2233', - 'awint;': '\u2a11', - 'backcong;': '\u224c', - 'backepsilon;': '\u03f6', - 'backprime;': '\u2035', - 'backsim;': '\u223d', - 'backsimeq;': '\u22cd', - 'Backslash;': '\u2216', - 'Barv;': '\u2ae7', - 'barvee;': '\u22bd', - 'Barwed;': '\u2306', - 'barwed;': '\u2305', - 'barwedge;': '\u2305', - 'bbrk;': '\u23b5', - 'bbrktbrk;': '\u23b6', - 'bcong;': '\u224c', - 'Bcy;': '\u0411', - 'bcy;': '\u0431', - 'bdquo;': '\u201e', - 'becaus;': '\u2235', - 'Because;': '\u2235', - 'because;': '\u2235', - 'bemptyv;': '\u29b0', - 'bepsi;': '\u03f6', - 'bernou;': '\u212c', - 'Bernoullis;': '\u212c', - 'Beta;': '\u0392', - 'beta;': '\u03b2', - 'beth;': '\u2136', - 'between;': '\u226c', - 'Bfr;': '\U0001d505', - 'bfr;': '\U0001d51f', - 'bigcap;': '\u22c2', - 'bigcirc;': '\u25ef', - 'bigcup;': '\u22c3', - 'bigodot;': '\u2a00', - 'bigoplus;': '\u2a01', - 'bigotimes;': '\u2a02', - 'bigsqcup;': '\u2a06', - 'bigstar;': '\u2605', - 'bigtriangledown;': '\u25bd', - 'bigtriangleup;': '\u25b3', - 'biguplus;': '\u2a04', - 'bigvee;': '\u22c1', - 'bigwedge;': '\u22c0', - 'bkarow;': '\u290d', - 'blacklozenge;': '\u29eb', - 'blacksquare;': '\u25aa', - 'blacktriangle;': '\u25b4', - 'blacktriangledown;': '\u25be', - 'blacktriangleleft;': '\u25c2', - 'blacktriangleright;': '\u25b8', - 'blank;': '\u2423', - 'blk12;': '\u2592', - 'blk14;': '\u2591', - 'blk34;': '\u2593', - 'block;': '\u2588', - 'bne;': '=\u20e5', - 'bnequiv;': '\u2261\u20e5', - 'bNot;': '\u2aed', - 
'bnot;': '\u2310', - 'Bopf;': '\U0001d539', - 'bopf;': '\U0001d553', - 'bot;': '\u22a5', - 'bottom;': '\u22a5', - 'bowtie;': '\u22c8', - 'boxbox;': '\u29c9', - 'boxDL;': '\u2557', - 'boxDl;': '\u2556', - 'boxdL;': '\u2555', - 'boxdl;': '\u2510', - 'boxDR;': '\u2554', - 'boxDr;': '\u2553', - 'boxdR;': '\u2552', - 'boxdr;': '\u250c', - 'boxH;': '\u2550', - 'boxh;': '\u2500', - 'boxHD;': '\u2566', - 'boxHd;': '\u2564', - 'boxhD;': '\u2565', - 'boxhd;': '\u252c', - 'boxHU;': '\u2569', - 'boxHu;': '\u2567', - 'boxhU;': '\u2568', - 'boxhu;': '\u2534', - 'boxminus;': '\u229f', - 'boxplus;': '\u229e', - 'boxtimes;': '\u22a0', - 'boxUL;': '\u255d', - 'boxUl;': '\u255c', - 'boxuL;': '\u255b', - 'boxul;': '\u2518', - 'boxUR;': '\u255a', - 'boxUr;': '\u2559', - 'boxuR;': '\u2558', - 'boxur;': '\u2514', - 'boxV;': '\u2551', - 'boxv;': '\u2502', - 'boxVH;': '\u256c', - 'boxVh;': '\u256b', - 'boxvH;': '\u256a', - 'boxvh;': '\u253c', - 'boxVL;': '\u2563', - 'boxVl;': '\u2562', - 'boxvL;': '\u2561', - 'boxvl;': '\u2524', - 'boxVR;': '\u2560', - 'boxVr;': '\u255f', - 'boxvR;': '\u255e', - 'boxvr;': '\u251c', - 'bprime;': '\u2035', - 'Breve;': '\u02d8', - 'breve;': '\u02d8', - 'brvbar': '\xa6', - 'brvbar;': '\xa6', - 'Bscr;': '\u212c', - 'bscr;': '\U0001d4b7', - 'bsemi;': '\u204f', - 'bsim;': '\u223d', - 'bsime;': '\u22cd', - 'bsol;': '\\', - 'bsolb;': '\u29c5', - 'bsolhsub;': '\u27c8', - 'bull;': '\u2022', - 'bullet;': '\u2022', - 'bump;': '\u224e', - 'bumpE;': '\u2aae', - 'bumpe;': '\u224f', - 'Bumpeq;': '\u224e', - 'bumpeq;': '\u224f', - 'Cacute;': '\u0106', - 'cacute;': '\u0107', - 'Cap;': '\u22d2', - 'cap;': '\u2229', - 'capand;': '\u2a44', - 'capbrcup;': '\u2a49', - 'capcap;': '\u2a4b', - 'capcup;': '\u2a47', - 'capdot;': '\u2a40', - 'CapitalDifferentialD;': '\u2145', - 'caps;': '\u2229\ufe00', - 'caret;': '\u2041', - 'caron;': '\u02c7', - 'Cayleys;': '\u212d', - 'ccaps;': '\u2a4d', - 'Ccaron;': '\u010c', - 'ccaron;': '\u010d', - 'Ccedil': '\xc7', - 'ccedil': '\xe7', - 
'Ccedil;': '\xc7', - 'ccedil;': '\xe7', - 'Ccirc;': '\u0108', - 'ccirc;': '\u0109', - 'Cconint;': '\u2230', - 'ccups;': '\u2a4c', - 'ccupssm;': '\u2a50', - 'Cdot;': '\u010a', - 'cdot;': '\u010b', - 'cedil': '\xb8', - 'cedil;': '\xb8', - 'Cedilla;': '\xb8', - 'cemptyv;': '\u29b2', - 'cent': '\xa2', - 'cent;': '\xa2', - 'CenterDot;': '\xb7', - 'centerdot;': '\xb7', - 'Cfr;': '\u212d', - 'cfr;': '\U0001d520', - 'CHcy;': '\u0427', - 'chcy;': '\u0447', - 'check;': '\u2713', - 'checkmark;': '\u2713', - 'Chi;': '\u03a7', - 'chi;': '\u03c7', - 'cir;': '\u25cb', - 'circ;': '\u02c6', - 'circeq;': '\u2257', - 'circlearrowleft;': '\u21ba', - 'circlearrowright;': '\u21bb', - 'circledast;': '\u229b', - 'circledcirc;': '\u229a', - 'circleddash;': '\u229d', - 'CircleDot;': '\u2299', - 'circledR;': '\xae', - 'circledS;': '\u24c8', - 'CircleMinus;': '\u2296', - 'CirclePlus;': '\u2295', - 'CircleTimes;': '\u2297', - 'cirE;': '\u29c3', - 'cire;': '\u2257', - 'cirfnint;': '\u2a10', - 'cirmid;': '\u2aef', - 'cirscir;': '\u29c2', - 'ClockwiseContourIntegral;': '\u2232', - 'CloseCurlyDoubleQuote;': '\u201d', - 'CloseCurlyQuote;': '\u2019', - 'clubs;': '\u2663', - 'clubsuit;': '\u2663', - 'Colon;': '\u2237', - 'colon;': ':', - 'Colone;': '\u2a74', - 'colone;': '\u2254', - 'coloneq;': '\u2254', - 'comma;': ',', - 'commat;': '@', - 'comp;': '\u2201', - 'compfn;': '\u2218', - 'complement;': '\u2201', - 'complexes;': '\u2102', - 'cong;': '\u2245', - 'congdot;': '\u2a6d', - 'Congruent;': '\u2261', - 'Conint;': '\u222f', - 'conint;': '\u222e', - 'ContourIntegral;': '\u222e', - 'Copf;': '\u2102', - 'copf;': '\U0001d554', - 'coprod;': '\u2210', - 'Coproduct;': '\u2210', - 'COPY': '\xa9', - 'copy': '\xa9', - 'COPY;': '\xa9', - 'copy;': '\xa9', - 'copysr;': '\u2117', - 'CounterClockwiseContourIntegral;': '\u2233', - 'crarr;': '\u21b5', - 'Cross;': '\u2a2f', - 'cross;': '\u2717', - 'Cscr;': '\U0001d49e', - 'cscr;': '\U0001d4b8', - 'csub;': '\u2acf', - 'csube;': '\u2ad1', - 'csup;': '\u2ad0', - 
'csupe;': '\u2ad2', - 'ctdot;': '\u22ef', - 'cudarrl;': '\u2938', - 'cudarrr;': '\u2935', - 'cuepr;': '\u22de', - 'cuesc;': '\u22df', - 'cularr;': '\u21b6', - 'cularrp;': '\u293d', - 'Cup;': '\u22d3', - 'cup;': '\u222a', - 'cupbrcap;': '\u2a48', - 'CupCap;': '\u224d', - 'cupcap;': '\u2a46', - 'cupcup;': '\u2a4a', - 'cupdot;': '\u228d', - 'cupor;': '\u2a45', - 'cups;': '\u222a\ufe00', - 'curarr;': '\u21b7', - 'curarrm;': '\u293c', - 'curlyeqprec;': '\u22de', - 'curlyeqsucc;': '\u22df', - 'curlyvee;': '\u22ce', - 'curlywedge;': '\u22cf', - 'curren': '\xa4', - 'curren;': '\xa4', - 'curvearrowleft;': '\u21b6', - 'curvearrowright;': '\u21b7', - 'cuvee;': '\u22ce', - 'cuwed;': '\u22cf', - 'cwconint;': '\u2232', - 'cwint;': '\u2231', - 'cylcty;': '\u232d', - 'Dagger;': '\u2021', - 'dagger;': '\u2020', - 'daleth;': '\u2138', - 'Darr;': '\u21a1', - 'dArr;': '\u21d3', - 'darr;': '\u2193', - 'dash;': '\u2010', - 'Dashv;': '\u2ae4', - 'dashv;': '\u22a3', - 'dbkarow;': '\u290f', - 'dblac;': '\u02dd', - 'Dcaron;': '\u010e', - 'dcaron;': '\u010f', - 'Dcy;': '\u0414', - 'dcy;': '\u0434', - 'DD;': '\u2145', - 'dd;': '\u2146', - 'ddagger;': '\u2021', - 'ddarr;': '\u21ca', - 'DDotrahd;': '\u2911', - 'ddotseq;': '\u2a77', - 'deg': '\xb0', - 'deg;': '\xb0', - 'Del;': '\u2207', - 'Delta;': '\u0394', - 'delta;': '\u03b4', - 'demptyv;': '\u29b1', - 'dfisht;': '\u297f', - 'Dfr;': '\U0001d507', - 'dfr;': '\U0001d521', - 'dHar;': '\u2965', - 'dharl;': '\u21c3', - 'dharr;': '\u21c2', - 'DiacriticalAcute;': '\xb4', - 'DiacriticalDot;': '\u02d9', - 'DiacriticalDoubleAcute;': '\u02dd', - 'DiacriticalGrave;': '`', - 'DiacriticalTilde;': '\u02dc', - 'diam;': '\u22c4', - 'Diamond;': '\u22c4', - 'diamond;': '\u22c4', - 'diamondsuit;': '\u2666', - 'diams;': '\u2666', - 'die;': '\xa8', - 'DifferentialD;': '\u2146', - 'digamma;': '\u03dd', - 'disin;': '\u22f2', - 'div;': '\xf7', - 'divide': '\xf7', - 'divide;': '\xf7', - 'divideontimes;': '\u22c7', - 'divonx;': '\u22c7', - 'DJcy;': '\u0402', - 'djcy;': 
'\u0452', - 'dlcorn;': '\u231e', - 'dlcrop;': '\u230d', - 'dollar;': '$', - 'Dopf;': '\U0001d53b', - 'dopf;': '\U0001d555', - 'Dot;': '\xa8', - 'dot;': '\u02d9', - 'DotDot;': '\u20dc', - 'doteq;': '\u2250', - 'doteqdot;': '\u2251', - 'DotEqual;': '\u2250', - 'dotminus;': '\u2238', - 'dotplus;': '\u2214', - 'dotsquare;': '\u22a1', - 'doublebarwedge;': '\u2306', - 'DoubleContourIntegral;': '\u222f', - 'DoubleDot;': '\xa8', - 'DoubleDownArrow;': '\u21d3', - 'DoubleLeftArrow;': '\u21d0', - 'DoubleLeftRightArrow;': '\u21d4', - 'DoubleLeftTee;': '\u2ae4', - 'DoubleLongLeftArrow;': '\u27f8', - 'DoubleLongLeftRightArrow;': '\u27fa', - 'DoubleLongRightArrow;': '\u27f9', - 'DoubleRightArrow;': '\u21d2', - 'DoubleRightTee;': '\u22a8', - 'DoubleUpArrow;': '\u21d1', - 'DoubleUpDownArrow;': '\u21d5', - 'DoubleVerticalBar;': '\u2225', - 'DownArrow;': '\u2193', - 'Downarrow;': '\u21d3', - 'downarrow;': '\u2193', - 'DownArrowBar;': '\u2913', - 'DownArrowUpArrow;': '\u21f5', - 'DownBreve;': '\u0311', - 'downdownarrows;': '\u21ca', - 'downharpoonleft;': '\u21c3', - 'downharpoonright;': '\u21c2', - 'DownLeftRightVector;': '\u2950', - 'DownLeftTeeVector;': '\u295e', - 'DownLeftVector;': '\u21bd', - 'DownLeftVectorBar;': '\u2956', - 'DownRightTeeVector;': '\u295f', - 'DownRightVector;': '\u21c1', - 'DownRightVectorBar;': '\u2957', - 'DownTee;': '\u22a4', - 'DownTeeArrow;': '\u21a7', - 'drbkarow;': '\u2910', - 'drcorn;': '\u231f', - 'drcrop;': '\u230c', - 'Dscr;': '\U0001d49f', - 'dscr;': '\U0001d4b9', - 'DScy;': '\u0405', - 'dscy;': '\u0455', - 'dsol;': '\u29f6', - 'Dstrok;': '\u0110', - 'dstrok;': '\u0111', - 'dtdot;': '\u22f1', - 'dtri;': '\u25bf', - 'dtrif;': '\u25be', - 'duarr;': '\u21f5', - 'duhar;': '\u296f', - 'dwangle;': '\u29a6', - 'DZcy;': '\u040f', - 'dzcy;': '\u045f', - 'dzigrarr;': '\u27ff', - 'Eacute': '\xc9', - 'eacute': '\xe9', - 'Eacute;': '\xc9', - 'eacute;': '\xe9', - 'easter;': '\u2a6e', - 'Ecaron;': '\u011a', - 'ecaron;': '\u011b', - 'ecir;': '\u2256', - 'Ecirc': 
'\xca', - 'ecirc': '\xea', - 'Ecirc;': '\xca', - 'ecirc;': '\xea', - 'ecolon;': '\u2255', - 'Ecy;': '\u042d', - 'ecy;': '\u044d', - 'eDDot;': '\u2a77', - 'Edot;': '\u0116', - 'eDot;': '\u2251', - 'edot;': '\u0117', - 'ee;': '\u2147', - 'efDot;': '\u2252', - 'Efr;': '\U0001d508', - 'efr;': '\U0001d522', - 'eg;': '\u2a9a', - 'Egrave': '\xc8', - 'egrave': '\xe8', - 'Egrave;': '\xc8', - 'egrave;': '\xe8', - 'egs;': '\u2a96', - 'egsdot;': '\u2a98', - 'el;': '\u2a99', - 'Element;': '\u2208', - 'elinters;': '\u23e7', - 'ell;': '\u2113', - 'els;': '\u2a95', - 'elsdot;': '\u2a97', - 'Emacr;': '\u0112', - 'emacr;': '\u0113', - 'empty;': '\u2205', - 'emptyset;': '\u2205', - 'EmptySmallSquare;': '\u25fb', - 'emptyv;': '\u2205', - 'EmptyVerySmallSquare;': '\u25ab', - 'emsp13;': '\u2004', - 'emsp14;': '\u2005', - 'emsp;': '\u2003', - 'ENG;': '\u014a', - 'eng;': '\u014b', - 'ensp;': '\u2002', - 'Eogon;': '\u0118', - 'eogon;': '\u0119', - 'Eopf;': '\U0001d53c', - 'eopf;': '\U0001d556', - 'epar;': '\u22d5', - 'eparsl;': '\u29e3', - 'eplus;': '\u2a71', - 'epsi;': '\u03b5', - 'Epsilon;': '\u0395', - 'epsilon;': '\u03b5', - 'epsiv;': '\u03f5', - 'eqcirc;': '\u2256', - 'eqcolon;': '\u2255', - 'eqsim;': '\u2242', - 'eqslantgtr;': '\u2a96', - 'eqslantless;': '\u2a95', - 'Equal;': '\u2a75', - 'equals;': '=', - 'EqualTilde;': '\u2242', - 'equest;': '\u225f', - 'Equilibrium;': '\u21cc', - 'equiv;': '\u2261', - 'equivDD;': '\u2a78', - 'eqvparsl;': '\u29e5', - 'erarr;': '\u2971', - 'erDot;': '\u2253', - 'Escr;': '\u2130', - 'escr;': '\u212f', - 'esdot;': '\u2250', - 'Esim;': '\u2a73', - 'esim;': '\u2242', - 'Eta;': '\u0397', - 'eta;': '\u03b7', - 'ETH': '\xd0', - 'eth': '\xf0', - 'ETH;': '\xd0', - 'eth;': '\xf0', - 'Euml': '\xcb', - 'euml': '\xeb', - 'Euml;': '\xcb', - 'euml;': '\xeb', - 'euro;': '\u20ac', - 'excl;': '!', - 'exist;': '\u2203', - 'Exists;': '\u2203', - 'expectation;': '\u2130', - 'ExponentialE;': '\u2147', - 'exponentiale;': '\u2147', - 'fallingdotseq;': '\u2252', - 'Fcy;': 
'\u0424', - 'fcy;': '\u0444', - 'female;': '\u2640', - 'ffilig;': '\ufb03', - 'fflig;': '\ufb00', - 'ffllig;': '\ufb04', - 'Ffr;': '\U0001d509', - 'ffr;': '\U0001d523', - 'filig;': '\ufb01', - 'FilledSmallSquare;': '\u25fc', - 'FilledVerySmallSquare;': '\u25aa', - 'fjlig;': 'fj', - 'flat;': '\u266d', - 'fllig;': '\ufb02', - 'fltns;': '\u25b1', - 'fnof;': '\u0192', - 'Fopf;': '\U0001d53d', - 'fopf;': '\U0001d557', - 'ForAll;': '\u2200', - 'forall;': '\u2200', - 'fork;': '\u22d4', - 'forkv;': '\u2ad9', - 'Fouriertrf;': '\u2131', - 'fpartint;': '\u2a0d', - 'frac12': '\xbd', - 'frac12;': '\xbd', - 'frac13;': '\u2153', - 'frac14': '\xbc', - 'frac14;': '\xbc', - 'frac15;': '\u2155', - 'frac16;': '\u2159', - 'frac18;': '\u215b', - 'frac23;': '\u2154', - 'frac25;': '\u2156', - 'frac34': '\xbe', - 'frac34;': '\xbe', - 'frac35;': '\u2157', - 'frac38;': '\u215c', - 'frac45;': '\u2158', - 'frac56;': '\u215a', - 'frac58;': '\u215d', - 'frac78;': '\u215e', - 'frasl;': '\u2044', - 'frown;': '\u2322', - 'Fscr;': '\u2131', - 'fscr;': '\U0001d4bb', - 'gacute;': '\u01f5', - 'Gamma;': '\u0393', - 'gamma;': '\u03b3', - 'Gammad;': '\u03dc', - 'gammad;': '\u03dd', - 'gap;': '\u2a86', - 'Gbreve;': '\u011e', - 'gbreve;': '\u011f', - 'Gcedil;': '\u0122', - 'Gcirc;': '\u011c', - 'gcirc;': '\u011d', - 'Gcy;': '\u0413', - 'gcy;': '\u0433', - 'Gdot;': '\u0120', - 'gdot;': '\u0121', - 'gE;': '\u2267', - 'ge;': '\u2265', - 'gEl;': '\u2a8c', - 'gel;': '\u22db', - 'geq;': '\u2265', - 'geqq;': '\u2267', - 'geqslant;': '\u2a7e', - 'ges;': '\u2a7e', - 'gescc;': '\u2aa9', - 'gesdot;': '\u2a80', - 'gesdoto;': '\u2a82', - 'gesdotol;': '\u2a84', - 'gesl;': '\u22db\ufe00', - 'gesles;': '\u2a94', - 'Gfr;': '\U0001d50a', - 'gfr;': '\U0001d524', - 'Gg;': '\u22d9', - 'gg;': '\u226b', - 'ggg;': '\u22d9', - 'gimel;': '\u2137', - 'GJcy;': '\u0403', - 'gjcy;': '\u0453', - 'gl;': '\u2277', - 'gla;': '\u2aa5', - 'glE;': '\u2a92', - 'glj;': '\u2aa4', - 'gnap;': '\u2a8a', - 'gnapprox;': '\u2a8a', - 'gnE;': '\u2269', - 
'gne;': '\u2a88', - 'gneq;': '\u2a88', - 'gneqq;': '\u2269', - 'gnsim;': '\u22e7', - 'Gopf;': '\U0001d53e', - 'gopf;': '\U0001d558', - 'grave;': '`', - 'GreaterEqual;': '\u2265', - 'GreaterEqualLess;': '\u22db', - 'GreaterFullEqual;': '\u2267', - 'GreaterGreater;': '\u2aa2', - 'GreaterLess;': '\u2277', - 'GreaterSlantEqual;': '\u2a7e', - 'GreaterTilde;': '\u2273', - 'Gscr;': '\U0001d4a2', - 'gscr;': '\u210a', - 'gsim;': '\u2273', - 'gsime;': '\u2a8e', - 'gsiml;': '\u2a90', - 'GT': '>', - 'gt': '>', - 'GT;': '>', - 'Gt;': '\u226b', - 'gt;': '>', - 'gtcc;': '\u2aa7', - 'gtcir;': '\u2a7a', - 'gtdot;': '\u22d7', - 'gtlPar;': '\u2995', - 'gtquest;': '\u2a7c', - 'gtrapprox;': '\u2a86', - 'gtrarr;': '\u2978', - 'gtrdot;': '\u22d7', - 'gtreqless;': '\u22db', - 'gtreqqless;': '\u2a8c', - 'gtrless;': '\u2277', - 'gtrsim;': '\u2273', - 'gvertneqq;': '\u2269\ufe00', - 'gvnE;': '\u2269\ufe00', - 'Hacek;': '\u02c7', - 'hairsp;': '\u200a', - 'half;': '\xbd', - 'hamilt;': '\u210b', - 'HARDcy;': '\u042a', - 'hardcy;': '\u044a', - 'hArr;': '\u21d4', - 'harr;': '\u2194', - 'harrcir;': '\u2948', - 'harrw;': '\u21ad', - 'Hat;': '^', - 'hbar;': '\u210f', - 'Hcirc;': '\u0124', - 'hcirc;': '\u0125', - 'hearts;': '\u2665', - 'heartsuit;': '\u2665', - 'hellip;': '\u2026', - 'hercon;': '\u22b9', - 'Hfr;': '\u210c', - 'hfr;': '\U0001d525', - 'HilbertSpace;': '\u210b', - 'hksearow;': '\u2925', - 'hkswarow;': '\u2926', - 'hoarr;': '\u21ff', - 'homtht;': '\u223b', - 'hookleftarrow;': '\u21a9', - 'hookrightarrow;': '\u21aa', - 'Hopf;': '\u210d', - 'hopf;': '\U0001d559', - 'horbar;': '\u2015', - 'HorizontalLine;': '\u2500', - 'Hscr;': '\u210b', - 'hscr;': '\U0001d4bd', - 'hslash;': '\u210f', - 'Hstrok;': '\u0126', - 'hstrok;': '\u0127', - 'HumpDownHump;': '\u224e', - 'HumpEqual;': '\u224f', - 'hybull;': '\u2043', - 'hyphen;': '\u2010', - 'Iacute': '\xcd', - 'iacute': '\xed', - 'Iacute;': '\xcd', - 'iacute;': '\xed', - 'ic;': '\u2063', - 'Icirc': '\xce', - 'icirc': '\xee', - 'Icirc;': '\xce', - 
'icirc;': '\xee', - 'Icy;': '\u0418', - 'icy;': '\u0438', - 'Idot;': '\u0130', - 'IEcy;': '\u0415', - 'iecy;': '\u0435', - 'iexcl': '\xa1', - 'iexcl;': '\xa1', - 'iff;': '\u21d4', - 'Ifr;': '\u2111', - 'ifr;': '\U0001d526', - 'Igrave': '\xcc', - 'igrave': '\xec', - 'Igrave;': '\xcc', - 'igrave;': '\xec', - 'ii;': '\u2148', - 'iiiint;': '\u2a0c', - 'iiint;': '\u222d', - 'iinfin;': '\u29dc', - 'iiota;': '\u2129', - 'IJlig;': '\u0132', - 'ijlig;': '\u0133', - 'Im;': '\u2111', - 'Imacr;': '\u012a', - 'imacr;': '\u012b', - 'image;': '\u2111', - 'ImaginaryI;': '\u2148', - 'imagline;': '\u2110', - 'imagpart;': '\u2111', - 'imath;': '\u0131', - 'imof;': '\u22b7', - 'imped;': '\u01b5', - 'Implies;': '\u21d2', - 'in;': '\u2208', - 'incare;': '\u2105', - 'infin;': '\u221e', - 'infintie;': '\u29dd', - 'inodot;': '\u0131', - 'Int;': '\u222c', - 'int;': '\u222b', - 'intcal;': '\u22ba', - 'integers;': '\u2124', - 'Integral;': '\u222b', - 'intercal;': '\u22ba', - 'Intersection;': '\u22c2', - 'intlarhk;': '\u2a17', - 'intprod;': '\u2a3c', - 'InvisibleComma;': '\u2063', - 'InvisibleTimes;': '\u2062', - 'IOcy;': '\u0401', - 'iocy;': '\u0451', - 'Iogon;': '\u012e', - 'iogon;': '\u012f', - 'Iopf;': '\U0001d540', - 'iopf;': '\U0001d55a', - 'Iota;': '\u0399', - 'iota;': '\u03b9', - 'iprod;': '\u2a3c', - 'iquest': '\xbf', - 'iquest;': '\xbf', - 'Iscr;': '\u2110', - 'iscr;': '\U0001d4be', - 'isin;': '\u2208', - 'isindot;': '\u22f5', - 'isinE;': '\u22f9', - 'isins;': '\u22f4', - 'isinsv;': '\u22f3', - 'isinv;': '\u2208', - 'it;': '\u2062', - 'Itilde;': '\u0128', - 'itilde;': '\u0129', - 'Iukcy;': '\u0406', - 'iukcy;': '\u0456', - 'Iuml': '\xcf', - 'iuml': '\xef', - 'Iuml;': '\xcf', - 'iuml;': '\xef', - 'Jcirc;': '\u0134', - 'jcirc;': '\u0135', - 'Jcy;': '\u0419', - 'jcy;': '\u0439', - 'Jfr;': '\U0001d50d', - 'jfr;': '\U0001d527', - 'jmath;': '\u0237', - 'Jopf;': '\U0001d541', - 'jopf;': '\U0001d55b', - 'Jscr;': '\U0001d4a5', - 'jscr;': '\U0001d4bf', - 'Jsercy;': '\u0408', - 'jsercy;': 
'\u0458', - 'Jukcy;': '\u0404', - 'jukcy;': '\u0454', - 'Kappa;': '\u039a', - 'kappa;': '\u03ba', - 'kappav;': '\u03f0', - 'Kcedil;': '\u0136', - 'kcedil;': '\u0137', - 'Kcy;': '\u041a', - 'kcy;': '\u043a', - 'Kfr;': '\U0001d50e', - 'kfr;': '\U0001d528', - 'kgreen;': '\u0138', - 'KHcy;': '\u0425', - 'khcy;': '\u0445', - 'KJcy;': '\u040c', - 'kjcy;': '\u045c', - 'Kopf;': '\U0001d542', - 'kopf;': '\U0001d55c', - 'Kscr;': '\U0001d4a6', - 'kscr;': '\U0001d4c0', - 'lAarr;': '\u21da', - 'Lacute;': '\u0139', - 'lacute;': '\u013a', - 'laemptyv;': '\u29b4', - 'lagran;': '\u2112', - 'Lambda;': '\u039b', - 'lambda;': '\u03bb', - 'Lang;': '\u27ea', - 'lang;': '\u27e8', - 'langd;': '\u2991', - 'langle;': '\u27e8', - 'lap;': '\u2a85', - 'Laplacetrf;': '\u2112', - 'laquo': '\xab', - 'laquo;': '\xab', - 'Larr;': '\u219e', - 'lArr;': '\u21d0', - 'larr;': '\u2190', - 'larrb;': '\u21e4', - 'larrbfs;': '\u291f', - 'larrfs;': '\u291d', - 'larrhk;': '\u21a9', - 'larrlp;': '\u21ab', - 'larrpl;': '\u2939', - 'larrsim;': '\u2973', - 'larrtl;': '\u21a2', - 'lat;': '\u2aab', - 'lAtail;': '\u291b', - 'latail;': '\u2919', - 'late;': '\u2aad', - 'lates;': '\u2aad\ufe00', - 'lBarr;': '\u290e', - 'lbarr;': '\u290c', - 'lbbrk;': '\u2772', - 'lbrace;': '{', - 'lbrack;': '[', - 'lbrke;': '\u298b', - 'lbrksld;': '\u298f', - 'lbrkslu;': '\u298d', - 'Lcaron;': '\u013d', - 'lcaron;': '\u013e', - 'Lcedil;': '\u013b', - 'lcedil;': '\u013c', - 'lceil;': '\u2308', - 'lcub;': '{', - 'Lcy;': '\u041b', - 'lcy;': '\u043b', - 'ldca;': '\u2936', - 'ldquo;': '\u201c', - 'ldquor;': '\u201e', - 'ldrdhar;': '\u2967', - 'ldrushar;': '\u294b', - 'ldsh;': '\u21b2', - 'lE;': '\u2266', - 'le;': '\u2264', - 'LeftAngleBracket;': '\u27e8', - 'LeftArrow;': '\u2190', - 'Leftarrow;': '\u21d0', - 'leftarrow;': '\u2190', - 'LeftArrowBar;': '\u21e4', - 'LeftArrowRightArrow;': '\u21c6', - 'leftarrowtail;': '\u21a2', - 'LeftCeiling;': '\u2308', - 'LeftDoubleBracket;': '\u27e6', - 'LeftDownTeeVector;': '\u2961', - 'LeftDownVector;': 
'\u21c3', - 'LeftDownVectorBar;': '\u2959', - 'LeftFloor;': '\u230a', - 'leftharpoondown;': '\u21bd', - 'leftharpoonup;': '\u21bc', - 'leftleftarrows;': '\u21c7', - 'LeftRightArrow;': '\u2194', - 'Leftrightarrow;': '\u21d4', - 'leftrightarrow;': '\u2194', - 'leftrightarrows;': '\u21c6', - 'leftrightharpoons;': '\u21cb', - 'leftrightsquigarrow;': '\u21ad', - 'LeftRightVector;': '\u294e', - 'LeftTee;': '\u22a3', - 'LeftTeeArrow;': '\u21a4', - 'LeftTeeVector;': '\u295a', - 'leftthreetimes;': '\u22cb', - 'LeftTriangle;': '\u22b2', - 'LeftTriangleBar;': '\u29cf', - 'LeftTriangleEqual;': '\u22b4', - 'LeftUpDownVector;': '\u2951', - 'LeftUpTeeVector;': '\u2960', - 'LeftUpVector;': '\u21bf', - 'LeftUpVectorBar;': '\u2958', - 'LeftVector;': '\u21bc', - 'LeftVectorBar;': '\u2952', - 'lEg;': '\u2a8b', - 'leg;': '\u22da', - 'leq;': '\u2264', - 'leqq;': '\u2266', - 'leqslant;': '\u2a7d', - 'les;': '\u2a7d', - 'lescc;': '\u2aa8', - 'lesdot;': '\u2a7f', - 'lesdoto;': '\u2a81', - 'lesdotor;': '\u2a83', - 'lesg;': '\u22da\ufe00', - 'lesges;': '\u2a93', - 'lessapprox;': '\u2a85', - 'lessdot;': '\u22d6', - 'lesseqgtr;': '\u22da', - 'lesseqqgtr;': '\u2a8b', - 'LessEqualGreater;': '\u22da', - 'LessFullEqual;': '\u2266', - 'LessGreater;': '\u2276', - 'lessgtr;': '\u2276', - 'LessLess;': '\u2aa1', - 'lesssim;': '\u2272', - 'LessSlantEqual;': '\u2a7d', - 'LessTilde;': '\u2272', - 'lfisht;': '\u297c', - 'lfloor;': '\u230a', - 'Lfr;': '\U0001d50f', - 'lfr;': '\U0001d529', - 'lg;': '\u2276', - 'lgE;': '\u2a91', - 'lHar;': '\u2962', - 'lhard;': '\u21bd', - 'lharu;': '\u21bc', - 'lharul;': '\u296a', - 'lhblk;': '\u2584', - 'LJcy;': '\u0409', - 'ljcy;': '\u0459', - 'Ll;': '\u22d8', - 'll;': '\u226a', - 'llarr;': '\u21c7', - 'llcorner;': '\u231e', - 'Lleftarrow;': '\u21da', - 'llhard;': '\u296b', - 'lltri;': '\u25fa', - 'Lmidot;': '\u013f', - 'lmidot;': '\u0140', - 'lmoust;': '\u23b0', - 'lmoustache;': '\u23b0', - 'lnap;': '\u2a89', - 'lnapprox;': '\u2a89', - 'lnE;': '\u2268', - 'lne;': 
'\u2a87', - 'lneq;': '\u2a87', - 'lneqq;': '\u2268', - 'lnsim;': '\u22e6', - 'loang;': '\u27ec', - 'loarr;': '\u21fd', - 'lobrk;': '\u27e6', - 'LongLeftArrow;': '\u27f5', - 'Longleftarrow;': '\u27f8', - 'longleftarrow;': '\u27f5', - 'LongLeftRightArrow;': '\u27f7', - 'Longleftrightarrow;': '\u27fa', - 'longleftrightarrow;': '\u27f7', - 'longmapsto;': '\u27fc', - 'LongRightArrow;': '\u27f6', - 'Longrightarrow;': '\u27f9', - 'longrightarrow;': '\u27f6', - 'looparrowleft;': '\u21ab', - 'looparrowright;': '\u21ac', - 'lopar;': '\u2985', - 'Lopf;': '\U0001d543', - 'lopf;': '\U0001d55d', - 'loplus;': '\u2a2d', - 'lotimes;': '\u2a34', - 'lowast;': '\u2217', - 'lowbar;': '_', - 'LowerLeftArrow;': '\u2199', - 'LowerRightArrow;': '\u2198', - 'loz;': '\u25ca', - 'lozenge;': '\u25ca', - 'lozf;': '\u29eb', - 'lpar;': '(', - 'lparlt;': '\u2993', - 'lrarr;': '\u21c6', - 'lrcorner;': '\u231f', - 'lrhar;': '\u21cb', - 'lrhard;': '\u296d', - 'lrm;': '\u200e', - 'lrtri;': '\u22bf', - 'lsaquo;': '\u2039', - 'Lscr;': '\u2112', - 'lscr;': '\U0001d4c1', - 'Lsh;': '\u21b0', - 'lsh;': '\u21b0', - 'lsim;': '\u2272', - 'lsime;': '\u2a8d', - 'lsimg;': '\u2a8f', - 'lsqb;': '[', - 'lsquo;': '\u2018', - 'lsquor;': '\u201a', - 'Lstrok;': '\u0141', - 'lstrok;': '\u0142', - 'LT': '<', - 'lt': '<', - 'LT;': '<', - 'Lt;': '\u226a', - 'lt;': '<', - 'ltcc;': '\u2aa6', - 'ltcir;': '\u2a79', - 'ltdot;': '\u22d6', - 'lthree;': '\u22cb', - 'ltimes;': '\u22c9', - 'ltlarr;': '\u2976', - 'ltquest;': '\u2a7b', - 'ltri;': '\u25c3', - 'ltrie;': '\u22b4', - 'ltrif;': '\u25c2', - 'ltrPar;': '\u2996', - 'lurdshar;': '\u294a', - 'luruhar;': '\u2966', - 'lvertneqq;': '\u2268\ufe00', - 'lvnE;': '\u2268\ufe00', - 'macr': '\xaf', - 'macr;': '\xaf', - 'male;': '\u2642', - 'malt;': '\u2720', - 'maltese;': '\u2720', - 'Map;': '\u2905', - 'map;': '\u21a6', - 'mapsto;': '\u21a6', - 'mapstodown;': '\u21a7', - 'mapstoleft;': '\u21a4', - 'mapstoup;': '\u21a5', - 'marker;': '\u25ae', - 'mcomma;': '\u2a29', - 'Mcy;': '\u041c', - 
'mcy;': '\u043c', - 'mdash;': '\u2014', - 'mDDot;': '\u223a', - 'measuredangle;': '\u2221', - 'MediumSpace;': '\u205f', - 'Mellintrf;': '\u2133', - 'Mfr;': '\U0001d510', - 'mfr;': '\U0001d52a', - 'mho;': '\u2127', - 'micro': '\xb5', - 'micro;': '\xb5', - 'mid;': '\u2223', - 'midast;': '*', - 'midcir;': '\u2af0', - 'middot': '\xb7', - 'middot;': '\xb7', - 'minus;': '\u2212', - 'minusb;': '\u229f', - 'minusd;': '\u2238', - 'minusdu;': '\u2a2a', - 'MinusPlus;': '\u2213', - 'mlcp;': '\u2adb', - 'mldr;': '\u2026', - 'mnplus;': '\u2213', - 'models;': '\u22a7', - 'Mopf;': '\U0001d544', - 'mopf;': '\U0001d55e', - 'mp;': '\u2213', - 'Mscr;': '\u2133', - 'mscr;': '\U0001d4c2', - 'mstpos;': '\u223e', - 'Mu;': '\u039c', - 'mu;': '\u03bc', - 'multimap;': '\u22b8', - 'mumap;': '\u22b8', - 'nabla;': '\u2207', - 'Nacute;': '\u0143', - 'nacute;': '\u0144', - 'nang;': '\u2220\u20d2', - 'nap;': '\u2249', - 'napE;': '\u2a70\u0338', - 'napid;': '\u224b\u0338', - 'napos;': '\u0149', - 'napprox;': '\u2249', - 'natur;': '\u266e', - 'natural;': '\u266e', - 'naturals;': '\u2115', - 'nbsp': '\xa0', - 'nbsp;': '\xa0', - 'nbump;': '\u224e\u0338', - 'nbumpe;': '\u224f\u0338', - 'ncap;': '\u2a43', - 'Ncaron;': '\u0147', - 'ncaron;': '\u0148', - 'Ncedil;': '\u0145', - 'ncedil;': '\u0146', - 'ncong;': '\u2247', - 'ncongdot;': '\u2a6d\u0338', - 'ncup;': '\u2a42', - 'Ncy;': '\u041d', - 'ncy;': '\u043d', - 'ndash;': '\u2013', - 'ne;': '\u2260', - 'nearhk;': '\u2924', - 'neArr;': '\u21d7', - 'nearr;': '\u2197', - 'nearrow;': '\u2197', - 'nedot;': '\u2250\u0338', - 'NegativeMediumSpace;': '\u200b', - 'NegativeThickSpace;': '\u200b', - 'NegativeThinSpace;': '\u200b', - 'NegativeVeryThinSpace;': '\u200b', - 'nequiv;': '\u2262', - 'nesear;': '\u2928', - 'nesim;': '\u2242\u0338', - 'NestedGreaterGreater;': '\u226b', - 'NestedLessLess;': '\u226a', - 'NewLine;': '\n', - 'nexist;': '\u2204', - 'nexists;': '\u2204', - 'Nfr;': '\U0001d511', - 'nfr;': '\U0001d52b', - 'ngE;': '\u2267\u0338', - 'nge;': '\u2271', - 
'ngeq;': '\u2271', - 'ngeqq;': '\u2267\u0338', - 'ngeqslant;': '\u2a7e\u0338', - 'nges;': '\u2a7e\u0338', - 'nGg;': '\u22d9\u0338', - 'ngsim;': '\u2275', - 'nGt;': '\u226b\u20d2', - 'ngt;': '\u226f', - 'ngtr;': '\u226f', - 'nGtv;': '\u226b\u0338', - 'nhArr;': '\u21ce', - 'nharr;': '\u21ae', - 'nhpar;': '\u2af2', - 'ni;': '\u220b', - 'nis;': '\u22fc', - 'nisd;': '\u22fa', - 'niv;': '\u220b', - 'NJcy;': '\u040a', - 'njcy;': '\u045a', - 'nlArr;': '\u21cd', - 'nlarr;': '\u219a', - 'nldr;': '\u2025', - 'nlE;': '\u2266\u0338', - 'nle;': '\u2270', - 'nLeftarrow;': '\u21cd', - 'nleftarrow;': '\u219a', - 'nLeftrightarrow;': '\u21ce', - 'nleftrightarrow;': '\u21ae', - 'nleq;': '\u2270', - 'nleqq;': '\u2266\u0338', - 'nleqslant;': '\u2a7d\u0338', - 'nles;': '\u2a7d\u0338', - 'nless;': '\u226e', - 'nLl;': '\u22d8\u0338', - 'nlsim;': '\u2274', - 'nLt;': '\u226a\u20d2', - 'nlt;': '\u226e', - 'nltri;': '\u22ea', - 'nltrie;': '\u22ec', - 'nLtv;': '\u226a\u0338', - 'nmid;': '\u2224', - 'NoBreak;': '\u2060', - 'NonBreakingSpace;': '\xa0', - 'Nopf;': '\u2115', - 'nopf;': '\U0001d55f', - 'not': '\xac', - 'Not;': '\u2aec', - 'not;': '\xac', - 'NotCongruent;': '\u2262', - 'NotCupCap;': '\u226d', - 'NotDoubleVerticalBar;': '\u2226', - 'NotElement;': '\u2209', - 'NotEqual;': '\u2260', - 'NotEqualTilde;': '\u2242\u0338', - 'NotExists;': '\u2204', - 'NotGreater;': '\u226f', - 'NotGreaterEqual;': '\u2271', - 'NotGreaterFullEqual;': '\u2267\u0338', - 'NotGreaterGreater;': '\u226b\u0338', - 'NotGreaterLess;': '\u2279', - 'NotGreaterSlantEqual;': '\u2a7e\u0338', - 'NotGreaterTilde;': '\u2275', - 'NotHumpDownHump;': '\u224e\u0338', - 'NotHumpEqual;': '\u224f\u0338', - 'notin;': '\u2209', - 'notindot;': '\u22f5\u0338', - 'notinE;': '\u22f9\u0338', - 'notinva;': '\u2209', - 'notinvb;': '\u22f7', - 'notinvc;': '\u22f6', - 'NotLeftTriangle;': '\u22ea', - 'NotLeftTriangleBar;': '\u29cf\u0338', - 'NotLeftTriangleEqual;': '\u22ec', - 'NotLess;': '\u226e', - 'NotLessEqual;': '\u2270', - 
'NotLessGreater;': '\u2278', - 'NotLessLess;': '\u226a\u0338', - 'NotLessSlantEqual;': '\u2a7d\u0338', - 'NotLessTilde;': '\u2274', - 'NotNestedGreaterGreater;': '\u2aa2\u0338', - 'NotNestedLessLess;': '\u2aa1\u0338', - 'notni;': '\u220c', - 'notniva;': '\u220c', - 'notnivb;': '\u22fe', - 'notnivc;': '\u22fd', - 'NotPrecedes;': '\u2280', - 'NotPrecedesEqual;': '\u2aaf\u0338', - 'NotPrecedesSlantEqual;': '\u22e0', - 'NotReverseElement;': '\u220c', - 'NotRightTriangle;': '\u22eb', - 'NotRightTriangleBar;': '\u29d0\u0338', - 'NotRightTriangleEqual;': '\u22ed', - 'NotSquareSubset;': '\u228f\u0338', - 'NotSquareSubsetEqual;': '\u22e2', - 'NotSquareSuperset;': '\u2290\u0338', - 'NotSquareSupersetEqual;': '\u22e3', - 'NotSubset;': '\u2282\u20d2', - 'NotSubsetEqual;': '\u2288', - 'NotSucceeds;': '\u2281', - 'NotSucceedsEqual;': '\u2ab0\u0338', - 'NotSucceedsSlantEqual;': '\u22e1', - 'NotSucceedsTilde;': '\u227f\u0338', - 'NotSuperset;': '\u2283\u20d2', - 'NotSupersetEqual;': '\u2289', - 'NotTilde;': '\u2241', - 'NotTildeEqual;': '\u2244', - 'NotTildeFullEqual;': '\u2247', - 'NotTildeTilde;': '\u2249', - 'NotVerticalBar;': '\u2224', - 'npar;': '\u2226', - 'nparallel;': '\u2226', - 'nparsl;': '\u2afd\u20e5', - 'npart;': '\u2202\u0338', - 'npolint;': '\u2a14', - 'npr;': '\u2280', - 'nprcue;': '\u22e0', - 'npre;': '\u2aaf\u0338', - 'nprec;': '\u2280', - 'npreceq;': '\u2aaf\u0338', - 'nrArr;': '\u21cf', - 'nrarr;': '\u219b', - 'nrarrc;': '\u2933\u0338', - 'nrarrw;': '\u219d\u0338', - 'nRightarrow;': '\u21cf', - 'nrightarrow;': '\u219b', - 'nrtri;': '\u22eb', - 'nrtrie;': '\u22ed', - 'nsc;': '\u2281', - 'nsccue;': '\u22e1', - 'nsce;': '\u2ab0\u0338', - 'Nscr;': '\U0001d4a9', - 'nscr;': '\U0001d4c3', - 'nshortmid;': '\u2224', - 'nshortparallel;': '\u2226', - 'nsim;': '\u2241', - 'nsime;': '\u2244', - 'nsimeq;': '\u2244', - 'nsmid;': '\u2224', - 'nspar;': '\u2226', - 'nsqsube;': '\u22e2', - 'nsqsupe;': '\u22e3', - 'nsub;': '\u2284', - 'nsubE;': '\u2ac5\u0338', - 'nsube;': 
'\u2288', - 'nsubset;': '\u2282\u20d2', - 'nsubseteq;': '\u2288', - 'nsubseteqq;': '\u2ac5\u0338', - 'nsucc;': '\u2281', - 'nsucceq;': '\u2ab0\u0338', - 'nsup;': '\u2285', - 'nsupE;': '\u2ac6\u0338', - 'nsupe;': '\u2289', - 'nsupset;': '\u2283\u20d2', - 'nsupseteq;': '\u2289', - 'nsupseteqq;': '\u2ac6\u0338', - 'ntgl;': '\u2279', - 'Ntilde': '\xd1', - 'ntilde': '\xf1', - 'Ntilde;': '\xd1', - 'ntilde;': '\xf1', - 'ntlg;': '\u2278', - 'ntriangleleft;': '\u22ea', - 'ntrianglelefteq;': '\u22ec', - 'ntriangleright;': '\u22eb', - 'ntrianglerighteq;': '\u22ed', - 'Nu;': '\u039d', - 'nu;': '\u03bd', - 'num;': '#', - 'numero;': '\u2116', - 'numsp;': '\u2007', - 'nvap;': '\u224d\u20d2', - 'nVDash;': '\u22af', - 'nVdash;': '\u22ae', - 'nvDash;': '\u22ad', - 'nvdash;': '\u22ac', - 'nvge;': '\u2265\u20d2', - 'nvgt;': '>\u20d2', - 'nvHarr;': '\u2904', - 'nvinfin;': '\u29de', - 'nvlArr;': '\u2902', - 'nvle;': '\u2264\u20d2', - 'nvlt;': '<\u20d2', - 'nvltrie;': '\u22b4\u20d2', - 'nvrArr;': '\u2903', - 'nvrtrie;': '\u22b5\u20d2', - 'nvsim;': '\u223c\u20d2', - 'nwarhk;': '\u2923', - 'nwArr;': '\u21d6', - 'nwarr;': '\u2196', - 'nwarrow;': '\u2196', - 'nwnear;': '\u2927', - 'Oacute': '\xd3', - 'oacute': '\xf3', - 'Oacute;': '\xd3', - 'oacute;': '\xf3', - 'oast;': '\u229b', - 'ocir;': '\u229a', - 'Ocirc': '\xd4', - 'ocirc': '\xf4', - 'Ocirc;': '\xd4', - 'ocirc;': '\xf4', - 'Ocy;': '\u041e', - 'ocy;': '\u043e', - 'odash;': '\u229d', - 'Odblac;': '\u0150', - 'odblac;': '\u0151', - 'odiv;': '\u2a38', - 'odot;': '\u2299', - 'odsold;': '\u29bc', - 'OElig;': '\u0152', - 'oelig;': '\u0153', - 'ofcir;': '\u29bf', - 'Ofr;': '\U0001d512', - 'ofr;': '\U0001d52c', - 'ogon;': '\u02db', - 'Ograve': '\xd2', - 'ograve': '\xf2', - 'Ograve;': '\xd2', - 'ograve;': '\xf2', - 'ogt;': '\u29c1', - 'ohbar;': '\u29b5', - 'ohm;': '\u03a9', - 'oint;': '\u222e', - 'olarr;': '\u21ba', - 'olcir;': '\u29be', - 'olcross;': '\u29bb', - 'oline;': '\u203e', - 'olt;': '\u29c0', - 'Omacr;': '\u014c', - 'omacr;': '\u014d', 
- 'Omega;': '\u03a9', - 'omega;': '\u03c9', - 'Omicron;': '\u039f', - 'omicron;': '\u03bf', - 'omid;': '\u29b6', - 'ominus;': '\u2296', - 'Oopf;': '\U0001d546', - 'oopf;': '\U0001d560', - 'opar;': '\u29b7', - 'OpenCurlyDoubleQuote;': '\u201c', - 'OpenCurlyQuote;': '\u2018', - 'operp;': '\u29b9', - 'oplus;': '\u2295', - 'Or;': '\u2a54', - 'or;': '\u2228', - 'orarr;': '\u21bb', - 'ord;': '\u2a5d', - 'order;': '\u2134', - 'orderof;': '\u2134', - 'ordf': '\xaa', - 'ordf;': '\xaa', - 'ordm': '\xba', - 'ordm;': '\xba', - 'origof;': '\u22b6', - 'oror;': '\u2a56', - 'orslope;': '\u2a57', - 'orv;': '\u2a5b', - 'oS;': '\u24c8', - 'Oscr;': '\U0001d4aa', - 'oscr;': '\u2134', - 'Oslash': '\xd8', - 'oslash': '\xf8', - 'Oslash;': '\xd8', - 'oslash;': '\xf8', - 'osol;': '\u2298', - 'Otilde': '\xd5', - 'otilde': '\xf5', - 'Otilde;': '\xd5', - 'otilde;': '\xf5', - 'Otimes;': '\u2a37', - 'otimes;': '\u2297', - 'otimesas;': '\u2a36', - 'Ouml': '\xd6', - 'ouml': '\xf6', - 'Ouml;': '\xd6', - 'ouml;': '\xf6', - 'ovbar;': '\u233d', - 'OverBar;': '\u203e', - 'OverBrace;': '\u23de', - 'OverBracket;': '\u23b4', - 'OverParenthesis;': '\u23dc', - 'par;': '\u2225', - 'para': '\xb6', - 'para;': '\xb6', - 'parallel;': '\u2225', - 'parsim;': '\u2af3', - 'parsl;': '\u2afd', - 'part;': '\u2202', - 'PartialD;': '\u2202', - 'Pcy;': '\u041f', - 'pcy;': '\u043f', - 'percnt;': '%', - 'period;': '.', - 'permil;': '\u2030', - 'perp;': '\u22a5', - 'pertenk;': '\u2031', - 'Pfr;': '\U0001d513', - 'pfr;': '\U0001d52d', - 'Phi;': '\u03a6', - 'phi;': '\u03c6', - 'phiv;': '\u03d5', - 'phmmat;': '\u2133', - 'phone;': '\u260e', - 'Pi;': '\u03a0', - 'pi;': '\u03c0', - 'pitchfork;': '\u22d4', - 'piv;': '\u03d6', - 'planck;': '\u210f', - 'planckh;': '\u210e', - 'plankv;': '\u210f', - 'plus;': '+', - 'plusacir;': '\u2a23', - 'plusb;': '\u229e', - 'pluscir;': '\u2a22', - 'plusdo;': '\u2214', - 'plusdu;': '\u2a25', - 'pluse;': '\u2a72', - 'PlusMinus;': '\xb1', - 'plusmn': '\xb1', - 'plusmn;': '\xb1', - 'plussim;': 
'\u2a26', - 'plustwo;': '\u2a27', - 'pm;': '\xb1', - 'Poincareplane;': '\u210c', - 'pointint;': '\u2a15', - 'Popf;': '\u2119', - 'popf;': '\U0001d561', - 'pound': '\xa3', - 'pound;': '\xa3', - 'Pr;': '\u2abb', - 'pr;': '\u227a', - 'prap;': '\u2ab7', - 'prcue;': '\u227c', - 'prE;': '\u2ab3', - 'pre;': '\u2aaf', - 'prec;': '\u227a', - 'precapprox;': '\u2ab7', - 'preccurlyeq;': '\u227c', - 'Precedes;': '\u227a', - 'PrecedesEqual;': '\u2aaf', - 'PrecedesSlantEqual;': '\u227c', - 'PrecedesTilde;': '\u227e', - 'preceq;': '\u2aaf', - 'precnapprox;': '\u2ab9', - 'precneqq;': '\u2ab5', - 'precnsim;': '\u22e8', - 'precsim;': '\u227e', - 'Prime;': '\u2033', - 'prime;': '\u2032', - 'primes;': '\u2119', - 'prnap;': '\u2ab9', - 'prnE;': '\u2ab5', - 'prnsim;': '\u22e8', - 'prod;': '\u220f', - 'Product;': '\u220f', - 'profalar;': '\u232e', - 'profline;': '\u2312', - 'profsurf;': '\u2313', - 'prop;': '\u221d', - 'Proportion;': '\u2237', - 'Proportional;': '\u221d', - 'propto;': '\u221d', - 'prsim;': '\u227e', - 'prurel;': '\u22b0', - 'Pscr;': '\U0001d4ab', - 'pscr;': '\U0001d4c5', - 'Psi;': '\u03a8', - 'psi;': '\u03c8', - 'puncsp;': '\u2008', - 'Qfr;': '\U0001d514', - 'qfr;': '\U0001d52e', - 'qint;': '\u2a0c', - 'Qopf;': '\u211a', - 'qopf;': '\U0001d562', - 'qprime;': '\u2057', - 'Qscr;': '\U0001d4ac', - 'qscr;': '\U0001d4c6', - 'quaternions;': '\u210d', - 'quatint;': '\u2a16', - 'quest;': '?', - 'questeq;': '\u225f', - 'QUOT': '"', - 'quot': '"', - 'QUOT;': '"', - 'quot;': '"', - 'rAarr;': '\u21db', - 'race;': '\u223d\u0331', - 'Racute;': '\u0154', - 'racute;': '\u0155', - 'radic;': '\u221a', - 'raemptyv;': '\u29b3', - 'Rang;': '\u27eb', - 'rang;': '\u27e9', - 'rangd;': '\u2992', - 'range;': '\u29a5', - 'rangle;': '\u27e9', - 'raquo': '\xbb', - 'raquo;': '\xbb', - 'Rarr;': '\u21a0', - 'rArr;': '\u21d2', - 'rarr;': '\u2192', - 'rarrap;': '\u2975', - 'rarrb;': '\u21e5', - 'rarrbfs;': '\u2920', - 'rarrc;': '\u2933', - 'rarrfs;': '\u291e', - 'rarrhk;': '\u21aa', - 'rarrlp;': '\u21ac', 
- 'rarrpl;': '\u2945', - 'rarrsim;': '\u2974', - 'Rarrtl;': '\u2916', - 'rarrtl;': '\u21a3', - 'rarrw;': '\u219d', - 'rAtail;': '\u291c', - 'ratail;': '\u291a', - 'ratio;': '\u2236', - 'rationals;': '\u211a', - 'RBarr;': '\u2910', - 'rBarr;': '\u290f', - 'rbarr;': '\u290d', - 'rbbrk;': '\u2773', - 'rbrace;': '}', - 'rbrack;': ']', - 'rbrke;': '\u298c', - 'rbrksld;': '\u298e', - 'rbrkslu;': '\u2990', - 'Rcaron;': '\u0158', - 'rcaron;': '\u0159', - 'Rcedil;': '\u0156', - 'rcedil;': '\u0157', - 'rceil;': '\u2309', - 'rcub;': '}', - 'Rcy;': '\u0420', - 'rcy;': '\u0440', - 'rdca;': '\u2937', - 'rdldhar;': '\u2969', - 'rdquo;': '\u201d', - 'rdquor;': '\u201d', - 'rdsh;': '\u21b3', - 'Re;': '\u211c', - 'real;': '\u211c', - 'realine;': '\u211b', - 'realpart;': '\u211c', - 'reals;': '\u211d', - 'rect;': '\u25ad', - 'REG': '\xae', - 'reg': '\xae', - 'REG;': '\xae', - 'reg;': '\xae', - 'ReverseElement;': '\u220b', - 'ReverseEquilibrium;': '\u21cb', - 'ReverseUpEquilibrium;': '\u296f', - 'rfisht;': '\u297d', - 'rfloor;': '\u230b', - 'Rfr;': '\u211c', - 'rfr;': '\U0001d52f', - 'rHar;': '\u2964', - 'rhard;': '\u21c1', - 'rharu;': '\u21c0', - 'rharul;': '\u296c', - 'Rho;': '\u03a1', - 'rho;': '\u03c1', - 'rhov;': '\u03f1', - 'RightAngleBracket;': '\u27e9', - 'RightArrow;': '\u2192', - 'Rightarrow;': '\u21d2', - 'rightarrow;': '\u2192', - 'RightArrowBar;': '\u21e5', - 'RightArrowLeftArrow;': '\u21c4', - 'rightarrowtail;': '\u21a3', - 'RightCeiling;': '\u2309', - 'RightDoubleBracket;': '\u27e7', - 'RightDownTeeVector;': '\u295d', - 'RightDownVector;': '\u21c2', - 'RightDownVectorBar;': '\u2955', - 'RightFloor;': '\u230b', - 'rightharpoondown;': '\u21c1', - 'rightharpoonup;': '\u21c0', - 'rightleftarrows;': '\u21c4', - 'rightleftharpoons;': '\u21cc', - 'rightrightarrows;': '\u21c9', - 'rightsquigarrow;': '\u219d', - 'RightTee;': '\u22a2', - 'RightTeeArrow;': '\u21a6', - 'RightTeeVector;': '\u295b', - 'rightthreetimes;': '\u22cc', - 'RightTriangle;': '\u22b3', - 'RightTriangleBar;': 
'\u29d0', - 'RightTriangleEqual;': '\u22b5', - 'RightUpDownVector;': '\u294f', - 'RightUpTeeVector;': '\u295c', - 'RightUpVector;': '\u21be', - 'RightUpVectorBar;': '\u2954', - 'RightVector;': '\u21c0', - 'RightVectorBar;': '\u2953', - 'ring;': '\u02da', - 'risingdotseq;': '\u2253', - 'rlarr;': '\u21c4', - 'rlhar;': '\u21cc', - 'rlm;': '\u200f', - 'rmoust;': '\u23b1', - 'rmoustache;': '\u23b1', - 'rnmid;': '\u2aee', - 'roang;': '\u27ed', - 'roarr;': '\u21fe', - 'robrk;': '\u27e7', - 'ropar;': '\u2986', - 'Ropf;': '\u211d', - 'ropf;': '\U0001d563', - 'roplus;': '\u2a2e', - 'rotimes;': '\u2a35', - 'RoundImplies;': '\u2970', - 'rpar;': ')', - 'rpargt;': '\u2994', - 'rppolint;': '\u2a12', - 'rrarr;': '\u21c9', - 'Rrightarrow;': '\u21db', - 'rsaquo;': '\u203a', - 'Rscr;': '\u211b', - 'rscr;': '\U0001d4c7', - 'Rsh;': '\u21b1', - 'rsh;': '\u21b1', - 'rsqb;': ']', - 'rsquo;': '\u2019', - 'rsquor;': '\u2019', - 'rthree;': '\u22cc', - 'rtimes;': '\u22ca', - 'rtri;': '\u25b9', - 'rtrie;': '\u22b5', - 'rtrif;': '\u25b8', - 'rtriltri;': '\u29ce', - 'RuleDelayed;': '\u29f4', - 'ruluhar;': '\u2968', - 'rx;': '\u211e', - 'Sacute;': '\u015a', - 'sacute;': '\u015b', - 'sbquo;': '\u201a', - 'Sc;': '\u2abc', - 'sc;': '\u227b', - 'scap;': '\u2ab8', - 'Scaron;': '\u0160', - 'scaron;': '\u0161', - 'sccue;': '\u227d', - 'scE;': '\u2ab4', - 'sce;': '\u2ab0', - 'Scedil;': '\u015e', - 'scedil;': '\u015f', - 'Scirc;': '\u015c', - 'scirc;': '\u015d', - 'scnap;': '\u2aba', - 'scnE;': '\u2ab6', - 'scnsim;': '\u22e9', - 'scpolint;': '\u2a13', - 'scsim;': '\u227f', - 'Scy;': '\u0421', - 'scy;': '\u0441', - 'sdot;': '\u22c5', - 'sdotb;': '\u22a1', - 'sdote;': '\u2a66', - 'searhk;': '\u2925', - 'seArr;': '\u21d8', - 'searr;': '\u2198', - 'searrow;': '\u2198', - 'sect': '\xa7', - 'sect;': '\xa7', - 'semi;': ';', - 'seswar;': '\u2929', - 'setminus;': '\u2216', - 'setmn;': '\u2216', - 'sext;': '\u2736', - 'Sfr;': '\U0001d516', - 'sfr;': '\U0001d530', - 'sfrown;': '\u2322', - 'sharp;': '\u266f', - 
'SHCHcy;': '\u0429', - 'shchcy;': '\u0449', - 'SHcy;': '\u0428', - 'shcy;': '\u0448', - 'ShortDownArrow;': '\u2193', - 'ShortLeftArrow;': '\u2190', - 'shortmid;': '\u2223', - 'shortparallel;': '\u2225', - 'ShortRightArrow;': '\u2192', - 'ShortUpArrow;': '\u2191', - 'shy': '\xad', - 'shy;': '\xad', - 'Sigma;': '\u03a3', - 'sigma;': '\u03c3', - 'sigmaf;': '\u03c2', - 'sigmav;': '\u03c2', - 'sim;': '\u223c', - 'simdot;': '\u2a6a', - 'sime;': '\u2243', - 'simeq;': '\u2243', - 'simg;': '\u2a9e', - 'simgE;': '\u2aa0', - 'siml;': '\u2a9d', - 'simlE;': '\u2a9f', - 'simne;': '\u2246', - 'simplus;': '\u2a24', - 'simrarr;': '\u2972', - 'slarr;': '\u2190', - 'SmallCircle;': '\u2218', - 'smallsetminus;': '\u2216', - 'smashp;': '\u2a33', - 'smeparsl;': '\u29e4', - 'smid;': '\u2223', - 'smile;': '\u2323', - 'smt;': '\u2aaa', - 'smte;': '\u2aac', - 'smtes;': '\u2aac\ufe00', - 'SOFTcy;': '\u042c', - 'softcy;': '\u044c', - 'sol;': '/', - 'solb;': '\u29c4', - 'solbar;': '\u233f', - 'Sopf;': '\U0001d54a', - 'sopf;': '\U0001d564', - 'spades;': '\u2660', - 'spadesuit;': '\u2660', - 'spar;': '\u2225', - 'sqcap;': '\u2293', - 'sqcaps;': '\u2293\ufe00', - 'sqcup;': '\u2294', - 'sqcups;': '\u2294\ufe00', - 'Sqrt;': '\u221a', - 'sqsub;': '\u228f', - 'sqsube;': '\u2291', - 'sqsubset;': '\u228f', - 'sqsubseteq;': '\u2291', - 'sqsup;': '\u2290', - 'sqsupe;': '\u2292', - 'sqsupset;': '\u2290', - 'sqsupseteq;': '\u2292', - 'squ;': '\u25a1', - 'Square;': '\u25a1', - 'square;': '\u25a1', - 'SquareIntersection;': '\u2293', - 'SquareSubset;': '\u228f', - 'SquareSubsetEqual;': '\u2291', - 'SquareSuperset;': '\u2290', - 'SquareSupersetEqual;': '\u2292', - 'SquareUnion;': '\u2294', - 'squarf;': '\u25aa', - 'squf;': '\u25aa', - 'srarr;': '\u2192', - 'Sscr;': '\U0001d4ae', - 'sscr;': '\U0001d4c8', - 'ssetmn;': '\u2216', - 'ssmile;': '\u2323', - 'sstarf;': '\u22c6', - 'Star;': '\u22c6', - 'star;': '\u2606', - 'starf;': '\u2605', - 'straightepsilon;': '\u03f5', - 'straightphi;': '\u03d5', - 'strns;': 
'\xaf', - 'Sub;': '\u22d0', - 'sub;': '\u2282', - 'subdot;': '\u2abd', - 'subE;': '\u2ac5', - 'sube;': '\u2286', - 'subedot;': '\u2ac3', - 'submult;': '\u2ac1', - 'subnE;': '\u2acb', - 'subne;': '\u228a', - 'subplus;': '\u2abf', - 'subrarr;': '\u2979', - 'Subset;': '\u22d0', - 'subset;': '\u2282', - 'subseteq;': '\u2286', - 'subseteqq;': '\u2ac5', - 'SubsetEqual;': '\u2286', - 'subsetneq;': '\u228a', - 'subsetneqq;': '\u2acb', - 'subsim;': '\u2ac7', - 'subsub;': '\u2ad5', - 'subsup;': '\u2ad3', - 'succ;': '\u227b', - 'succapprox;': '\u2ab8', - 'succcurlyeq;': '\u227d', - 'Succeeds;': '\u227b', - 'SucceedsEqual;': '\u2ab0', - 'SucceedsSlantEqual;': '\u227d', - 'SucceedsTilde;': '\u227f', - 'succeq;': '\u2ab0', - 'succnapprox;': '\u2aba', - 'succneqq;': '\u2ab6', - 'succnsim;': '\u22e9', - 'succsim;': '\u227f', - 'SuchThat;': '\u220b', - 'Sum;': '\u2211', - 'sum;': '\u2211', - 'sung;': '\u266a', - 'sup1': '\xb9', - 'sup1;': '\xb9', - 'sup2': '\xb2', - 'sup2;': '\xb2', - 'sup3': '\xb3', - 'sup3;': '\xb3', - 'Sup;': '\u22d1', - 'sup;': '\u2283', - 'supdot;': '\u2abe', - 'supdsub;': '\u2ad8', - 'supE;': '\u2ac6', - 'supe;': '\u2287', - 'supedot;': '\u2ac4', - 'Superset;': '\u2283', - 'SupersetEqual;': '\u2287', - 'suphsol;': '\u27c9', - 'suphsub;': '\u2ad7', - 'suplarr;': '\u297b', - 'supmult;': '\u2ac2', - 'supnE;': '\u2acc', - 'supne;': '\u228b', - 'supplus;': '\u2ac0', - 'Supset;': '\u22d1', - 'supset;': '\u2283', - 'supseteq;': '\u2287', - 'supseteqq;': '\u2ac6', - 'supsetneq;': '\u228b', - 'supsetneqq;': '\u2acc', - 'supsim;': '\u2ac8', - 'supsub;': '\u2ad4', - 'supsup;': '\u2ad6', - 'swarhk;': '\u2926', - 'swArr;': '\u21d9', - 'swarr;': '\u2199', - 'swarrow;': '\u2199', - 'swnwar;': '\u292a', - 'szlig': '\xdf', - 'szlig;': '\xdf', - 'Tab;': '\t', - 'target;': '\u2316', - 'Tau;': '\u03a4', - 'tau;': '\u03c4', - 'tbrk;': '\u23b4', - 'Tcaron;': '\u0164', - 'tcaron;': '\u0165', - 'Tcedil;': '\u0162', - 'tcedil;': '\u0163', - 'Tcy;': '\u0422', - 'tcy;': '\u0442', - 
'tdot;': '\u20db', - 'telrec;': '\u2315', - 'Tfr;': '\U0001d517', - 'tfr;': '\U0001d531', - 'there4;': '\u2234', - 'Therefore;': '\u2234', - 'therefore;': '\u2234', - 'Theta;': '\u0398', - 'theta;': '\u03b8', - 'thetasym;': '\u03d1', - 'thetav;': '\u03d1', - 'thickapprox;': '\u2248', - 'thicksim;': '\u223c', - 'ThickSpace;': '\u205f\u200a', - 'thinsp;': '\u2009', - 'ThinSpace;': '\u2009', - 'thkap;': '\u2248', - 'thksim;': '\u223c', - 'THORN': '\xde', - 'thorn': '\xfe', - 'THORN;': '\xde', - 'thorn;': '\xfe', - 'Tilde;': '\u223c', - 'tilde;': '\u02dc', - 'TildeEqual;': '\u2243', - 'TildeFullEqual;': '\u2245', - 'TildeTilde;': '\u2248', - 'times': '\xd7', - 'times;': '\xd7', - 'timesb;': '\u22a0', - 'timesbar;': '\u2a31', - 'timesd;': '\u2a30', - 'tint;': '\u222d', - 'toea;': '\u2928', - 'top;': '\u22a4', - 'topbot;': '\u2336', - 'topcir;': '\u2af1', - 'Topf;': '\U0001d54b', - 'topf;': '\U0001d565', - 'topfork;': '\u2ada', - 'tosa;': '\u2929', - 'tprime;': '\u2034', - 'TRADE;': '\u2122', - 'trade;': '\u2122', - 'triangle;': '\u25b5', - 'triangledown;': '\u25bf', - 'triangleleft;': '\u25c3', - 'trianglelefteq;': '\u22b4', - 'triangleq;': '\u225c', - 'triangleright;': '\u25b9', - 'trianglerighteq;': '\u22b5', - 'tridot;': '\u25ec', - 'trie;': '\u225c', - 'triminus;': '\u2a3a', - 'TripleDot;': '\u20db', - 'triplus;': '\u2a39', - 'trisb;': '\u29cd', - 'tritime;': '\u2a3b', - 'trpezium;': '\u23e2', - 'Tscr;': '\U0001d4af', - 'tscr;': '\U0001d4c9', - 'TScy;': '\u0426', - 'tscy;': '\u0446', - 'TSHcy;': '\u040b', - 'tshcy;': '\u045b', - 'Tstrok;': '\u0166', - 'tstrok;': '\u0167', - 'twixt;': '\u226c', - 'twoheadleftarrow;': '\u219e', - 'twoheadrightarrow;': '\u21a0', - 'Uacute': '\xda', - 'uacute': '\xfa', - 'Uacute;': '\xda', - 'uacute;': '\xfa', - 'Uarr;': '\u219f', - 'uArr;': '\u21d1', - 'uarr;': '\u2191', - 'Uarrocir;': '\u2949', - 'Ubrcy;': '\u040e', - 'ubrcy;': '\u045e', - 'Ubreve;': '\u016c', - 'ubreve;': '\u016d', - 'Ucirc': '\xdb', - 'ucirc': '\xfb', - 'Ucirc;': 
'\xdb', - 'ucirc;': '\xfb', - 'Ucy;': '\u0423', - 'ucy;': '\u0443', - 'udarr;': '\u21c5', - 'Udblac;': '\u0170', - 'udblac;': '\u0171', - 'udhar;': '\u296e', - 'ufisht;': '\u297e', - 'Ufr;': '\U0001d518', - 'ufr;': '\U0001d532', - 'Ugrave': '\xd9', - 'ugrave': '\xf9', - 'Ugrave;': '\xd9', - 'ugrave;': '\xf9', - 'uHar;': '\u2963', - 'uharl;': '\u21bf', - 'uharr;': '\u21be', - 'uhblk;': '\u2580', - 'ulcorn;': '\u231c', - 'ulcorner;': '\u231c', - 'ulcrop;': '\u230f', - 'ultri;': '\u25f8', - 'Umacr;': '\u016a', - 'umacr;': '\u016b', - 'uml': '\xa8', - 'uml;': '\xa8', - 'UnderBar;': '_', - 'UnderBrace;': '\u23df', - 'UnderBracket;': '\u23b5', - 'UnderParenthesis;': '\u23dd', - 'Union;': '\u22c3', - 'UnionPlus;': '\u228e', - 'Uogon;': '\u0172', - 'uogon;': '\u0173', - 'Uopf;': '\U0001d54c', - 'uopf;': '\U0001d566', - 'UpArrow;': '\u2191', - 'Uparrow;': '\u21d1', - 'uparrow;': '\u2191', - 'UpArrowBar;': '\u2912', - 'UpArrowDownArrow;': '\u21c5', - 'UpDownArrow;': '\u2195', - 'Updownarrow;': '\u21d5', - 'updownarrow;': '\u2195', - 'UpEquilibrium;': '\u296e', - 'upharpoonleft;': '\u21bf', - 'upharpoonright;': '\u21be', - 'uplus;': '\u228e', - 'UpperLeftArrow;': '\u2196', - 'UpperRightArrow;': '\u2197', - 'Upsi;': '\u03d2', - 'upsi;': '\u03c5', - 'upsih;': '\u03d2', - 'Upsilon;': '\u03a5', - 'upsilon;': '\u03c5', - 'UpTee;': '\u22a5', - 'UpTeeArrow;': '\u21a5', - 'upuparrows;': '\u21c8', - 'urcorn;': '\u231d', - 'urcorner;': '\u231d', - 'urcrop;': '\u230e', - 'Uring;': '\u016e', - 'uring;': '\u016f', - 'urtri;': '\u25f9', - 'Uscr;': '\U0001d4b0', - 'uscr;': '\U0001d4ca', - 'utdot;': '\u22f0', - 'Utilde;': '\u0168', - 'utilde;': '\u0169', - 'utri;': '\u25b5', - 'utrif;': '\u25b4', - 'uuarr;': '\u21c8', - 'Uuml': '\xdc', - 'uuml': '\xfc', - 'Uuml;': '\xdc', - 'uuml;': '\xfc', - 'uwangle;': '\u29a7', - 'vangrt;': '\u299c', - 'varepsilon;': '\u03f5', - 'varkappa;': '\u03f0', - 'varnothing;': '\u2205', - 'varphi;': '\u03d5', - 'varpi;': '\u03d6', - 'varpropto;': '\u221d', - 
'vArr;': '\u21d5', - 'varr;': '\u2195', - 'varrho;': '\u03f1', - 'varsigma;': '\u03c2', - 'varsubsetneq;': '\u228a\ufe00', - 'varsubsetneqq;': '\u2acb\ufe00', - 'varsupsetneq;': '\u228b\ufe00', - 'varsupsetneqq;': '\u2acc\ufe00', - 'vartheta;': '\u03d1', - 'vartriangleleft;': '\u22b2', - 'vartriangleright;': '\u22b3', - 'Vbar;': '\u2aeb', - 'vBar;': '\u2ae8', - 'vBarv;': '\u2ae9', - 'Vcy;': '\u0412', - 'vcy;': '\u0432', - 'VDash;': '\u22ab', - 'Vdash;': '\u22a9', - 'vDash;': '\u22a8', - 'vdash;': '\u22a2', - 'Vdashl;': '\u2ae6', - 'Vee;': '\u22c1', - 'vee;': '\u2228', - 'veebar;': '\u22bb', - 'veeeq;': '\u225a', - 'vellip;': '\u22ee', - 'Verbar;': '\u2016', - 'verbar;': '|', - 'Vert;': '\u2016', - 'vert;': '|', - 'VerticalBar;': '\u2223', - 'VerticalLine;': '|', - 'VerticalSeparator;': '\u2758', - 'VerticalTilde;': '\u2240', - 'VeryThinSpace;': '\u200a', - 'Vfr;': '\U0001d519', - 'vfr;': '\U0001d533', - 'vltri;': '\u22b2', - 'vnsub;': '\u2282\u20d2', - 'vnsup;': '\u2283\u20d2', - 'Vopf;': '\U0001d54d', - 'vopf;': '\U0001d567', - 'vprop;': '\u221d', - 'vrtri;': '\u22b3', - 'Vscr;': '\U0001d4b1', - 'vscr;': '\U0001d4cb', - 'vsubnE;': '\u2acb\ufe00', - 'vsubne;': '\u228a\ufe00', - 'vsupnE;': '\u2acc\ufe00', - 'vsupne;': '\u228b\ufe00', - 'Vvdash;': '\u22aa', - 'vzigzag;': '\u299a', - 'Wcirc;': '\u0174', - 'wcirc;': '\u0175', - 'wedbar;': '\u2a5f', - 'Wedge;': '\u22c0', - 'wedge;': '\u2227', - 'wedgeq;': '\u2259', - 'weierp;': '\u2118', - 'Wfr;': '\U0001d51a', - 'wfr;': '\U0001d534', - 'Wopf;': '\U0001d54e', - 'wopf;': '\U0001d568', - 'wp;': '\u2118', - 'wr;': '\u2240', - 'wreath;': '\u2240', - 'Wscr;': '\U0001d4b2', - 'wscr;': '\U0001d4cc', - 'xcap;': '\u22c2', - 'xcirc;': '\u25ef', - 'xcup;': '\u22c3', - 'xdtri;': '\u25bd', - 'Xfr;': '\U0001d51b', - 'xfr;': '\U0001d535', - 'xhArr;': '\u27fa', - 'xharr;': '\u27f7', - 'Xi;': '\u039e', - 'xi;': '\u03be', - 'xlArr;': '\u27f8', - 'xlarr;': '\u27f5', - 'xmap;': '\u27fc', - 'xnis;': '\u22fb', - 'xodot;': '\u2a00', - 
'Xopf;': '\U0001d54f', - 'xopf;': '\U0001d569', - 'xoplus;': '\u2a01', - 'xotime;': '\u2a02', - 'xrArr;': '\u27f9', - 'xrarr;': '\u27f6', - 'Xscr;': '\U0001d4b3', - 'xscr;': '\U0001d4cd', - 'xsqcup;': '\u2a06', - 'xuplus;': '\u2a04', - 'xutri;': '\u25b3', - 'xvee;': '\u22c1', - 'xwedge;': '\u22c0', - 'Yacute': '\xdd', - 'yacute': '\xfd', - 'Yacute;': '\xdd', - 'yacute;': '\xfd', - 'YAcy;': '\u042f', - 'yacy;': '\u044f', - 'Ycirc;': '\u0176', - 'ycirc;': '\u0177', - 'Ycy;': '\u042b', - 'ycy;': '\u044b', - 'yen': '\xa5', - 'yen;': '\xa5', - 'Yfr;': '\U0001d51c', - 'yfr;': '\U0001d536', - 'YIcy;': '\u0407', - 'yicy;': '\u0457', - 'Yopf;': '\U0001d550', - 'yopf;': '\U0001d56a', - 'Yscr;': '\U0001d4b4', - 'yscr;': '\U0001d4ce', - 'YUcy;': '\u042e', - 'yucy;': '\u044e', - 'yuml': '\xff', - 'Yuml;': '\u0178', - 'yuml;': '\xff', - 'Zacute;': '\u0179', - 'zacute;': '\u017a', - 'Zcaron;': '\u017d', - 'zcaron;': '\u017e', - 'Zcy;': '\u0417', - 'zcy;': '\u0437', - 'Zdot;': '\u017b', - 'zdot;': '\u017c', - 'zeetrf;': '\u2128', - 'ZeroWidthSpace;': '\u200b', - 'Zeta;': '\u0396', - 'zeta;': '\u03b6', - 'Zfr;': '\u2128', - 'zfr;': '\U0001d537', - 'ZHcy;': '\u0416', - 'zhcy;': '\u0436', - 'zigrarr;': '\u21dd', - 'Zopf;': '\u2124', - 'zopf;': '\U0001d56b', - 'Zscr;': '\U0001d4b5', - 'zscr;': '\U0001d4cf', - 'zwj;': '\u200d', - 'zwnj;': '\u200c', -} - -# maps the Unicode codepoint to the HTML entity name -codepoint2name = {} - -# maps the HTML entity name to the character -# (or a character reference if the character is outside the Latin-1 range) -entitydefs = {} - -for (name, codepoint) in name2codepoint.items(): - codepoint2name[codepoint] = name - entitydefs[name] = chr(codepoint) - -del name, codepoint diff --git a/contrib/python/future/future/backports/html/parser.py b/contrib/python/future/future/backports/html/parser.py deleted file mode 100644 index fb652636d46..00000000000 --- a/contrib/python/future/future/backports/html/parser.py +++ /dev/null @@ -1,536 +0,0 @@ -"""A 
parser for HTML and XHTML. - -Backported for python-future from Python 3.3. -""" - -# This file is based on sgmllib.py, but the API is slightly different. - -# XXX There should be a way to distinguish between PCDATA (parsed -# character data -- the normal case), RCDATA (replaceable character -# data -- only char and entity references and end tags are special) -# and CDATA (character data -- only end tags are special). - -from __future__ import (absolute_import, division, - print_function, unicode_literals) -from future.builtins import * -from future.backports import _markupbase -import re -import warnings - -# Regular expressions used for parsing - -interesting_normal = re.compile('[&<]') -incomplete = re.compile('&[a-zA-Z#]') - -entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]') -charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]') - -starttagopen = re.compile('<[a-zA-Z]') -piclose = re.compile('>') -commentclose = re.compile(r'--\s*>') -tagfind = re.compile('([a-zA-Z][-.a-zA-Z0-9:_]*)(?:\s|/(?!>))*') -# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state -# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state -tagfind_tolerant = re.compile('[a-zA-Z][^\t\n\r\f />\x00]*') -# Note: -# 1) the strict attrfind isn't really strict, but we can't make it -# correctly strict without breaking backward compatibility; -# 2) if you change attrfind remember to update locatestarttagend too; -# 3) if you change attrfind and/or locatestarttagend the parser will -# explode, so don't do it. 
-attrfind = re.compile( - r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*' - r'(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?') -attrfind_tolerant = re.compile( - r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*' - r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*') -locatestarttagend = re.compile(r""" - <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name - (?:\s+ # whitespace before attribute name - (?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name - (?:\s*=\s* # value indicator - (?:'[^']*' # LITA-enclosed value - |\"[^\"]*\" # LIT-enclosed value - |[^'\">\s]+ # bare value - ) - )? - ) - )* - \s* # trailing whitespace -""", re.VERBOSE) -locatestarttagend_tolerant = re.compile(r""" - <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name - (?:[\s/]* # optional whitespace before attribute name - (?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name - (?:\s*=+\s* # value indicator - (?:'[^']*' # LITA-enclosed value - |"[^"]*" # LIT-enclosed value - |(?!['"])[^>\s]* # bare value - ) - (?:\s*,)* # possibly followed by a comma - )?(?:\s|/(?!>))* - )* - )? - \s* # trailing whitespace -""", re.VERBOSE) -endendtag = re.compile('>') -# the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between -# </ and the tag name, so maybe this should be fixed -endtagfind = re.compile('</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>') - - -class HTMLParseError(Exception): - """Exception raised for all parse errors.""" - - def __init__(self, msg, position=(None, None)): - assert msg - self.msg = msg - self.lineno = position[0] - self.offset = position[1] - - def __str__(self): - result = self.msg - if self.lineno is not None: - result = result + ", at line %d" % self.lineno - if self.offset is not None: - result = result + ", column %d" % (self.offset + 1) - return result - - -class HTMLParser(_markupbase.ParserBase): - """Find tags and other markup and call handler functions. - - Usage: - p = HTMLParser() - p.feed(data) - ... 
- p.close() - - Start tags are handled by calling self.handle_starttag() or - self.handle_startendtag(); end tags by self.handle_endtag(). The - data between tags is passed from the parser to the derived class - by calling self.handle_data() with the data as argument (the data - may be split up in arbitrary chunks). Entity references are - passed by calling self.handle_entityref() with the entity - reference as the argument. Numeric character references are - passed to self.handle_charref() with the string containing the - reference as the argument. - """ - - CDATA_CONTENT_ELEMENTS = ("script", "style") - - def __init__(self, strict=False): - """Initialize and reset this instance. - - If strict is set to False (the default) the parser will parse invalid - markup, otherwise it will raise an error. Note that the strict mode - is deprecated. - """ - if strict: - warnings.warn("The strict mode is deprecated.", - DeprecationWarning, stacklevel=2) - self.strict = strict - self.reset() - - def reset(self): - """Reset this instance. Loses all unprocessed data.""" - self.rawdata = '' - self.lasttag = '???' - self.interesting = interesting_normal - self.cdata_elem = None - _markupbase.ParserBase.reset(self) - - def feed(self, data): - r"""Feed data to the parser. - - Call this as often as you want, with as little or as much text - as you want (may include '\n'). 
- """ - self.rawdata = self.rawdata + data - self.goahead(0) - - def close(self): - """Handle any buffered data.""" - self.goahead(1) - - def error(self, message): - raise HTMLParseError(message, self.getpos()) - - __starttag_text = None - - def get_starttag_text(self): - """Return full source of start tag: '<...>'.""" - return self.__starttag_text - - def set_cdata_mode(self, elem): - self.cdata_elem = elem.lower() - self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I) - - def clear_cdata_mode(self): - self.interesting = interesting_normal - self.cdata_elem = None - - # Internal -- handle data as far as reasonable. May leave state - # and data to be processed by a subsequent call. If 'end' is - # true, force handling all data as if followed by EOF marker. - def goahead(self, end): - rawdata = self.rawdata - i = 0 - n = len(rawdata) - while i < n: - match = self.interesting.search(rawdata, i) # < or & - if match: - j = match.start() - else: - if self.cdata_elem: - break - j = n - if i < j: self.handle_data(rawdata[i:j]) - i = self.updatepos(i, j) - if i == n: break - startswith = rawdata.startswith - if startswith('<', i): - if starttagopen.match(rawdata, i): # < + letter - k = self.parse_starttag(i) - elif startswith("</", i): - k = self.parse_endtag(i) - elif startswith("<!--", i): - k = self.parse_comment(i) - elif startswith("<?", i): - k = self.parse_pi(i) - elif startswith("<!", i): - if self.strict: - k = self.parse_declaration(i) - else: - k = self.parse_html_declaration(i) - elif (i + 1) < n: - self.handle_data("<") - k = i + 1 - else: - break - if k < 0: - if not end: - break - if self.strict: - self.error("EOF in middle of construct") - k = rawdata.find('>', i + 1) - if k < 0: - k = rawdata.find('<', i + 1) - if k < 0: - k = i + 1 - else: - k += 1 - self.handle_data(rawdata[i:k]) - i = self.updatepos(i, k) - elif startswith("&#", i): - match = charref.match(rawdata, i) - if match: - name = match.group()[2:-1] - self.handle_charref(name) 
- k = match.end() - if not startswith(';', k-1): - k = k - 1 - i = self.updatepos(i, k) - continue - else: - if ";" in rawdata[i:]: #bail by consuming &# - self.handle_data(rawdata[0:2]) - i = self.updatepos(i, 2) - break - elif startswith('&', i): - match = entityref.match(rawdata, i) - if match: - name = match.group(1) - self.handle_entityref(name) - k = match.end() - if not startswith(';', k-1): - k = k - 1 - i = self.updatepos(i, k) - continue - match = incomplete.match(rawdata, i) - if match: - # match.group() will contain at least 2 chars - if end and match.group() == rawdata[i:]: - if self.strict: - self.error("EOF in middle of entity or char ref") - else: - if k <= i: - k = n - i = self.updatepos(i, i + 1) - # incomplete - break - elif (i + 1) < n: - # not the end of the buffer, and can't be confused - # with some other construct - self.handle_data("&") - i = self.updatepos(i, i + 1) - else: - break - else: - assert 0, "interesting.search() lied" - # end while - if end and i < n and not self.cdata_elem: - self.handle_data(rawdata[i:n]) - i = self.updatepos(i, n) - self.rawdata = rawdata[i:] - - # Internal -- parse html declarations, return length or -1 if not terminated - # See w3.org/TR/html5/tokenization.html#markup-declaration-open-state - # See also parse_declaration in _markupbase - def parse_html_declaration(self, i): - rawdata = self.rawdata - assert rawdata[i:i+2] == '<!', ('unexpected call to ' - 'parse_html_declaration()') - if rawdata[i:i+4] == '<!--': - # this case is actually already handled in goahead() - return self.parse_comment(i) - elif rawdata[i:i+3] == '<![': - return self.parse_marked_section(i) - elif rawdata[i:i+9].lower() == '<!doctype': - # find the closing > - gtpos = rawdata.find('>', i+9) - if gtpos == -1: - return -1 - self.handle_decl(rawdata[i+2:gtpos]) - return gtpos+1 - else: - return self.parse_bogus_comment(i) - - # Internal -- parse bogus comment, return length or -1 if not terminated - # see 
http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state - def parse_bogus_comment(self, i, report=1): - rawdata = self.rawdata - assert rawdata[i:i+2] in ('<!', '</'), ('unexpected call to ' - 'parse_comment()') - pos = rawdata.find('>', i+2) - if pos == -1: - return -1 - if report: - self.handle_comment(rawdata[i+2:pos]) - return pos + 1 - - # Internal -- parse processing instr, return end or -1 if not terminated - def parse_pi(self, i): - rawdata = self.rawdata - assert rawdata[i:i+2] == '<?', 'unexpected call to parse_pi()' - match = piclose.search(rawdata, i+2) # > - if not match: - return -1 - j = match.start() - self.handle_pi(rawdata[i+2: j]) - j = match.end() - return j - - # Internal -- handle starttag, return end or -1 if not terminated - def parse_starttag(self, i): - self.__starttag_text = None - endpos = self.check_for_whole_start_tag(i) - if endpos < 0: - return endpos - rawdata = self.rawdata - self.__starttag_text = rawdata[i:endpos] - - # Now parse the data between i+1 and j into a tag and attrs - attrs = [] - match = tagfind.match(rawdata, i+1) - assert match, 'unexpected call to parse_starttag()' - k = match.end() - self.lasttag = tag = match.group(1).lower() - while k < endpos: - if self.strict: - m = attrfind.match(rawdata, k) - else: - m = attrfind_tolerant.match(rawdata, k) - if not m: - break - attrname, rest, attrvalue = m.group(1, 2, 3) - if not rest: - attrvalue = None - elif attrvalue[:1] == '\'' == attrvalue[-1:] or \ - attrvalue[:1] == '"' == attrvalue[-1:]: - attrvalue = attrvalue[1:-1] - if attrvalue: - attrvalue = self.unescape(attrvalue) - attrs.append((attrname.lower(), attrvalue)) - k = m.end() - - end = rawdata[k:endpos].strip() - if end not in (">", "/>"): - lineno, offset = self.getpos() - if "\n" in self.__starttag_text: - lineno = lineno + self.__starttag_text.count("\n") - offset = len(self.__starttag_text) \ - - self.__starttag_text.rfind("\n") - else: - offset = offset + len(self.__starttag_text) - if 
self.strict: - self.error("junk characters in start tag: %r" - % (rawdata[k:endpos][:20],)) - self.handle_data(rawdata[i:endpos]) - return endpos - if end.endswith('/>'): - # XHTML-style empty tag: <span attr="value" /> - self.handle_startendtag(tag, attrs) - else: - self.handle_starttag(tag, attrs) - if tag in self.CDATA_CONTENT_ELEMENTS: - self.set_cdata_mode(tag) - return endpos - - # Internal -- check to see if we have a complete starttag; return end - # or -1 if incomplete. - def check_for_whole_start_tag(self, i): - rawdata = self.rawdata - if self.strict: - m = locatestarttagend.match(rawdata, i) - else: - m = locatestarttagend_tolerant.match(rawdata, i) - if m: - j = m.end() - next = rawdata[j:j+1] - if next == ">": - return j + 1 - if next == "/": - if rawdata.startswith("/>", j): - return j + 2 - if rawdata.startswith("/", j): - # buffer boundary - return -1 - # else bogus input - if self.strict: - self.updatepos(i, j + 1) - self.error("malformed empty start tag") - if j > i: - return j - else: - return i + 1 - if next == "": - # end of input - return -1 - if next in ("abcdefghijklmnopqrstuvwxyz=/" - "ABCDEFGHIJKLMNOPQRSTUVWXYZ"): - # end of input in or before attribute value, or we have the - # '/' from a '/>' ending - return -1 - if self.strict: - self.updatepos(i, j) - self.error("malformed start tag") - if j > i: - return j - else: - return i + 1 - raise AssertionError("we should not get here!") - - # Internal -- parse endtag, return end or -1 if incomplete - def parse_endtag(self, i): - rawdata = self.rawdata - assert rawdata[i:i+2] == "</", "unexpected call to parse_endtag" - match = endendtag.search(rawdata, i+1) # > - if not match: - return -1 - gtpos = match.end() - match = endtagfind.match(rawdata, i) # </ + tag + > - if not match: - if self.cdata_elem is not None: - self.handle_data(rawdata[i:gtpos]) - return gtpos - if self.strict: - self.error("bad end tag: %r" % (rawdata[i:gtpos],)) - # find the name: 
w3.org/TR/html5/tokenization.html#tag-name-state - namematch = tagfind_tolerant.match(rawdata, i+2) - if not namematch: - # w3.org/TR/html5/tokenization.html#end-tag-open-state - if rawdata[i:i+3] == '</>': - return i+3 - else: - return self.parse_bogus_comment(i) - tagname = namematch.group().lower() - # consume and ignore other stuff between the name and the > - # Note: this is not 100% correct, since we might have things like - # </tag attr=">">, but looking for > after tha name should cover - # most of the cases and is much simpler - gtpos = rawdata.find('>', namematch.end()) - self.handle_endtag(tagname) - return gtpos+1 - - elem = match.group(1).lower() # script or style - if self.cdata_elem is not None: - if elem != self.cdata_elem: - self.handle_data(rawdata[i:gtpos]) - return gtpos - - self.handle_endtag(elem.lower()) - self.clear_cdata_mode() - return gtpos - - # Overridable -- finish processing of start+end tag: <tag.../> - def handle_startendtag(self, tag, attrs): - self.handle_starttag(tag, attrs) - self.handle_endtag(tag) - - # Overridable -- handle start tag - def handle_starttag(self, tag, attrs): - pass - - # Overridable -- handle end tag - def handle_endtag(self, tag): - pass - - # Overridable -- handle character reference - def handle_charref(self, name): - pass - - # Overridable -- handle entity reference - def handle_entityref(self, name): - pass - - # Overridable -- handle data - def handle_data(self, data): - pass - - # Overridable -- handle comment - def handle_comment(self, data): - pass - - # Overridable -- handle declaration - def handle_decl(self, decl): - pass - - # Overridable -- handle processing instruction - def handle_pi(self, data): - pass - - def unknown_decl(self, data): - if self.strict: - self.error("unknown declaration: %r" % (data,)) - - # Internal -- helper to remove special character quoting - def unescape(self, s): - if '&' not in s: - return s - def replaceEntities(s): - s = s.groups()[0] - try: - if s[0] == "#": - s = 
s[1:] - if s[0] in ['x','X']: - c = int(s[1:].rstrip(';'), 16) - else: - c = int(s.rstrip(';')) - return chr(c) - except ValueError: - return '&#' + s - else: - from future.backports.html.entities import html5 - if s in html5: - return html5[s] - elif s.endswith(';'): - return '&' + s - for x in range(2, len(s)): - if s[:x] in html5: - return html5[s[:x]] + s[x:] - else: - return '&' + s - - return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+;|\w{1,32};?))", - replaceEntities, s) diff --git a/contrib/python/future/future/backports/http/__init__.py b/contrib/python/future/future/backports/http/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 --- a/contrib/python/future/future/backports/http/__init__.py +++ /dev/null diff --git a/contrib/python/future/future/backports/http/client.py b/contrib/python/future/future/backports/http/client.py deleted file mode 100644 index e663d125c4a..00000000000 --- a/contrib/python/future/future/backports/http/client.py +++ /dev/null @@ -1,1346 +0,0 @@ -"""HTTP/1.1 client library - -A backport of the Python 3.3 http/client.py module for python-future. - -<intro stuff goes here> -<other stuff, too> - -HTTPConnection goes through a number of "states", which define when a client -may legally make another request or fetch the response for a particular -request. 
This diagram details these state transitions: - - (null) - | - | HTTPConnection() - v - Idle - | - | putrequest() - v - Request-started - | - | ( putheader() )* endheaders() - v - Request-sent - | - | response = getresponse() - v - Unread-response [Response-headers-read] - |\____________________ - | | - | response.read() | putrequest() - v v - Idle Req-started-unread-response - ______/| - / | - response.read() | | ( putheader() )* endheaders() - v v - Request-started Req-sent-unread-response - | - | response.read() - v - Request-sent - -This diagram presents the following rules: - -- a second request may not be started until {response-headers-read} - -- a response [object] cannot be retrieved until {request-sent} - -- there is no differentiation between an unread response body and a - partially read response body - -Note: this enforcement is applied by the HTTPConnection class. The - HTTPResponse class does not enforce this state machine, which - implies sophisticated clients may accelerate the request/response - pipeline. Caution should be taken, though: accelerating the states - beyond the above pattern may imply knowledge of the server's - connection-close behavior for certain requests. For example, it - is impossible to tell whether the server will close the connection - UNTIL the response headers have been read; this means that further - requests cannot be placed into the pipeline until it is known that - the server will NOT be closing the connection. 
- -Logical State __state __response -------------- ------- ---------- -Idle _CS_IDLE None -Request-started _CS_REQ_STARTED None -Request-sent _CS_REQ_SENT None -Unread-response _CS_IDLE <response_class> -Req-started-unread-response _CS_REQ_STARTED <response_class> -Req-sent-unread-response _CS_REQ_SENT <response_class> -""" - -from __future__ import (absolute_import, division, - print_function, unicode_literals) -from future.builtins import bytes, int, str, super -from future.utils import PY2 - -from future.backports.email import parser as email_parser -from future.backports.email import message as email_message -from future.backports.misc import create_connection as socket_create_connection -import io -import os -import socket -from future.backports.urllib.parse import urlsplit -import warnings -from array import array - -if PY2: - from collections import Iterable -else: - from collections.abc import Iterable - -__all__ = ["HTTPResponse", "HTTPConnection", - "HTTPException", "NotConnected", "UnknownProtocol", - "UnknownTransferEncoding", "UnimplementedFileMode", - "IncompleteRead", "InvalidURL", "ImproperConnectionState", - "CannotSendRequest", "CannotSendHeader", "ResponseNotReady", - "BadStatusLine", "error", "responses"] - -HTTP_PORT = 80 -HTTPS_PORT = 443 - -_UNKNOWN = 'UNKNOWN' - -# connection states -_CS_IDLE = 'Idle' -_CS_REQ_STARTED = 'Request-started' -_CS_REQ_SENT = 'Request-sent' - -# status codes -# informational -CONTINUE = 100 -SWITCHING_PROTOCOLS = 101 -PROCESSING = 102 - -# successful -OK = 200 -CREATED = 201 -ACCEPTED = 202 -NON_AUTHORITATIVE_INFORMATION = 203 -NO_CONTENT = 204 -RESET_CONTENT = 205 -PARTIAL_CONTENT = 206 -MULTI_STATUS = 207 -IM_USED = 226 - -# redirection -MULTIPLE_CHOICES = 300 -MOVED_PERMANENTLY = 301 -FOUND = 302 -SEE_OTHER = 303 -NOT_MODIFIED = 304 -USE_PROXY = 305 -TEMPORARY_REDIRECT = 307 - -# client error -BAD_REQUEST = 400 -UNAUTHORIZED = 401 -PAYMENT_REQUIRED = 402 -FORBIDDEN = 403 -NOT_FOUND = 404 -METHOD_NOT_ALLOWED = 
405 -NOT_ACCEPTABLE = 406 -PROXY_AUTHENTICATION_REQUIRED = 407 -REQUEST_TIMEOUT = 408 -CONFLICT = 409 -GONE = 410 -LENGTH_REQUIRED = 411 -PRECONDITION_FAILED = 412 -REQUEST_ENTITY_TOO_LARGE = 413 -REQUEST_URI_TOO_LONG = 414 -UNSUPPORTED_MEDIA_TYPE = 415 -REQUESTED_RANGE_NOT_SATISFIABLE = 416 -EXPECTATION_FAILED = 417 -UNPROCESSABLE_ENTITY = 422 -LOCKED = 423 -FAILED_DEPENDENCY = 424 -UPGRADE_REQUIRED = 426 -PRECONDITION_REQUIRED = 428 -TOO_MANY_REQUESTS = 429 -REQUEST_HEADER_FIELDS_TOO_LARGE = 431 - -# server error -INTERNAL_SERVER_ERROR = 500 -NOT_IMPLEMENTED = 501 -BAD_GATEWAY = 502 -SERVICE_UNAVAILABLE = 503 -GATEWAY_TIMEOUT = 504 -HTTP_VERSION_NOT_SUPPORTED = 505 -INSUFFICIENT_STORAGE = 507 -NOT_EXTENDED = 510 -NETWORK_AUTHENTICATION_REQUIRED = 511 - -# Mapping status codes to official W3C names -responses = { - 100: 'Continue', - 101: 'Switching Protocols', - - 200: 'OK', - 201: 'Created', - 202: 'Accepted', - 203: 'Non-Authoritative Information', - 204: 'No Content', - 205: 'Reset Content', - 206: 'Partial Content', - - 300: 'Multiple Choices', - 301: 'Moved Permanently', - 302: 'Found', - 303: 'See Other', - 304: 'Not Modified', - 305: 'Use Proxy', - 306: '(Unused)', - 307: 'Temporary Redirect', - - 400: 'Bad Request', - 401: 'Unauthorized', - 402: 'Payment Required', - 403: 'Forbidden', - 404: 'Not Found', - 405: 'Method Not Allowed', - 406: 'Not Acceptable', - 407: 'Proxy Authentication Required', - 408: 'Request Timeout', - 409: 'Conflict', - 410: 'Gone', - 411: 'Length Required', - 412: 'Precondition Failed', - 413: 'Request Entity Too Large', - 414: 'Request-URI Too Long', - 415: 'Unsupported Media Type', - 416: 'Requested Range Not Satisfiable', - 417: 'Expectation Failed', - 428: 'Precondition Required', - 429: 'Too Many Requests', - 431: 'Request Header Fields Too Large', - - 500: 'Internal Server Error', - 501: 'Not Implemented', - 502: 'Bad Gateway', - 503: 'Service Unavailable', - 504: 'Gateway Timeout', - 505: 'HTTP Version Not Supported', - 511: 
'Network Authentication Required', -} - -# maximal amount of data to read at one time in _safe_read -MAXAMOUNT = 1048576 - -# maximal line length when calling readline(). -_MAXLINE = 65536 -_MAXHEADERS = 100 - - -class HTTPMessage(email_message.Message): - # XXX The only usage of this method is in - # http.server.CGIHTTPRequestHandler. Maybe move the code there so - # that it doesn't need to be part of the public API. The API has - # never been defined so this could cause backwards compatibility - # issues. - - def getallmatchingheaders(self, name): - """Find all header lines matching a given header name. - - Look through the list of headers and find all lines matching a given - header name (and their continuation lines). A list of the lines is - returned, without interpretation. If the header does not occur, an - empty list is returned. If the header occurs multiple times, all - occurrences are returned. Case is not important in the header name. - - """ - name = name.lower() + ':' - n = len(name) - lst = [] - hit = 0 - for line in self.keys(): - if line[:n].lower() == name: - hit = 1 - elif not line[:1].isspace(): - hit = 0 - if hit: - lst.append(line) - return lst - -def parse_headers(fp, _class=HTTPMessage): - """Parses only RFC2822 headers from a file pointer. - - email Parser wants to see strings rather than bytes. - But a TextIOWrapper around self.rfile would buffer too many bytes - from the stream, bytes which we later need to read as bytes. - So we read the correct bytes here, as bytes, for email Parser - to parse. 
- - """ - headers = [] - while True: - line = fp.readline(_MAXLINE + 1) - if len(line) > _MAXLINE: - raise LineTooLong("header line") - headers.append(line) - if len(headers) > _MAXHEADERS: - raise HTTPException("got more than %d headers" % _MAXHEADERS) - if line in (b'\r\n', b'\n', b''): - break - hstring = bytes(b'').join(headers).decode('iso-8859-1') - return email_parser.Parser(_class=_class).parsestr(hstring) - - -_strict_sentinel = object() - -class HTTPResponse(io.RawIOBase): - - # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details. - - # The bytes from the socket object are iso-8859-1 strings. - # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded - # text following RFC 2047. The basic status line parsing only - # accepts iso-8859-1. - - def __init__(self, sock, debuglevel=0, strict=_strict_sentinel, method=None, url=None): - # If the response includes a content-length header, we need to - # make sure that the client doesn't read more than the - # specified number of bytes. If it does, it will block until - # the server times out and closes the connection. This will - # happen if a self.fp.read() is done (without a size) whether - # self.fp is buffered or not. So, no self.fp.read() by - # clients unless they know what they are doing. - self.fp = sock.makefile("rb") - self.debuglevel = debuglevel - if strict is not _strict_sentinel: - warnings.warn("the 'strict' argument isn't supported anymore; " - "http.client now always assumes HTTP/1.x compliant servers.", - DeprecationWarning, 2) - self._method = method - - # The HTTPResponse object is returned via urllib. The clients - # of http and urllib expect different attributes for the - # headers. headers is used here and supports urllib. msg is - # provided as a backwards compatibility layer for http - # clients. 
- - self.headers = self.msg = None - - # from the Status-Line of the response - self.version = _UNKNOWN # HTTP-Version - self.status = _UNKNOWN # Status-Code - self.reason = _UNKNOWN # Reason-Phrase - - self.chunked = _UNKNOWN # is "chunked" being used? - self.chunk_left = _UNKNOWN # bytes left to read in current chunk - self.length = _UNKNOWN # number of bytes left in response - self.will_close = _UNKNOWN # conn will close at end of response - - def _read_status(self): - line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1") - if len(line) > _MAXLINE: - raise LineTooLong("status line") - if self.debuglevel > 0: - print("reply:", repr(line)) - if not line: - # Presumably, the server closed the connection before - # sending a valid response. - raise BadStatusLine(line) - try: - version, status, reason = line.split(None, 2) - except ValueError: - try: - version, status = line.split(None, 1) - reason = "" - except ValueError: - # empty version will cause next test to fail. - version = "" - if not version.startswith("HTTP/"): - self._close_conn() - raise BadStatusLine(line) - - # The status code is a three-digit number - try: - status = int(status) - if status < 100 or status > 999: - raise BadStatusLine(line) - except ValueError: - raise BadStatusLine(line) - return version, status, reason - - def begin(self): - if self.headers is not None: - # we've already started reading the response - return - - # read until we get a non-100 response - while True: - version, status, reason = self._read_status() - if status != CONTINUE: - break - # skip the header from the 100 response - while True: - skip = self.fp.readline(_MAXLINE + 1) - if len(skip) > _MAXLINE: - raise LineTooLong("header line") - skip = skip.strip() - if not skip: - break - if self.debuglevel > 0: - print("header:", skip) - - self.code = self.status = status - self.reason = reason.strip() - if version in ("HTTP/1.0", "HTTP/0.9"): - # Some servers might still return "0.9", treat it as 1.0 anyway - 
self.version = 10 - elif version.startswith("HTTP/1."): - self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1 - else: - raise UnknownProtocol(version) - - self.headers = self.msg = parse_headers(self.fp) - - if self.debuglevel > 0: - for hdr in self.headers: - print("header:", hdr, end=" ") - - # are we using the chunked-style of transfer encoding? - tr_enc = self.headers.get("transfer-encoding") - if tr_enc and tr_enc.lower() == "chunked": - self.chunked = True - self.chunk_left = None - else: - self.chunked = False - - # will the connection close at the end of the response? - self.will_close = self._check_close() - - # do we have a Content-Length? - # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked" - self.length = None - length = self.headers.get("content-length") - - # are we using the chunked-style of transfer encoding? - tr_enc = self.headers.get("transfer-encoding") - if length and not self.chunked: - try: - self.length = int(length) - except ValueError: - self.length = None - else: - if self.length < 0: # ignore nonsensical negative lengths - self.length = None - else: - self.length = None - - # does the body have a fixed length? (of zero) - if (status == NO_CONTENT or status == NOT_MODIFIED or - 100 <= status < 200 or # 1xx codes - self._method == "HEAD"): - self.length = 0 - - # if the connection remains open, and we aren't using chunked, and - # a content-length was not provided, then assume that the connection - # WILL close. - if (not self.will_close and - not self.chunked and - self.length is None): - self.will_close = True - - def _check_close(self): - conn = self.headers.get("connection") - if self.version == 11: - # An HTTP/1.1 proxy is assumed to stay open unless - # explicitly closed. - conn = self.headers.get("connection") - if conn and "close" in conn.lower(): - return True - return False - - # Some HTTP/1.0 implementations have support for persistent - # connections, using rules different than HTTP/1.1. 
- - # For older HTTP, Keep-Alive indicates persistent connection. - if self.headers.get("keep-alive"): - return False - - # At least Akamai returns a "Connection: Keep-Alive" header, - # which was supposed to be sent by the client. - if conn and "keep-alive" in conn.lower(): - return False - - # Proxy-Connection is a netscape hack. - pconn = self.headers.get("proxy-connection") - if pconn and "keep-alive" in pconn.lower(): - return False - - # otherwise, assume it will close - return True - - def _close_conn(self): - fp = self.fp - self.fp = None - fp.close() - - def close(self): - super().close() # set "closed" flag - if self.fp: - self._close_conn() - - # These implementations are for the benefit of io.BufferedReader. - - # XXX This class should probably be revised to act more like - # the "raw stream" that BufferedReader expects. - - def flush(self): - super().flush() - if self.fp: - self.fp.flush() - - def readable(self): - return True - - # End of "raw stream" methods - - def isclosed(self): - """True if the connection is closed.""" - # NOTE: it is possible that we will not ever call self.close(). This - # case occurs when will_close is TRUE, length is None, and we - # read up to the last byte, but NOT past it. - # - # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be - # called, meaning self.isclosed() is meaningful. 
- return self.fp is None - - def read(self, amt=None): - if self.fp is None: - return bytes(b"") - - if self._method == "HEAD": - self._close_conn() - return bytes(b"") - - if amt is not None: - # Amount is given, so call base class version - # (which is implemented in terms of self.readinto) - return bytes(super(HTTPResponse, self).read(amt)) - else: - # Amount is not given (unbounded read) so we must check self.length - # and self.chunked - - if self.chunked: - return self._readall_chunked() - - if self.length is None: - s = self.fp.read() - else: - try: - s = self._safe_read(self.length) - except IncompleteRead: - self._close_conn() - raise - self.length = 0 - self._close_conn() # we read everything - return bytes(s) - - def readinto(self, b): - if self.fp is None: - return 0 - - if self._method == "HEAD": - self._close_conn() - return 0 - - if self.chunked: - return self._readinto_chunked(b) - - if self.length is not None: - if len(b) > self.length: - # clip the read to the "end of response" - b = memoryview(b)[0:self.length] - - # we do not use _safe_read() here because this may be a .will_close - # connection, and the user is reading more bytes than will be provided - # (for example, reading in 1k chunks) - - if PY2: - data = self.fp.read(len(b)) - n = len(data) - b[:n] = data - else: - n = self.fp.readinto(b) - - if not n and b: - # Ideally, we would raise IncompleteRead if the content-length - # wasn't satisfied, but it might break compatibility. 
- self._close_conn() - elif self.length is not None: - self.length -= n - if not self.length: - self._close_conn() - return n - - def _read_next_chunk_size(self): - # Read the next chunk size from the file - line = self.fp.readline(_MAXLINE + 1) - if len(line) > _MAXLINE: - raise LineTooLong("chunk size") - i = line.find(b";") - if i >= 0: - line = line[:i] # strip chunk-extensions - try: - return int(line, 16) - except ValueError: - # close the connection as protocol synchronisation is - # probably lost - self._close_conn() - raise - - def _read_and_discard_trailer(self): - # read and discard trailer up to the CRLF terminator - ### note: we shouldn't have any trailers! - while True: - line = self.fp.readline(_MAXLINE + 1) - if len(line) > _MAXLINE: - raise LineTooLong("trailer line") - if not line: - # a vanishingly small number of sites EOF without - # sending the trailer - break - if line in (b'\r\n', b'\n', b''): - break - - def _readall_chunked(self): - assert self.chunked != _UNKNOWN - chunk_left = self.chunk_left - value = [] - while True: - if chunk_left is None: - try: - chunk_left = self._read_next_chunk_size() - if chunk_left == 0: - break - except ValueError: - raise IncompleteRead(bytes(b'').join(value)) - value.append(self._safe_read(chunk_left)) - - # we read the whole chunk, get another - self._safe_read(2) # toss the CRLF at the end of the chunk - chunk_left = None - - self._read_and_discard_trailer() - - # we read everything; close the "file" - self._close_conn() - - return bytes(b'').join(value) - - def _readinto_chunked(self, b): - assert self.chunked != _UNKNOWN - chunk_left = self.chunk_left - - total_bytes = 0 - mvb = memoryview(b) - while True: - if chunk_left is None: - try: - chunk_left = self._read_next_chunk_size() - if chunk_left == 0: - break - except ValueError: - raise IncompleteRead(bytes(b[0:total_bytes])) - - if len(mvb) < chunk_left: - n = self._safe_readinto(mvb) - self.chunk_left = chunk_left - n - return total_bytes + n - elif 
len(mvb) == chunk_left: - n = self._safe_readinto(mvb) - self._safe_read(2) # toss the CRLF at the end of the chunk - self.chunk_left = None - return total_bytes + n - else: - temp_mvb = mvb[0:chunk_left] - n = self._safe_readinto(temp_mvb) - mvb = mvb[n:] - total_bytes += n - - # we read the whole chunk, get another - self._safe_read(2) # toss the CRLF at the end of the chunk - chunk_left = None - - self._read_and_discard_trailer() - - # we read everything; close the "file" - self._close_conn() - - return total_bytes - - def _safe_read(self, amt): - """Read the number of bytes requested, compensating for partial reads. - - Normally, we have a blocking socket, but a read() can be interrupted - by a signal (resulting in a partial read). - - Note that we cannot distinguish between EOF and an interrupt when zero - bytes have been read. IncompleteRead() will be raised in this - situation. - - This function should be used when <amt> bytes "should" be present for - reading. If the bytes are truly not available (due to EOF), then the - IncompleteRead exception can be used to detect the problem. 
- """ - s = [] - while amt > 0: - chunk = self.fp.read(min(amt, MAXAMOUNT)) - if not chunk: - raise IncompleteRead(bytes(b'').join(s), amt) - s.append(chunk) - amt -= len(chunk) - return bytes(b"").join(s) - - def _safe_readinto(self, b): - """Same as _safe_read, but for reading into a buffer.""" - total_bytes = 0 - mvb = memoryview(b) - while total_bytes < len(b): - if MAXAMOUNT < len(mvb): - temp_mvb = mvb[0:MAXAMOUNT] - if PY2: - data = self.fp.read(len(temp_mvb)) - n = len(data) - temp_mvb[:n] = data - else: - n = self.fp.readinto(temp_mvb) - else: - if PY2: - data = self.fp.read(len(mvb)) - n = len(data) - mvb[:n] = data - else: - n = self.fp.readinto(mvb) - if not n: - raise IncompleteRead(bytes(mvb[0:total_bytes]), len(b)) - mvb = mvb[n:] - total_bytes += n - return total_bytes - - def fileno(self): - return self.fp.fileno() - - def getheader(self, name, default=None): - if self.headers is None: - raise ResponseNotReady() - headers = self.headers.get_all(name) or default - if isinstance(headers, str) or not hasattr(headers, '__iter__'): - return headers - else: - return ', '.join(headers) - - def getheaders(self): - """Return list of (header, value) tuples.""" - if self.headers is None: - raise ResponseNotReady() - return list(self.headers.items()) - - # We override IOBase.__iter__ so that it doesn't check for closed-ness - - def __iter__(self): - return self - - # For compatibility with old-style urllib responses. 
- - def info(self): - return self.headers - - def geturl(self): - return self.url - - def getcode(self): - return self.status - -class HTTPConnection(object): - - _http_vsn = 11 - _http_vsn_str = 'HTTP/1.1' - - response_class = HTTPResponse - default_port = HTTP_PORT - auto_open = 1 - debuglevel = 0 - - def __init__(self, host, port=None, strict=_strict_sentinel, - timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None): - if strict is not _strict_sentinel: - warnings.warn("the 'strict' argument isn't supported anymore; " - "http.client now always assumes HTTP/1.x compliant servers.", - DeprecationWarning, 2) - self.timeout = timeout - self.source_address = source_address - self.sock = None - self._buffer = [] - self.__response = None - self.__state = _CS_IDLE - self._method = None - self._tunnel_host = None - self._tunnel_port = None - self._tunnel_headers = {} - - self._set_hostport(host, port) - - def set_tunnel(self, host, port=None, headers=None): - """ Sets up the host and the port for the HTTP CONNECT Tunnelling. - - The headers argument should be a mapping of extra HTTP headers - to send with the CONNECT request. - """ - self._tunnel_host = host - self._tunnel_port = port - if headers: - self._tunnel_headers = headers - else: - self._tunnel_headers.clear() - - def _set_hostport(self, host, port): - if port is None: - i = host.rfind(':') - j = host.rfind(']') # ipv6 addresses have [...] 
- if i > j: - try: - port = int(host[i+1:]) - except ValueError: - if host[i+1:] == "": # http://foo.com:/ == http://foo.com/ - port = self.default_port - else: - raise InvalidURL("nonnumeric port: '%s'" % host[i+1:]) - host = host[:i] - else: - port = self.default_port - if host and host[0] == '[' and host[-1] == ']': - host = host[1:-1] - self.host = host - self.port = port - - def set_debuglevel(self, level): - self.debuglevel = level - - def _tunnel(self): - self._set_hostport(self._tunnel_host, self._tunnel_port) - connect_str = "CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port) - connect_bytes = connect_str.encode("ascii") - self.send(connect_bytes) - for header, value in self._tunnel_headers.items(): - header_str = "%s: %s\r\n" % (header, value) - header_bytes = header_str.encode("latin-1") - self.send(header_bytes) - self.send(bytes(b'\r\n')) - - response = self.response_class(self.sock, method=self._method) - (version, code, message) = response._read_status() - - if code != 200: - self.close() - raise socket.error("Tunnel connection failed: %d %s" % (code, - message.strip())) - while True: - line = response.fp.readline(_MAXLINE + 1) - if len(line) > _MAXLINE: - raise LineTooLong("header line") - if not line: - # for sites which EOF without sending a trailer - break - if line in (b'\r\n', b'\n', b''): - break - - def connect(self): - """Connect to the host and port specified in __init__.""" - self.sock = socket_create_connection((self.host,self.port), - self.timeout, self.source_address) - if self._tunnel_host: - self._tunnel() - - def close(self): - """Close the connection to the HTTP server.""" - if self.sock: - self.sock.close() # close it manually... there may be other refs - self.sock = None - if self.__response: - self.__response.close() - self.__response = None - self.__state = _CS_IDLE - - def send(self, data): - """Send `data' to the server. 
- ``data`` can be a string object, a bytes object, an array object, a - file-like object that supports a .read() method, or an iterable object. - """ - - if self.sock is None: - if self.auto_open: - self.connect() - else: - raise NotConnected() - - if self.debuglevel > 0: - print("send:", repr(data)) - blocksize = 8192 - # Python 2.7 array objects have a read method which is incompatible - # with the 2-arg calling syntax below. - if hasattr(data, "read") and not isinstance(data, array): - if self.debuglevel > 0: - print("sendIng a read()able") - encode = False - try: - mode = data.mode - except AttributeError: - # io.BytesIO and other file-like objects don't have a `mode` - # attribute. - pass - else: - if "b" not in mode: - encode = True - if self.debuglevel > 0: - print("encoding file using iso-8859-1") - while 1: - datablock = data.read(blocksize) - if not datablock: - break - if encode: - datablock = datablock.encode("iso-8859-1") - self.sock.sendall(datablock) - return - try: - self.sock.sendall(data) - except TypeError: - if isinstance(data, Iterable): - for d in data: - self.sock.sendall(d) - else: - raise TypeError("data should be a bytes-like object " - "or an iterable, got %r" % type(data)) - - def _output(self, s): - """Add a line of output to the current request buffer. - - Assumes that the line does *not* end with \\r\\n. - """ - self._buffer.append(s) - - def _send_output(self, message_body=None): - """Send the currently buffered request and clear the buffer. - - Appends an extra \\r\\n to the buffer. - A message_body may be specified, to be appended to the request. - """ - self._buffer.extend((bytes(b""), bytes(b""))) - msg = bytes(b"\r\n").join(self._buffer) - del self._buffer[:] - # If msg and message_body are sent in a single send() call, - # it will avoid performance problems caused by the interaction - # between delayed ack and the Nagle algorithm. 
- if isinstance(message_body, bytes): - msg += message_body - message_body = None - self.send(msg) - if message_body is not None: - # message_body was not a string (i.e. it is a file), and - # we must run the risk of Nagle. - self.send(message_body) - - def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0): - """Send a request to the server. - - `method' specifies an HTTP request method, e.g. 'GET'. - `url' specifies the object being requested, e.g. '/index.html'. - `skip_host' if True does not add automatically a 'Host:' header - `skip_accept_encoding' if True does not add automatically an - 'Accept-Encoding:' header - """ - - # if a prior response has been completed, then forget about it. - if self.__response and self.__response.isclosed(): - self.__response = None - - - # in certain cases, we cannot issue another request on this connection. - # this occurs when: - # 1) we are in the process of sending a request. (_CS_REQ_STARTED) - # 2) a response to a previous request has signalled that it is going - # to close the connection upon completion. - # 3) the headers for the previous response have not been read, thus - # we cannot determine whether point (2) is true. (_CS_REQ_SENT) - # - # if there is no prior response, then we can request at will. - # - # if point (2) is true, then we will have passed the socket to the - # response (effectively meaning, "there is no prior response"), and - # will open a new one when a new request is made. - # - # Note: if a prior response exists, then we *can* start a new request. - # We are not allowed to begin fetching the response to this new - # request, however, until that prior response is complete. 
- # - if self.__state == _CS_IDLE: - self.__state = _CS_REQ_STARTED - else: - raise CannotSendRequest(self.__state) - - # Save the method we use, we need it later in the response phase - self._method = method - if not url: - url = '/' - request = '%s %s %s' % (method, url, self._http_vsn_str) - - # Non-ASCII characters should have been eliminated earlier - self._output(request.encode('ascii')) - - if self._http_vsn == 11: - # Issue some standard headers for better HTTP/1.1 compliance - - if not skip_host: - # this header is issued *only* for HTTP/1.1 - # connections. more specifically, this means it is - # only issued when the client uses the new - # HTTPConnection() class. backwards-compat clients - # will be using HTTP/1.0 and those clients may be - # issuing this header themselves. we should NOT issue - # it twice; some web servers (such as Apache) barf - # when they see two Host: headers - - # If we need a non-standard port,include it in the - # header. If the request is going through a proxy, - # but the host of the actual URL, not the host of the - # proxy. - - netloc = '' - if url.startswith('http'): - nil, netloc, nil, nil, nil = urlsplit(url) - - if netloc: - try: - netloc_enc = netloc.encode("ascii") - except UnicodeEncodeError: - netloc_enc = netloc.encode("idna") - self.putheader('Host', netloc_enc) - else: - try: - host_enc = self.host.encode("ascii") - except UnicodeEncodeError: - host_enc = self.host.encode("idna") - - # As per RFC 273, IPv6 address should be wrapped with [] - # when used as Host header - - if self.host.find(':') >= 0: - host_enc = bytes(b'[' + host_enc + b']') - - if self.port == self.default_port: - self.putheader('Host', host_enc) - else: - host_enc = host_enc.decode("ascii") - self.putheader('Host', "%s:%s" % (host_enc, self.port)) - - # note: we are assuming that clients will not attempt to set these - # headers since *this* library must deal with the - # consequences. 
this also means that when the supporting - # libraries are updated to recognize other forms, then this - # code should be changed (removed or updated). - - # we only want a Content-Encoding of "identity" since we don't - # support encodings such as x-gzip or x-deflate. - if not skip_accept_encoding: - self.putheader('Accept-Encoding', 'identity') - - # we can accept "chunked" Transfer-Encodings, but no others - # NOTE: no TE header implies *only* "chunked" - #self.putheader('TE', 'chunked') - - # if TE is supplied in the header, then it must appear in a - # Connection header. - #self.putheader('Connection', 'TE') - - else: - # For HTTP/1.0, the server will assume "not chunked" - pass - - def putheader(self, header, *values): - """Send a request header line to the server. - - For example: h.putheader('Accept', 'text/html') - """ - if self.__state != _CS_REQ_STARTED: - raise CannotSendHeader() - - if hasattr(header, 'encode'): - header = header.encode('ascii') - values = list(values) - for i, one_value in enumerate(values): - if hasattr(one_value, 'encode'): - values[i] = one_value.encode('latin-1') - elif isinstance(one_value, int): - values[i] = str(one_value).encode('ascii') - value = bytes(b'\r\n\t').join(values) - header = header + bytes(b': ') + value - self._output(header) - - def endheaders(self, message_body=None): - """Indicate that the last header line has been sent to the server. - - This method sends the request to the server. The optional message_body - argument can be used to pass a message body associated with the - request. The message body will be sent in the same packet as the - message headers if it is a string, otherwise it is sent as a separate - packet. 
- """ - if self.__state == _CS_REQ_STARTED: - self.__state = _CS_REQ_SENT - else: - raise CannotSendHeader() - self._send_output(message_body) - - def request(self, method, url, body=None, headers={}): - """Send a complete request to the server.""" - self._send_request(method, url, body, headers) - - def _set_content_length(self, body): - # Set the content-length based on the body. - thelen = None - try: - thelen = str(len(body)) - except TypeError as te: - # If this is a file-like object, try to - # fstat its file descriptor - try: - thelen = str(os.fstat(body.fileno()).st_size) - except (AttributeError, OSError): - # Don't send a length if this failed - if self.debuglevel > 0: print("Cannot stat!!") - - if thelen is not None: - self.putheader('Content-Length', thelen) - - def _send_request(self, method, url, body, headers): - # Honor explicitly requested Host: and Accept-Encoding: headers. - header_names = dict.fromkeys([k.lower() for k in headers]) - skips = {} - if 'host' in header_names: - skips['skip_host'] = 1 - if 'accept-encoding' in header_names: - skips['skip_accept_encoding'] = 1 - - self.putrequest(method, url, **skips) - - if body is not None and ('content-length' not in header_names): - self._set_content_length(body) - for hdr, value in headers.items(): - self.putheader(hdr, value) - if isinstance(body, str): - # RFC 2616 Section 3.7.1 says that text default has a - # default charset of iso-8859-1. - body = body.encode('iso-8859-1') - self.endheaders(body) - - def getresponse(self): - """Get the response from the server. - - If the HTTPConnection is in the correct state, returns an - instance of HTTPResponse or of whatever object is returned by - class the response_class variable. - - If a request has not been sent or if a previous response has - not be handled, ResponseNotReady is raised. If the HTTP - response indicates that the connection should be closed, then - it will be closed before the response is returned. 
When the - connection is closed, the underlying socket is closed. - """ - - # if a prior response has been completed, then forget about it. - if self.__response and self.__response.isclosed(): - self.__response = None - - # if a prior response exists, then it must be completed (otherwise, we - # cannot read this response's header to determine the connection-close - # behavior) - # - # note: if a prior response existed, but was connection-close, then the - # socket and response were made independent of this HTTPConnection - # object since a new request requires that we open a whole new - # connection - # - # this means the prior response had one of two states: - # 1) will_close: this connection was reset and the prior socket and - # response operate independently - # 2) persistent: the response was retained and we await its - # isclosed() status to become true. - # - if self.__state != _CS_REQ_SENT or self.__response: - raise ResponseNotReady(self.__state) - - if self.debuglevel > 0: - response = self.response_class(self.sock, self.debuglevel, - method=self._method) - else: - response = self.response_class(self.sock, method=self._method) - - response.begin() - assert response.will_close != _UNKNOWN - self.__state = _CS_IDLE - - if response.will_close: - # this effectively passes the connection to the response - self.close() - else: - # remember this, so we can tell when it is complete - self.__response = response - - return response - -try: - import ssl - from ssl import SSLContext -except ImportError: - pass -else: - class HTTPSConnection(HTTPConnection): - "This class allows communication via SSL." - - default_port = HTTPS_PORT - - # XXX Should key_file and cert_file be deprecated in favour of context? 
- - def __init__(self, host, port=None, key_file=None, cert_file=None, - strict=_strict_sentinel, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, - source_address=None, **_3to2kwargs): - if 'check_hostname' in _3to2kwargs: check_hostname = _3to2kwargs['check_hostname']; del _3to2kwargs['check_hostname'] - else: check_hostname = None - if 'context' in _3to2kwargs: context = _3to2kwargs['context']; del _3to2kwargs['context'] - else: context = None - super(HTTPSConnection, self).__init__(host, port, strict, timeout, - source_address) - self.key_file = key_file - self.cert_file = cert_file - if context is None: - # Some reasonable defaults - context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) - context.options |= ssl.OP_NO_SSLv2 - will_verify = context.verify_mode != ssl.CERT_NONE - if check_hostname is None: - check_hostname = will_verify - elif check_hostname and not will_verify: - raise ValueError("check_hostname needs a SSL context with " - "either CERT_OPTIONAL or CERT_REQUIRED") - if key_file or cert_file: - context.load_cert_chain(cert_file, key_file) - self._context = context - self._check_hostname = check_hostname - - def connect(self): - "Connect to a host on a given (SSL) port." - - sock = socket_create_connection((self.host, self.port), - self.timeout, self.source_address) - - if self._tunnel_host: - self.sock = sock - self._tunnel() - - server_hostname = self.host if ssl.HAS_SNI else None - self.sock = self._context.wrap_socket(sock, - server_hostname=server_hostname) - try: - if self._check_hostname: - ssl.match_hostname(self.sock.getpeercert(), self.host) - except Exception: - self.sock.shutdown(socket.SHUT_RDWR) - self.sock.close() - raise - - __all__.append("HTTPSConnection") - - - # ###################################### - # # We use the old HTTPSConnection class from Py2.7, because ssl.SSLContext - # # doesn't exist in the Py2.7 stdlib - # class HTTPSConnection(HTTPConnection): - # "This class allows communication via SSL." 
- - # default_port = HTTPS_PORT - - # def __init__(self, host, port=None, key_file=None, cert_file=None, - # strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, - # source_address=None): - # HTTPConnection.__init__(self, host, port, strict, timeout, - # source_address) - # self.key_file = key_file - # self.cert_file = cert_file - - # def connect(self): - # "Connect to a host on a given (SSL) port." - - # sock = socket_create_connection((self.host, self.port), - # self.timeout, self.source_address) - # if self._tunnel_host: - # self.sock = sock - # self._tunnel() - # self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file) - - # __all__.append("HTTPSConnection") - # ###################################### - - -class HTTPException(Exception): - # Subclasses that define an __init__ must call Exception.__init__ - # or define self.args. Otherwise, str() will fail. - pass - -class NotConnected(HTTPException): - pass - -class InvalidURL(HTTPException): - pass - -class UnknownProtocol(HTTPException): - def __init__(self, version): - self.args = version, - self.version = version - -class UnknownTransferEncoding(HTTPException): - pass - -class UnimplementedFileMode(HTTPException): - pass - -class IncompleteRead(HTTPException): - def __init__(self, partial, expected=None): - self.args = partial, - self.partial = partial - self.expected = expected - def __repr__(self): - if self.expected is not None: - e = ', %i more expected' % self.expected - else: - e = '' - return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), e) - def __str__(self): - return repr(self) - -class ImproperConnectionState(HTTPException): - pass - -class CannotSendRequest(ImproperConnectionState): - pass - -class CannotSendHeader(ImproperConnectionState): - pass - -class ResponseNotReady(ImproperConnectionState): - pass - -class BadStatusLine(HTTPException): - def __init__(self, line): - if not line: - line = repr(line) - self.args = line, - self.line = line - -class 
LineTooLong(HTTPException): - def __init__(self, line_type): - HTTPException.__init__(self, "got more than %d bytes when reading %s" - % (_MAXLINE, line_type)) - -# for backwards compatibility -error = HTTPException diff --git a/contrib/python/future/future/backports/http/cookiejar.py b/contrib/python/future/future/backports/http/cookiejar.py deleted file mode 100644 index af3ef4151ae..00000000000 --- a/contrib/python/future/future/backports/http/cookiejar.py +++ /dev/null @@ -1,2110 +0,0 @@ -r"""HTTP cookie handling for web clients. - -This is a backport of the Py3.3 ``http.cookiejar`` module for -python-future. - -This module has (now fairly distant) origins in Gisle Aas' Perl module -HTTP::Cookies, from the libwww-perl library. - -Docstrings, comments and debug strings in this code refer to the -attributes of the HTTP cookie system as cookie-attributes, to distinguish -them clearly from Python attributes. - -Class diagram (note that BSDDBCookieJar and the MSIE* classes are not -distributed with the Python standard library, but are available from -http://wwwsearch.sf.net/): - - CookieJar____ - / \ \ - FileCookieJar \ \ - / | \ \ \ - MozillaCookieJar | LWPCookieJar \ \ - | | \ - | ---MSIEBase | \ - | / | | \ - | / MSIEDBCookieJar BSDDBCookieJar - |/ - MSIECookieJar - -""" - -from __future__ import unicode_literals -from __future__ import print_function -from __future__ import division -from __future__ import absolute_import -from future.builtins import filter, int, map, open, str -from future.utils import as_native_str, PY2 - -__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy', - 'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar'] - -import copy -import datetime -import re -if PY2: - re.ASCII = 0 -import time -from future.backports.urllib.parse import urlparse, urlsplit, quote -from future.backports.http.client import HTTP_PORT -try: - import threading as _threading -except ImportError: - import dummy_threading as _threading 
-from calendar import timegm - -debug = False # set to True to enable debugging via the logging module -logger = None - -def _debug(*args): - if not debug: - return - global logger - if not logger: - import logging - logger = logging.getLogger("http.cookiejar") - return logger.debug(*args) - - -DEFAULT_HTTP_PORT = str(HTTP_PORT) -MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar " - "instance initialised with one)") - -def _warn_unhandled_exception(): - # There are a few catch-all except: statements in this module, for - # catching input that's bad in unexpected ways. Warn if any - # exceptions are caught there. - import io, warnings, traceback - f = io.StringIO() - traceback.print_exc(None, f) - msg = f.getvalue() - warnings.warn("http.cookiejar bug!\n%s" % msg, stacklevel=2) - - -# Date/time conversion -# ----------------------------------------------------------------------------- - -EPOCH_YEAR = 1970 -def _timegm(tt): - year, month, mday, hour, min, sec = tt[:6] - if ((year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31) and - (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)): - return timegm(tt) - else: - return None - -DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] -MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] -MONTHS_LOWER = [] -for month in MONTHS: MONTHS_LOWER.append(month.lower()) - -def time2isoz(t=None): - """Return a string representing time in seconds since epoch, t. - - If the function is called without an argument, it will use the current - time. - - The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ", - representing Universal Time (UTC, aka GMT). 
An example of this format is: - - 1994-11-24 08:49:37Z - - """ - if t is None: - dt = datetime.datetime.utcnow() - else: - dt = datetime.datetime.utcfromtimestamp(t) - return "%04d-%02d-%02d %02d:%02d:%02dZ" % ( - dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second) - -def time2netscape(t=None): - """Return a string representing time in seconds since epoch, t. - - If the function is called without an argument, it will use the current - time. - - The format of the returned string is like this: - - Wed, DD-Mon-YYYY HH:MM:SS GMT - - """ - if t is None: - dt = datetime.datetime.utcnow() - else: - dt = datetime.datetime.utcfromtimestamp(t) - return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % ( - DAYS[dt.weekday()], dt.day, MONTHS[dt.month-1], - dt.year, dt.hour, dt.minute, dt.second) - - -UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None} - -TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$", re.ASCII) -def offset_from_tz_string(tz): - offset = None - if tz in UTC_ZONES: - offset = 0 - else: - m = TIMEZONE_RE.search(tz) - if m: - offset = 3600 * int(m.group(2)) - if m.group(3): - offset = offset + 60 * int(m.group(3)) - if m.group(1) == '-': - offset = -offset - return offset - -def _str2time(day, mon, yr, hr, min, sec, tz): - # translate month name to number - # month numbers start with 1 (January) - try: - mon = MONTHS_LOWER.index(mon.lower())+1 - except ValueError: - # maybe it's already a number - try: - imon = int(mon) - except ValueError: - return None - if 1 <= imon <= 12: - mon = imon - else: - return None - - # make sure clock elements are defined - if hr is None: hr = 0 - if min is None: min = 0 - if sec is None: sec = 0 - - yr = int(yr) - day = int(day) - hr = int(hr) - min = int(min) - sec = int(sec) - - if yr < 1000: - # find "obvious" year - cur_yr = time.localtime(time.time())[0] - m = cur_yr % 100 - tmp = yr - yr = yr + cur_yr - m - m = m - tmp - if abs(m) > 50: - if m > 0: yr = yr + 100 - else: yr = yr - 100 - - # convert UTC time tuple to 
seconds since epoch (not timezone-adjusted) - t = _timegm((yr, mon, day, hr, min, sec, tz)) - - if t is not None: - # adjust time using timezone string, to get absolute time since epoch - if tz is None: - tz = "UTC" - tz = tz.upper() - offset = offset_from_tz_string(tz) - if offset is None: - return None - t = t - offset - - return t - -STRICT_DATE_RE = re.compile( - r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) " - "(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$", re.ASCII) -WEEKDAY_RE = re.compile( - r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I | re.ASCII) -LOOSE_HTTP_DATE_RE = re.compile( - r"""^ - (\d\d?) # day - (?:\s+|[-\/]) - (\w+) # month - (?:\s+|[-\/]) - (\d+) # year - (?: - (?:\s+|:) # separator before clock - (\d\d?):(\d\d) # hour:min - (?::(\d\d))? # optional seconds - )? # optional clock - \s* - ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone - \s* - (?:\(\w+\))? # ASCII representation of timezone in parens. - \s*$""", re.X | re.ASCII) -def http2time(text): - """Returns time in seconds since epoch of time represented by a string. - - Return value is an integer. - - None is returned if the format of str is unrecognized, the time is outside - the representable range, or the timezone string is not recognized. If the - string contains no timezone, UTC is assumed. - - The timezone in the string may be numerical (like "-0800" or "+0100") or a - string timezone (like "UTC", "GMT", "BST" or "EST"). Currently, only the - timezone strings equivalent to UTC (zero offset) are known to the function. - - The function loosely parses the following formats: - - Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format - Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format - Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format - 09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday) - 08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday) - 08-Feb-1994 14:15:29 GMT -- broken rfc850 format (no weekday) - - The parser ignores leading and trailing whitespace. 
The time may be - absent. - - If the year is given with only 2 digits, the function will select the - century that makes the year closest to the current date. - - """ - # fast exit for strictly conforming string - m = STRICT_DATE_RE.search(text) - if m: - g = m.groups() - mon = MONTHS_LOWER.index(g[1].lower()) + 1 - tt = (int(g[2]), mon, int(g[0]), - int(g[3]), int(g[4]), float(g[5])) - return _timegm(tt) - - # No, we need some messy parsing... - - # clean up - text = text.lstrip() - text = WEEKDAY_RE.sub("", text, 1) # Useless weekday - - # tz is time zone specifier string - day, mon, yr, hr, min, sec, tz = [None]*7 - - # loose regexp parse - m = LOOSE_HTTP_DATE_RE.search(text) - if m is not None: - day, mon, yr, hr, min, sec, tz = m.groups() - else: - return None # bad format - - return _str2time(day, mon, yr, hr, min, sec, tz) - -ISO_DATE_RE = re.compile( - """^ - (\d{4}) # year - [-\/]? - (\d\d?) # numerical month - [-\/]? - (\d\d?) # day - (?: - (?:\s+|[-:Tt]) # separator before clock - (\d\d?):?(\d\d) # hour:min - (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional) - )? # optional clock - \s* - ([-+]?\d\d?:?(:?\d\d)? - |Z|z)? # timezone (Z is "zero meridian", i.e. GMT) - \s*$""", re.X | re. ASCII) -def iso2time(text): - """ - As for http2time, but parses the ISO 8601 formats: - - 1994-02-03 14:15:29 -0100 -- ISO 8601 format - 1994-02-03 14:15:29 -- zone is optional - 1994-02-03 -- only date - 1994-02-03T14:15:29 -- Use T as separator - 19940203T141529Z -- ISO 8601 compact format - 19940203 -- only date - - """ - # clean up - text = text.lstrip() - - # tz is time zone specifier string - day, mon, yr, hr, min, sec, tz = [None]*7 - - # loose regexp parse - m = ISO_DATE_RE.search(text) - if m is not None: - # XXX there's an extra bit of the timezone I'm ignoring here: is - # this the right thing to do? 
- yr, mon, day, hr, min, sec, tz, _ = m.groups() - else: - return None # bad format - - return _str2time(day, mon, yr, hr, min, sec, tz) - - -# Header parsing -# ----------------------------------------------------------------------------- - -def unmatched(match): - """Return unmatched part of re.Match object.""" - start, end = match.span(0) - return match.string[:start]+match.string[end:] - -HEADER_TOKEN_RE = re.compile(r"^\s*([^=\s;,]+)") -HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"") -HEADER_VALUE_RE = re.compile(r"^\s*=\s*([^\s;,]*)") -HEADER_ESCAPE_RE = re.compile(r"\\(.)") -def split_header_words(header_values): - r"""Parse header values into a list of lists containing key,value pairs. - - The function knows how to deal with ",", ";" and "=" as well as quoted - values after "=". A list of space separated tokens are parsed as if they - were separated by ";". - - If the header_values passed as argument contains multiple values, then they - are treated as if they were a single value separated by comma ",". - - This means that this function is useful for parsing header fields that - follow this syntax (BNF as from the HTTP/1.1 specification, but we relax - the requirement for tokens). - - headers = #header - header = (token | parameter) *( [";"] (token | parameter)) - - token = 1*<any CHAR except CTLs or separators> - separators = "(" | ")" | "<" | ">" | "@" - | "," | ";" | ":" | "\" | <"> - | "/" | "[" | "]" | "?" | "=" - | "{" | "}" | SP | HT - - quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) - qdtext = <any TEXT except <">> - quoted-pair = "\" CHAR - - parameter = attribute "=" value - attribute = token - value = token | quoted-string - - Each header is represented by a list of key/value pairs. The value for a - simple token (not part of a parameter) is None. Syntactically incorrect - headers will not necessarily be parsed as you would want. 
- - This is easier to describe with some examples: - - >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz']) - [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]] - >>> split_header_words(['text/html; charset="iso-8859-1"']) - [[('text/html', None), ('charset', 'iso-8859-1')]] - >>> split_header_words([r'Basic realm="\"foo\bar\""']) - [[('Basic', None), ('realm', '"foobar"')]] - - """ - assert not isinstance(header_values, str) - result = [] - for text in header_values: - orig_text = text - pairs = [] - while text: - m = HEADER_TOKEN_RE.search(text) - if m: - text = unmatched(m) - name = m.group(1) - m = HEADER_QUOTED_VALUE_RE.search(text) - if m: # quoted value - text = unmatched(m) - value = m.group(1) - value = HEADER_ESCAPE_RE.sub(r"\1", value) - else: - m = HEADER_VALUE_RE.search(text) - if m: # unquoted value - text = unmatched(m) - value = m.group(1) - value = value.rstrip() - else: - # no value, a lone token - value = None - pairs.append((name, value)) - elif text.lstrip().startswith(","): - # concatenated headers, as per RFC 2616 section 4.2 - text = text.lstrip()[1:] - if pairs: result.append(pairs) - pairs = [] - else: - # skip junk - non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text) - assert nr_junk_chars > 0, ( - "split_header_words bug: '%s', '%s', %s" % - (orig_text, text, pairs)) - text = non_junk - if pairs: result.append(pairs) - return result - -HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])") -def join_header_words(lists): - """Do the inverse (almost) of the conversion done by split_header_words. - - Takes a list of lists of (key, value) pairs and produces a single header - value. Attribute values are quoted if needed. 
- - >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]]) - 'text/plain; charset="iso-8859/1"' - >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]]) - 'text/plain, charset="iso-8859/1"' - - """ - headers = [] - for pairs in lists: - attr = [] - for k, v in pairs: - if v is not None: - if not re.search(r"^\w+$", v): - v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v) # escape " and \ - v = '"%s"' % v - k = "%s=%s" % (k, v) - attr.append(k) - if attr: headers.append("; ".join(attr)) - return ", ".join(headers) - -def strip_quotes(text): - if text.startswith('"'): - text = text[1:] - if text.endswith('"'): - text = text[:-1] - return text - -def parse_ns_headers(ns_headers): - """Ad-hoc parser for Netscape protocol cookie-attributes. - - The old Netscape cookie format for Set-Cookie can for instance contain - an unquoted "," in the expires field, so we have to use this ad-hoc - parser instead of split_header_words. - - XXX This may not make the best possible effort to parse all the crap - that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient - parser is probably better, so could do worse than following that if - this ever gives any trouble. - - Currently, this is also used for parsing RFC 2109 cookies. - - """ - known_attrs = ("expires", "domain", "path", "secure", - # RFC 2109 attrs (may turn up in Netscape cookies, too) - "version", "port", "max-age") - - result = [] - for ns_header in ns_headers: - pairs = [] - version_set = False - for ii, param in enumerate(re.split(r";\s*", ns_header)): - param = param.rstrip() - if param == "": continue - if "=" not in param: - k, v = param, None - else: - k, v = re.split(r"\s*=\s*", param, 1) - k = k.lstrip() - if ii != 0: - lc = k.lower() - if lc in known_attrs: - k = lc - if k == "version": - # This is an RFC 2109 cookie. 
- v = strip_quotes(v) - version_set = True - if k == "expires": - # convert expires date to seconds since epoch - v = http2time(strip_quotes(v)) # None if invalid - pairs.append((k, v)) - - if pairs: - if not version_set: - pairs.append(("version", "0")) - result.append(pairs) - - return result - - -IPV4_RE = re.compile(r"\.\d+$", re.ASCII) -def is_HDN(text): - """Return True if text is a host domain name.""" - # XXX - # This may well be wrong. Which RFC is HDN defined in, if any (for - # the purposes of RFC 2965)? - # For the current implementation, what about IPv6? Remember to look - # at other uses of IPV4_RE also, if change this. - if IPV4_RE.search(text): - return False - if text == "": - return False - if text[0] == "." or text[-1] == ".": - return False - return True - -def domain_match(A, B): - """Return True if domain A domain-matches domain B, according to RFC 2965. - - A and B may be host domain names or IP addresses. - - RFC 2965, section 1: - - Host names can be specified either as an IP address or a HDN string. - Sometimes we compare one host name with another. (Such comparisons SHALL - be case-insensitive.) Host A's name domain-matches host B's if - - * their host name strings string-compare equal; or - - * A is a HDN string and has the form NB, where N is a non-empty - name string, B has the form .B', and B' is a HDN string. (So, - x.y.com domain-matches .Y.com but not Y.com.) - - Note that domain-match is not a commutative operation: a.b.c.com - domain-matches .c.com, but not the reverse. - - """ - # Note that, if A or B are IP addresses, the only relevant part of the - # definition of the domain-match algorithm is the direct string-compare. 
- A = A.lower() - B = B.lower() - if A == B: - return True - if not is_HDN(A): - return False - i = A.rfind(B) - if i == -1 or i == 0: - # A does not have form NB, or N is the empty string - return False - if not B.startswith("."): - return False - if not is_HDN(B[1:]): - return False - return True - -def liberal_is_HDN(text): - """Return True if text is a sort-of-like a host domain name. - - For accepting/blocking domains. - - """ - if IPV4_RE.search(text): - return False - return True - -def user_domain_match(A, B): - """For blocking/accepting domains. - - A and B may be host domain names or IP addresses. - - """ - A = A.lower() - B = B.lower() - if not (liberal_is_HDN(A) and liberal_is_HDN(B)): - if A == B: - # equal IP addresses - return True - return False - initial_dot = B.startswith(".") - if initial_dot and A.endswith(B): - return True - if not initial_dot and A == B: - return True - return False - -cut_port_re = re.compile(r":\d+$", re.ASCII) -def request_host(request): - """Return request-host, as defined by RFC 2965. - - Variation from RFC: returned value is lowercased, for convenient - comparison. - - """ - url = request.get_full_url() - host = urlparse(url)[1] - if host == "": - host = request.get_header("Host", "") - - # remove port, if present - host = cut_port_re.sub("", host, 1) - return host.lower() - -def eff_request_host(request): - """Return a tuple (request-host, effective request-host name). - - As defined by RFC 2965, except both are lowercased. 
- - """ - erhn = req_host = request_host(request) - if req_host.find(".") == -1 and not IPV4_RE.search(req_host): - erhn = req_host + ".local" - return req_host, erhn - -def request_path(request): - """Path component of request-URI, as defined by RFC 2965.""" - url = request.get_full_url() - parts = urlsplit(url) - path = escape_path(parts.path) - if not path.startswith("/"): - # fix bad RFC 2396 absoluteURI - path = "/" + path - return path - -def request_port(request): - host = request.host - i = host.find(':') - if i >= 0: - port = host[i+1:] - try: - int(port) - except ValueError: - _debug("nonnumeric port: '%s'", port) - return None - else: - port = DEFAULT_HTTP_PORT - return port - -# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't -# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738). -HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()" -ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])") -def uppercase_escaped_char(match): - return "%%%s" % match.group(1).upper() -def escape_path(path): - """Escape any invalid characters in HTTP URL, and uppercase all escapes.""" - # There's no knowing what character encoding was used to create URLs - # containing %-escapes, but since we have to pick one to escape invalid - # path characters, we pick UTF-8, as recommended in the HTML 4.0 - # specification: - # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1 - # And here, kind of: draft-fielding-uri-rfc2396bis-03 - # (And in draft IRI specification: draft-duerst-iri-05) - # (And here, for new URI schemes: RFC 2718) - path = quote(path, HTTP_PATH_SAFE) - path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path) - return path - -def reach(h): - """Return reach of host h, as defined by RFC 2965, section 1. 
- - The reach R of a host name H is defined as follows: - - * If - - - H is the host domain name of a host; and, - - - H has the form A.B; and - - - A has no embedded (that is, interior) dots; and - - - B has at least one embedded dot, or B is the string "local". - then the reach of H is .B. - - * Otherwise, the reach of H is H. - - >>> reach("www.acme.com") - '.acme.com' - >>> reach("acme.com") - 'acme.com' - >>> reach("acme.local") - '.local' - - """ - i = h.find(".") - if i >= 0: - #a = h[:i] # this line is only here to show what a is - b = h[i+1:] - i = b.find(".") - if is_HDN(h) and (i >= 0 or b == "local"): - return "."+b - return h - -def is_third_party(request): - """ - - RFC 2965, section 3.3.6: - - An unverifiable transaction is to a third-party host if its request- - host U does not domain-match the reach R of the request-host O in the - origin transaction. - - """ - req_host = request_host(request) - if not domain_match(req_host, reach(request.get_origin_req_host())): - return True - else: - return False - - -class Cookie(object): - """HTTP Cookie. - - This class represents both Netscape and RFC 2965 cookies. - - This is deliberately a very simple class. It just holds attributes. It's - possible to construct Cookie instances that don't comply with the cookie - standards. CookieJar.make_cookies is the factory function for Cookie - objects -- it deals with cookie parsing, supplying defaults, and - normalising to the representation used in this class. CookiePolicy is - responsible for checking them to see whether they should be accepted from - and returned to the server. - - Note that the port may be present in the headers, but unspecified ("Port" - rather than"Port=80", for example); if this is the case, port is None. 
- - """ - - def __init__(self, version, name, value, - port, port_specified, - domain, domain_specified, domain_initial_dot, - path, path_specified, - secure, - expires, - discard, - comment, - comment_url, - rest, - rfc2109=False, - ): - - if version is not None: version = int(version) - if expires is not None: expires = int(expires) - if port is None and port_specified is True: - raise ValueError("if port is None, port_specified must be false") - - self.version = version - self.name = name - self.value = value - self.port = port - self.port_specified = port_specified - # normalise case, as per RFC 2965 section 3.3.3 - self.domain = domain.lower() - self.domain_specified = domain_specified - # Sigh. We need to know whether the domain given in the - # cookie-attribute had an initial dot, in order to follow RFC 2965 - # (as clarified in draft errata). Needed for the returned $Domain - # value. - self.domain_initial_dot = domain_initial_dot - self.path = path - self.path_specified = path_specified - self.secure = secure - self.expires = expires - self.discard = discard - self.comment = comment - self.comment_url = comment_url - self.rfc2109 = rfc2109 - - self._rest = copy.copy(rest) - - def has_nonstandard_attr(self, name): - return name in self._rest - def get_nonstandard_attr(self, name, default=None): - return self._rest.get(name, default) - def set_nonstandard_attr(self, name, value): - self._rest[name] = value - - def is_expired(self, now=None): - if now is None: now = time.time() - if (self.expires is not None) and (self.expires <= now): - return True - return False - - def __str__(self): - if self.port is None: p = "" - else: p = ":"+self.port - limit = self.domain + p + self.path - if self.value is not None: - namevalue = "%s=%s" % (self.name, self.value) - else: - namevalue = self.name - return "<Cookie %s for %s>" % (namevalue, limit) - - @as_native_str() - def __repr__(self): - args = [] - for name in ("version", "name", "value", - "port", 
"port_specified", - "domain", "domain_specified", "domain_initial_dot", - "path", "path_specified", - "secure", "expires", "discard", "comment", "comment_url", - ): - attr = getattr(self, name) - ### Python-Future: - # Avoid u'...' prefixes for unicode strings: - if isinstance(attr, str): - attr = str(attr) - ### - args.append(str("%s=%s") % (name, repr(attr))) - args.append("rest=%s" % repr(self._rest)) - args.append("rfc2109=%s" % repr(self.rfc2109)) - return "Cookie(%s)" % ", ".join(args) - - -class CookiePolicy(object): - """Defines which cookies get accepted from and returned to server. - - May also modify cookies, though this is probably a bad idea. - - The subclass DefaultCookiePolicy defines the standard rules for Netscape - and RFC 2965 cookies -- override that if you want a customised policy. - - """ - def set_ok(self, cookie, request): - """Return true if (and only if) cookie should be accepted from server. - - Currently, pre-expired cookies never get this far -- the CookieJar - class deletes such cookies itself. - - """ - raise NotImplementedError() - - def return_ok(self, cookie, request): - """Return true if (and only if) cookie should be returned to server.""" - raise NotImplementedError() - - def domain_return_ok(self, domain, request): - """Return false if cookies should not be returned, given cookie domain. - """ - return True - - def path_return_ok(self, path, request): - """Return false if cookies should not be returned, given cookie path. 
- """ - return True - - -class DefaultCookiePolicy(CookiePolicy): - """Implements the standard rules for accepting and returning cookies.""" - - DomainStrictNoDots = 1 - DomainStrictNonDomain = 2 - DomainRFC2965Match = 4 - - DomainLiberal = 0 - DomainStrict = DomainStrictNoDots|DomainStrictNonDomain - - def __init__(self, - blocked_domains=None, allowed_domains=None, - netscape=True, rfc2965=False, - rfc2109_as_netscape=None, - hide_cookie2=False, - strict_domain=False, - strict_rfc2965_unverifiable=True, - strict_ns_unverifiable=False, - strict_ns_domain=DomainLiberal, - strict_ns_set_initial_dollar=False, - strict_ns_set_path=False, - ): - """Constructor arguments should be passed as keyword arguments only.""" - self.netscape = netscape - self.rfc2965 = rfc2965 - self.rfc2109_as_netscape = rfc2109_as_netscape - self.hide_cookie2 = hide_cookie2 - self.strict_domain = strict_domain - self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable - self.strict_ns_unverifiable = strict_ns_unverifiable - self.strict_ns_domain = strict_ns_domain - self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar - self.strict_ns_set_path = strict_ns_set_path - - if blocked_domains is not None: - self._blocked_domains = tuple(blocked_domains) - else: - self._blocked_domains = () - - if allowed_domains is not None: - allowed_domains = tuple(allowed_domains) - self._allowed_domains = allowed_domains - - def blocked_domains(self): - """Return the sequence of blocked domains (as a tuple).""" - return self._blocked_domains - def set_blocked_domains(self, blocked_domains): - """Set the sequence of blocked domains.""" - self._blocked_domains = tuple(blocked_domains) - - def is_blocked(self, domain): - for blocked_domain in self._blocked_domains: - if user_domain_match(domain, blocked_domain): - return True - return False - - def allowed_domains(self): - """Return None, or the sequence of allowed domains (as a tuple).""" - return self._allowed_domains - def 
set_allowed_domains(self, allowed_domains): - """Set the sequence of allowed domains, or None.""" - if allowed_domains is not None: - allowed_domains = tuple(allowed_domains) - self._allowed_domains = allowed_domains - - def is_not_allowed(self, domain): - if self._allowed_domains is None: - return False - for allowed_domain in self._allowed_domains: - if user_domain_match(domain, allowed_domain): - return False - return True - - def set_ok(self, cookie, request): - """ - If you override .set_ok(), be sure to call this method. If it returns - false, so should your subclass (assuming your subclass wants to be more - strict about which cookies to accept). - - """ - _debug(" - checking cookie %s=%s", cookie.name, cookie.value) - - assert cookie.name is not None - - for n in "version", "verifiability", "name", "path", "domain", "port": - fn_name = "set_ok_"+n - fn = getattr(self, fn_name) - if not fn(cookie, request): - return False - - return True - - def set_ok_version(self, cookie, request): - if cookie.version is None: - # Version is always set to 0 by parse_ns_headers if it's a Netscape - # cookie, so this must be an invalid RFC 2965 cookie. 
- _debug(" Set-Cookie2 without version attribute (%s=%s)", - cookie.name, cookie.value) - return False - if cookie.version > 0 and not self.rfc2965: - _debug(" RFC 2965 cookies are switched off") - return False - elif cookie.version == 0 and not self.netscape: - _debug(" Netscape cookies are switched off") - return False - return True - - def set_ok_verifiability(self, cookie, request): - if request.unverifiable and is_third_party(request): - if cookie.version > 0 and self.strict_rfc2965_unverifiable: - _debug(" third-party RFC 2965 cookie during " - "unverifiable transaction") - return False - elif cookie.version == 0 and self.strict_ns_unverifiable: - _debug(" third-party Netscape cookie during " - "unverifiable transaction") - return False - return True - - def set_ok_name(self, cookie, request): - # Try and stop servers setting V0 cookies designed to hack other - # servers that know both V0 and V1 protocols. - if (cookie.version == 0 and self.strict_ns_set_initial_dollar and - cookie.name.startswith("$")): - _debug(" illegal name (starts with '$'): '%s'", cookie.name) - return False - return True - - def set_ok_path(self, cookie, request): - if cookie.path_specified: - req_path = request_path(request) - if ((cookie.version > 0 or - (cookie.version == 0 and self.strict_ns_set_path)) and - not req_path.startswith(cookie.path)): - _debug(" path attribute %s is not a prefix of request " - "path %s", cookie.path, req_path) - return False - return True - - def set_ok_domain(self, cookie, request): - if self.is_blocked(cookie.domain): - _debug(" domain %s is in user block-list", cookie.domain) - return False - if self.is_not_allowed(cookie.domain): - _debug(" domain %s is not in user allow-list", cookie.domain) - return False - if cookie.domain_specified: - req_host, erhn = eff_request_host(request) - domain = cookie.domain - if self.strict_domain and (domain.count(".") >= 2): - # XXX This should probably be compared with the Konqueror - # (kcookiejar.cpp) and Mozilla 
implementations, but it's a - # losing battle. - i = domain.rfind(".") - j = domain.rfind(".", 0, i) - if j == 0: # domain like .foo.bar - tld = domain[i+1:] - sld = domain[j+1:i] - if sld.lower() in ("co", "ac", "com", "edu", "org", "net", - "gov", "mil", "int", "aero", "biz", "cat", "coop", - "info", "jobs", "mobi", "museum", "name", "pro", - "travel", "eu") and len(tld) == 2: - # domain like .co.uk - _debug(" country-code second level domain %s", domain) - return False - if domain.startswith("."): - undotted_domain = domain[1:] - else: - undotted_domain = domain - embedded_dots = (undotted_domain.find(".") >= 0) - if not embedded_dots and domain != ".local": - _debug(" non-local domain %s contains no embedded dot", - domain) - return False - if cookie.version == 0: - if (not erhn.endswith(domain) and - (not erhn.startswith(".") and - not ("."+erhn).endswith(domain))): - _debug(" effective request-host %s (even with added " - "initial dot) does not end with %s", - erhn, domain) - return False - if (cookie.version > 0 or - (self.strict_ns_domain & self.DomainRFC2965Match)): - if not domain_match(erhn, domain): - _debug(" effective request-host %s does not domain-match " - "%s", erhn, domain) - return False - if (cookie.version > 0 or - (self.strict_ns_domain & self.DomainStrictNoDots)): - host_prefix = req_host[:-len(domain)] - if (host_prefix.find(".") >= 0 and - not IPV4_RE.search(req_host)): - _debug(" host prefix %s for domain %s contains a dot", - host_prefix, domain) - return False - return True - - def set_ok_port(self, cookie, request): - if cookie.port_specified: - req_port = request_port(request) - if req_port is None: - req_port = "80" - else: - req_port = str(req_port) - for p in cookie.port.split(","): - try: - int(p) - except ValueError: - _debug(" bad port %s (not numeric)", p) - return False - if p == req_port: - break - else: - _debug(" request port (%s) not found in %s", - req_port, cookie.port) - return False - return True - - def 
return_ok(self, cookie, request): - """ - If you override .return_ok(), be sure to call this method. If it - returns false, so should your subclass (assuming your subclass wants to - be more strict about which cookies to return). - - """ - # Path has already been checked by .path_return_ok(), and domain - # blocking done by .domain_return_ok(). - _debug(" - checking cookie %s=%s", cookie.name, cookie.value) - - for n in "version", "verifiability", "secure", "expires", "port", "domain": - fn_name = "return_ok_"+n - fn = getattr(self, fn_name) - if not fn(cookie, request): - return False - return True - - def return_ok_version(self, cookie, request): - if cookie.version > 0 and not self.rfc2965: - _debug(" RFC 2965 cookies are switched off") - return False - elif cookie.version == 0 and not self.netscape: - _debug(" Netscape cookies are switched off") - return False - return True - - def return_ok_verifiability(self, cookie, request): - if request.unverifiable and is_third_party(request): - if cookie.version > 0 and self.strict_rfc2965_unverifiable: - _debug(" third-party RFC 2965 cookie during unverifiable " - "transaction") - return False - elif cookie.version == 0 and self.strict_ns_unverifiable: - _debug(" third-party Netscape cookie during unverifiable " - "transaction") - return False - return True - - def return_ok_secure(self, cookie, request): - if cookie.secure and request.type != "https": - _debug(" secure cookie with non-secure request") - return False - return True - - def return_ok_expires(self, cookie, request): - if cookie.is_expired(self._now): - _debug(" cookie expired") - return False - return True - - def return_ok_port(self, cookie, request): - if cookie.port: - req_port = request_port(request) - if req_port is None: - req_port = "80" - for p in cookie.port.split(","): - if p == req_port: - break - else: - _debug(" request port %s does not match cookie port %s", - req_port, cookie.port) - return False - return True - - def return_ok_domain(self, 
cookie, request): - req_host, erhn = eff_request_host(request) - domain = cookie.domain - - # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't - if (cookie.version == 0 and - (self.strict_ns_domain & self.DomainStrictNonDomain) and - not cookie.domain_specified and domain != erhn): - _debug(" cookie with unspecified domain does not string-compare " - "equal to request domain") - return False - - if cookie.version > 0 and not domain_match(erhn, domain): - _debug(" effective request-host name %s does not domain-match " - "RFC 2965 cookie domain %s", erhn, domain) - return False - if cookie.version == 0 and not ("."+erhn).endswith(domain): - _debug(" request-host %s does not match Netscape cookie domain " - "%s", req_host, domain) - return False - return True - - def domain_return_ok(self, domain, request): - # Liberal check of. This is here as an optimization to avoid - # having to load lots of MSIE cookie files unless necessary. - req_host, erhn = eff_request_host(request) - if not req_host.startswith("."): - req_host = "."+req_host - if not erhn.startswith("."): - erhn = "."+erhn - if not (req_host.endswith(domain) or erhn.endswith(domain)): - #_debug(" request domain %s does not match cookie domain %s", - # req_host, domain) - return False - - if self.is_blocked(domain): - _debug(" domain %s is in user block-list", domain) - return False - if self.is_not_allowed(domain): - _debug(" domain %s is not in user allow-list", domain) - return False - - return True - - def path_return_ok(self, path, request): - _debug("- checking cookie path=%s", path) - req_path = request_path(request) - if not req_path.startswith(path): - _debug(" %s does not path-match %s", req_path, path) - return False - return True - - -def vals_sorted_by_key(adict): - keys = sorted(adict.keys()) - return map(adict.get, keys) - -def deepvalues(mapping): - """Iterates over nested mapping, depth-first, in sorted order by key.""" - values = vals_sorted_by_key(mapping) - for obj in 
values: - mapping = False - try: - obj.items - except AttributeError: - pass - else: - mapping = True - for subobj in deepvalues(obj): - yield subobj - if not mapping: - yield obj - - -# Used as second parameter to dict.get() method, to distinguish absent -# dict key from one with a None value. -class Absent(object): pass - -class CookieJar(object): - """Collection of HTTP cookies. - - You may not need to know about this class: try - urllib.request.build_opener(HTTPCookieProcessor).open(url). - """ - - non_word_re = re.compile(r"\W") - quote_re = re.compile(r"([\"\\])") - strict_domain_re = re.compile(r"\.?[^.]*") - domain_re = re.compile(r"[^.]*") - dots_re = re.compile(r"^\.+") - - magic_re = re.compile(r"^\#LWP-Cookies-(\d+\.\d+)", re.ASCII) - - def __init__(self, policy=None): - if policy is None: - policy = DefaultCookiePolicy() - self._policy = policy - - self._cookies_lock = _threading.RLock() - self._cookies = {} - - def set_policy(self, policy): - self._policy = policy - - def _cookies_for_domain(self, domain, request): - cookies = [] - if not self._policy.domain_return_ok(domain, request): - return [] - _debug("Checking %s for cookies to return", domain) - cookies_by_path = self._cookies[domain] - for path in cookies_by_path.keys(): - if not self._policy.path_return_ok(path, request): - continue - cookies_by_name = cookies_by_path[path] - for cookie in cookies_by_name.values(): - if not self._policy.return_ok(cookie, request): - _debug(" not returning cookie") - continue - _debug(" it's a match") - cookies.append(cookie) - return cookies - - def _cookies_for_request(self, request): - """Return a list of cookies to be returned to server.""" - cookies = [] - for domain in self._cookies.keys(): - cookies.extend(self._cookies_for_domain(domain, request)) - return cookies - - def _cookie_attrs(self, cookies): - """Return a list of cookie-attributes to be returned to server. - - like ['foo="bar"; $Path="/"', ...] 
- - The $Version attribute is also added when appropriate (currently only - once per request). - - """ - # add cookies in order of most specific (ie. longest) path first - cookies.sort(key=lambda a: len(a.path), reverse=True) - - version_set = False - - attrs = [] - for cookie in cookies: - # set version of Cookie header - # XXX - # What should it be if multiple matching Set-Cookie headers have - # different versions themselves? - # Answer: there is no answer; was supposed to be settled by - # RFC 2965 errata, but that may never appear... - version = cookie.version - if not version_set: - version_set = True - if version > 0: - attrs.append("$Version=%s" % version) - - # quote cookie value if necessary - # (not for Netscape protocol, which already has any quotes - # intact, due to the poorly-specified Netscape Cookie: syntax) - if ((cookie.value is not None) and - self.non_word_re.search(cookie.value) and version > 0): - value = self.quote_re.sub(r"\\\1", cookie.value) - else: - value = cookie.value - - # add cookie-attributes to be returned in Cookie header - if cookie.value is None: - attrs.append(cookie.name) - else: - attrs.append("%s=%s" % (cookie.name, value)) - if version > 0: - if cookie.path_specified: - attrs.append('$Path="%s"' % cookie.path) - if cookie.domain.startswith("."): - domain = cookie.domain - if (not cookie.domain_initial_dot and - domain.startswith(".")): - domain = domain[1:] - attrs.append('$Domain="%s"' % domain) - if cookie.port is not None: - p = "$Port" - if cookie.port_specified: - p = p + ('="%s"' % cookie.port) - attrs.append(p) - - return attrs - - def add_cookie_header(self, request): - """Add correct Cookie: header to request (urllib.request.Request object). - - The Cookie2 header is also added unless policy.hide_cookie2 is true. 
- - """ - _debug("add_cookie_header") - self._cookies_lock.acquire() - try: - - self._policy._now = self._now = int(time.time()) - - cookies = self._cookies_for_request(request) - - attrs = self._cookie_attrs(cookies) - if attrs: - if not request.has_header("Cookie"): - request.add_unredirected_header( - "Cookie", "; ".join(attrs)) - - # if necessary, advertise that we know RFC 2965 - if (self._policy.rfc2965 and not self._policy.hide_cookie2 and - not request.has_header("Cookie2")): - for cookie in cookies: - if cookie.version != 1: - request.add_unredirected_header("Cookie2", '$Version="1"') - break - - finally: - self._cookies_lock.release() - - self.clear_expired_cookies() - - def _normalized_cookie_tuples(self, attrs_set): - """Return list of tuples containing normalised cookie information. - - attrs_set is the list of lists of key,value pairs extracted from - the Set-Cookie or Set-Cookie2 headers. - - Tuples are name, value, standard, rest, where name and value are the - cookie name and value, standard is a dictionary containing the standard - cookie-attributes (discard, secure, version, expires or max-age, - domain, path and port) and rest is a dictionary containing the rest of - the cookie-attributes. - - """ - cookie_tuples = [] - - boolean_attrs = "discard", "secure" - value_attrs = ("version", - "expires", "max-age", - "domain", "path", "port", - "comment", "commenturl") - - for cookie_attrs in attrs_set: - name, value = cookie_attrs[0] - - # Build dictionary of standard cookie-attributes (standard) and - # dictionary of other cookie-attributes (rest). - - # Note: expiry time is normalised to seconds since epoch. V0 - # cookies should have the Expires cookie-attribute, and V1 cookies - # should have Max-Age, but since V1 includes RFC 2109 cookies (and - # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we - # accept either (but prefer Max-Age). 
- max_age_set = False - - bad_cookie = False - - standard = {} - rest = {} - for k, v in cookie_attrs[1:]: - lc = k.lower() - # don't lose case distinction for unknown fields - if lc in value_attrs or lc in boolean_attrs: - k = lc - if k in boolean_attrs and v is None: - # boolean cookie-attribute is present, but has no value - # (like "discard", rather than "port=80") - v = True - if k in standard: - # only first value is significant - continue - if k == "domain": - if v is None: - _debug(" missing value for domain attribute") - bad_cookie = True - break - # RFC 2965 section 3.3.3 - v = v.lower() - if k == "expires": - if max_age_set: - # Prefer max-age to expires (like Mozilla) - continue - if v is None: - _debug(" missing or invalid value for expires " - "attribute: treating as session cookie") - continue - if k == "max-age": - max_age_set = True - try: - v = int(v) - except ValueError: - _debug(" missing or invalid (non-numeric) value for " - "max-age attribute") - bad_cookie = True - break - # convert RFC 2965 Max-Age to seconds since epoch - # XXX Strictly you're supposed to follow RFC 2616 - # age-calculation rules. Remember that zero Max-Age is a - # is a request to discard (old and new) cookie, though. 
- k = "expires" - v = self._now + v - if (k in value_attrs) or (k in boolean_attrs): - if (v is None and - k not in ("port", "comment", "commenturl")): - _debug(" missing value for %s attribute" % k) - bad_cookie = True - break - standard[k] = v - else: - rest[k] = v - - if bad_cookie: - continue - - cookie_tuples.append((name, value, standard, rest)) - - return cookie_tuples - - def _cookie_from_cookie_tuple(self, tup, request): - # standard is dict of standard cookie-attributes, rest is dict of the - # rest of them - name, value, standard, rest = tup - - domain = standard.get("domain", Absent) - path = standard.get("path", Absent) - port = standard.get("port", Absent) - expires = standard.get("expires", Absent) - - # set the easy defaults - version = standard.get("version", None) - if version is not None: - try: - version = int(version) - except ValueError: - return None # invalid version, ignore cookie - secure = standard.get("secure", False) - # (discard is also set if expires is Absent) - discard = standard.get("discard", False) - comment = standard.get("comment", None) - comment_url = standard.get("commenturl", None) - - # set default path - if path is not Absent and path != "": - path_specified = True - path = escape_path(path) - else: - path_specified = False - path = request_path(request) - i = path.rfind("/") - if i != -1: - if version == 0: - # Netscape spec parts company from reality here - path = path[:i] - else: - path = path[:i+1] - if len(path) == 0: path = "/" - - # set default domain - domain_specified = domain is not Absent - # but first we have to remember whether it starts with a dot - domain_initial_dot = False - if domain_specified: - domain_initial_dot = bool(domain.startswith(".")) - if domain is Absent: - req_host, erhn = eff_request_host(request) - domain = erhn - elif not domain.startswith("."): - domain = "."+domain - - # set default port - port_specified = False - if port is not Absent: - if port is None: - # Port attr present, but has 
no value: default to request port. - # Cookie should then only be sent back on that port. - port = request_port(request) - else: - port_specified = True - port = re.sub(r"\s+", "", port) - else: - # No port attr present. Cookie can be sent back on any port. - port = None - - # set default expires and discard - if expires is Absent: - expires = None - discard = True - elif expires <= self._now: - # Expiry date in past is request to delete cookie. This can't be - # in DefaultCookiePolicy, because can't delete cookies there. - try: - self.clear(domain, path, name) - except KeyError: - pass - _debug("Expiring cookie, domain='%s', path='%s', name='%s'", - domain, path, name) - return None - - return Cookie(version, - name, value, - port, port_specified, - domain, domain_specified, domain_initial_dot, - path, path_specified, - secure, - expires, - discard, - comment, - comment_url, - rest) - - def _cookies_from_attrs_set(self, attrs_set, request): - cookie_tuples = self._normalized_cookie_tuples(attrs_set) - - cookies = [] - for tup in cookie_tuples: - cookie = self._cookie_from_cookie_tuple(tup, request) - if cookie: cookies.append(cookie) - return cookies - - def _process_rfc2109_cookies(self, cookies): - rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None) - if rfc2109_as_ns is None: - rfc2109_as_ns = not self._policy.rfc2965 - for cookie in cookies: - if cookie.version == 1: - cookie.rfc2109 = True - if rfc2109_as_ns: - # treat 2109 cookies as Netscape cookies rather than - # as RFC2965 cookies - cookie.version = 0 - - def make_cookies(self, response, request): - """Return sequence of Cookie objects extracted from response object.""" - # get cookie-attributes for RFC 2965 and Netscape protocols - headers = response.info() - rfc2965_hdrs = headers.get_all("Set-Cookie2", []) - ns_hdrs = headers.get_all("Set-Cookie", []) - - rfc2965 = self._policy.rfc2965 - netscape = self._policy.netscape - - if ((not rfc2965_hdrs and not ns_hdrs) or - (not ns_hdrs and 
not rfc2965) or - (not rfc2965_hdrs and not netscape) or - (not netscape and not rfc2965)): - return [] # no relevant cookie headers: quick exit - - try: - cookies = self._cookies_from_attrs_set( - split_header_words(rfc2965_hdrs), request) - except Exception: - _warn_unhandled_exception() - cookies = [] - - if ns_hdrs and netscape: - try: - # RFC 2109 and Netscape cookies - ns_cookies = self._cookies_from_attrs_set( - parse_ns_headers(ns_hdrs), request) - except Exception: - _warn_unhandled_exception() - ns_cookies = [] - self._process_rfc2109_cookies(ns_cookies) - - # Look for Netscape cookies (from Set-Cookie headers) that match - # corresponding RFC 2965 cookies (from Set-Cookie2 headers). - # For each match, keep the RFC 2965 cookie and ignore the Netscape - # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are - # bundled in with the Netscape cookies for this purpose, which is - # reasonable behaviour. - if rfc2965: - lookup = {} - for cookie in cookies: - lookup[(cookie.domain, cookie.path, cookie.name)] = None - - def no_matching_rfc2965(ns_cookie, lookup=lookup): - key = ns_cookie.domain, ns_cookie.path, ns_cookie.name - return key not in lookup - ns_cookies = filter(no_matching_rfc2965, ns_cookies) - - if ns_cookies: - cookies.extend(ns_cookies) - - return cookies - - def set_cookie_if_ok(self, cookie, request): - """Set a cookie if policy says it's OK to do so.""" - self._cookies_lock.acquire() - try: - self._policy._now = self._now = int(time.time()) - - if self._policy.set_ok(cookie, request): - self.set_cookie(cookie) - - - finally: - self._cookies_lock.release() - - def set_cookie(self, cookie): - """Set a cookie, without checking whether or not it should be set.""" - c = self._cookies - self._cookies_lock.acquire() - try: - if cookie.domain not in c: c[cookie.domain] = {} - c2 = c[cookie.domain] - if cookie.path not in c2: c2[cookie.path] = {} - c3 = c2[cookie.path] - c3[cookie.name] = cookie - finally: - self._cookies_lock.release() - - 
def extract_cookies(self, response, request): - """Extract cookies from response, where allowable given the request.""" - _debug("extract_cookies: %s", response.info()) - self._cookies_lock.acquire() - try: - self._policy._now = self._now = int(time.time()) - - for cookie in self.make_cookies(response, request): - if self._policy.set_ok(cookie, request): - _debug(" setting cookie: %s", cookie) - self.set_cookie(cookie) - finally: - self._cookies_lock.release() - - def clear(self, domain=None, path=None, name=None): - """Clear some cookies. - - Invoking this method without arguments will clear all cookies. If - given a single argument, only cookies belonging to that domain will be - removed. If given two arguments, cookies belonging to the specified - path within that domain are removed. If given three arguments, then - the cookie with the specified name, path and domain is removed. - - Raises KeyError if no matching cookie exists. - - """ - if name is not None: - if (domain is None) or (path is None): - raise ValueError( - "domain and path must be given to remove a cookie by name") - del self._cookies[domain][path][name] - elif path is not None: - if domain is None: - raise ValueError( - "domain must be given to remove cookies by path") - del self._cookies[domain][path] - elif domain is not None: - del self._cookies[domain] - else: - self._cookies = {} - - def clear_session_cookies(self): - """Discard all session cookies. - - Note that the .save() method won't save session cookies anyway, unless - you ask otherwise by passing a true ignore_discard argument. - - """ - self._cookies_lock.acquire() - try: - for cookie in self: - if cookie.discard: - self.clear(cookie.domain, cookie.path, cookie.name) - finally: - self._cookies_lock.release() - - def clear_expired_cookies(self): - """Discard all expired cookies. 
- - You probably don't need to call this method: expired cookies are never - sent back to the server (provided you're using DefaultCookiePolicy), - this method is called by CookieJar itself every so often, and the - .save() method won't save expired cookies anyway (unless you ask - otherwise by passing a true ignore_expires argument). - - """ - self._cookies_lock.acquire() - try: - now = time.time() - for cookie in self: - if cookie.is_expired(now): - self.clear(cookie.domain, cookie.path, cookie.name) - finally: - self._cookies_lock.release() - - def __iter__(self): - return deepvalues(self._cookies) - - def __len__(self): - """Return number of contained cookies.""" - i = 0 - for cookie in self: i = i + 1 - return i - - @as_native_str() - def __repr__(self): - r = [] - for cookie in self: r.append(repr(cookie)) - return "<%s[%s]>" % (self.__class__, ", ".join(r)) - - def __str__(self): - r = [] - for cookie in self: r.append(str(cookie)) - return "<%s[%s]>" % (self.__class__, ", ".join(r)) - - -# derives from IOError for backwards-compatibility with Python 2.4.0 -class LoadError(IOError): pass - -class FileCookieJar(CookieJar): - """CookieJar that can be loaded from and saved to a file.""" - - def __init__(self, filename=None, delayload=False, policy=None): - """ - Cookies are NOT loaded from the named file until either the .load() or - .revert() method is called. 
- - """ - CookieJar.__init__(self, policy) - if filename is not None: - try: - filename+"" - except: - raise ValueError("filename must be string-like") - self.filename = filename - self.delayload = bool(delayload) - - def save(self, filename=None, ignore_discard=False, ignore_expires=False): - """Save cookies to a file.""" - raise NotImplementedError() - - def load(self, filename=None, ignore_discard=False, ignore_expires=False): - """Load cookies from a file.""" - if filename is None: - if self.filename is not None: filename = self.filename - else: raise ValueError(MISSING_FILENAME_TEXT) - - f = open(filename) - try: - self._really_load(f, filename, ignore_discard, ignore_expires) - finally: - f.close() - - def revert(self, filename=None, - ignore_discard=False, ignore_expires=False): - """Clear all cookies and reload cookies from a saved file. - - Raises LoadError (or IOError) if reversion is not successful; the - object's state will not be altered if this happens. - - """ - if filename is None: - if self.filename is not None: filename = self.filename - else: raise ValueError(MISSING_FILENAME_TEXT) - - self._cookies_lock.acquire() - try: - - old_state = copy.deepcopy(self._cookies) - self._cookies = {} - try: - self.load(filename, ignore_discard, ignore_expires) - except (LoadError, IOError): - self._cookies = old_state - raise - - finally: - self._cookies_lock.release() - - -def lwp_cookie_str(cookie): - """Return string representation of Cookie in an the LWP cookie file format. - - Actually, the format is extended a bit -- see module docstring. 
- - """ - h = [(cookie.name, cookie.value), - ("path", cookie.path), - ("domain", cookie.domain)] - if cookie.port is not None: h.append(("port", cookie.port)) - if cookie.path_specified: h.append(("path_spec", None)) - if cookie.port_specified: h.append(("port_spec", None)) - if cookie.domain_initial_dot: h.append(("domain_dot", None)) - if cookie.secure: h.append(("secure", None)) - if cookie.expires: h.append(("expires", - time2isoz(float(cookie.expires)))) - if cookie.discard: h.append(("discard", None)) - if cookie.comment: h.append(("comment", cookie.comment)) - if cookie.comment_url: h.append(("commenturl", cookie.comment_url)) - - keys = sorted(cookie._rest.keys()) - for k in keys: - h.append((k, str(cookie._rest[k]))) - - h.append(("version", str(cookie.version))) - - return join_header_words([h]) - -class LWPCookieJar(FileCookieJar): - """ - The LWPCookieJar saves a sequence of "Set-Cookie3" lines. - "Set-Cookie3" is the format used by the libwww-perl libary, not known - to be compatible with any browser, but which is easy to read and - doesn't lose information about RFC 2965 cookies. - - Additional methods - - as_lwp_str(ignore_discard=True, ignore_expired=True) - - """ - - def as_lwp_str(self, ignore_discard=True, ignore_expires=True): - """Return cookies as a string of "\\n"-separated "Set-Cookie3" headers. 
- - ignore_discard and ignore_expires: see docstring for FileCookieJar.save - - """ - now = time.time() - r = [] - for cookie in self: - if not ignore_discard and cookie.discard: - continue - if not ignore_expires and cookie.is_expired(now): - continue - r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie)) - return "\n".join(r+[""]) - - def save(self, filename=None, ignore_discard=False, ignore_expires=False): - if filename is None: - if self.filename is not None: filename = self.filename - else: raise ValueError(MISSING_FILENAME_TEXT) - - f = open(filename, "w") - try: - # There really isn't an LWP Cookies 2.0 format, but this indicates - # that there is extra information in here (domain_dot and - # port_spec) while still being compatible with libwww-perl, I hope. - f.write("#LWP-Cookies-2.0\n") - f.write(self.as_lwp_str(ignore_discard, ignore_expires)) - finally: - f.close() - - def _really_load(self, f, filename, ignore_discard, ignore_expires): - magic = f.readline() - if not self.magic_re.search(magic): - msg = ("%r does not look like a Set-Cookie3 (LWP) format " - "file" % filename) - raise LoadError(msg) - - now = time.time() - - header = "Set-Cookie3:" - boolean_attrs = ("port_spec", "path_spec", "domain_dot", - "secure", "discard") - value_attrs = ("version", - "port", "path", "domain", - "expires", - "comment", "commenturl") - - try: - while 1: - line = f.readline() - if line == "": break - if not line.startswith(header): - continue - line = line[len(header):].strip() - - for data in split_header_words([line]): - name, value = data[0] - standard = {} - rest = {} - for k in boolean_attrs: - standard[k] = False - for k, v in data[1:]: - if k is not None: - lc = k.lower() - else: - lc = None - # don't lose case distinction for unknown fields - if (lc in value_attrs) or (lc in boolean_attrs): - k = lc - if k in boolean_attrs: - if v is None: v = True - standard[k] = v - elif k in value_attrs: - standard[k] = v - else: - rest[k] = v - - h = standard.get - 
expires = h("expires") - discard = h("discard") - if expires is not None: - expires = iso2time(expires) - if expires is None: - discard = True - domain = h("domain") - domain_specified = domain.startswith(".") - c = Cookie(h("version"), name, value, - h("port"), h("port_spec"), - domain, domain_specified, h("domain_dot"), - h("path"), h("path_spec"), - h("secure"), - expires, - discard, - h("comment"), - h("commenturl"), - rest) - if not ignore_discard and c.discard: - continue - if not ignore_expires and c.is_expired(now): - continue - self.set_cookie(c) - - except IOError: - raise - except Exception: - _warn_unhandled_exception() - raise LoadError("invalid Set-Cookie3 format file %r: %r" % - (filename, line)) - - -class MozillaCookieJar(FileCookieJar): - """ - - WARNING: you may want to backup your browser's cookies file if you use - this class to save cookies. I *think* it works, but there have been - bugs in the past! - - This class differs from CookieJar only in the format it uses to save and - load cookies to and from a file. This class uses the Mozilla/Netscape - `cookies.txt' format. lynx uses this file format, too. - - Don't expect cookies saved while the browser is running to be noticed by - the browser (in fact, Mozilla on unix will overwrite your saved cookies if - you change them on disk while it's running; on Windows, you probably can't - save at all while the browser is running). - - Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to - Netscape cookies on saving. - - In particular, the cookie version and port number information is lost, - together with information about whether or not Path, Port and Discard were - specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the - domain as set in the HTTP header started with a dot (yes, I'm aware some - domains in Netscape files start with a dot and some don't -- trust me, you - really don't want to know any more about this). 
- - Note that though Mozilla and Netscape use the same format, they use - slightly different headers. The class saves cookies using the Netscape - header by default (Mozilla can cope with that). - - """ - magic_re = re.compile("#( Netscape)? HTTP Cookie File") - header = """\ -# Netscape HTTP Cookie File -# http://www.netscape.com/newsref/std/cookie_spec.html -# This is a generated file! Do not edit. - -""" - - def _really_load(self, f, filename, ignore_discard, ignore_expires): - now = time.time() - - magic = f.readline() - if not self.magic_re.search(magic): - f.close() - raise LoadError( - "%r does not look like a Netscape format cookies file" % - filename) - - try: - while 1: - line = f.readline() - if line == "": break - - # last field may be absent, so keep any trailing tab - if line.endswith("\n"): line = line[:-1] - - # skip comments and blank lines XXX what is $ for? - if (line.strip().startswith(("#", "$")) or - line.strip() == ""): - continue - - domain, domain_specified, path, secure, expires, name, value = \ - line.split("\t") - secure = (secure == "TRUE") - domain_specified = (domain_specified == "TRUE") - if name == "": - # cookies.txt regards 'Set-Cookie: foo' as a cookie - # with no name, whereas http.cookiejar regards it as a - # cookie with no value. 
- name = value - value = None - - initial_dot = domain.startswith(".") - assert domain_specified == initial_dot - - discard = False - if expires == "": - expires = None - discard = True - - # assume path_specified is false - c = Cookie(0, name, value, - None, False, - domain, domain_specified, initial_dot, - path, False, - secure, - expires, - discard, - None, - None, - {}) - if not ignore_discard and c.discard: - continue - if not ignore_expires and c.is_expired(now): - continue - self.set_cookie(c) - - except IOError: - raise - except Exception: - _warn_unhandled_exception() - raise LoadError("invalid Netscape format cookies file %r: %r" % - (filename, line)) - - def save(self, filename=None, ignore_discard=False, ignore_expires=False): - if filename is None: - if self.filename is not None: filename = self.filename - else: raise ValueError(MISSING_FILENAME_TEXT) - - f = open(filename, "w") - try: - f.write(self.header) - now = time.time() - for cookie in self: - if not ignore_discard and cookie.discard: - continue - if not ignore_expires and cookie.is_expired(now): - continue - if cookie.secure: secure = "TRUE" - else: secure = "FALSE" - if cookie.domain.startswith("."): initial_dot = "TRUE" - else: initial_dot = "FALSE" - if cookie.expires is not None: - expires = str(cookie.expires) - else: - expires = "" - if cookie.value is None: - # cookies.txt regards 'Set-Cookie: foo' as a cookie - # with no name, whereas http.cookiejar regards it as a - # cookie with no value. 
- name = "" - value = cookie.name - else: - name = cookie.name - value = cookie.value - f.write( - "\t".join([cookie.domain, initial_dot, cookie.path, - secure, expires, name, value])+ - "\n") - finally: - f.close() diff --git a/contrib/python/future/future/backports/http/cookies.py b/contrib/python/future/future/backports/http/cookies.py deleted file mode 100644 index 8bb61e22c4b..00000000000 --- a/contrib/python/future/future/backports/http/cookies.py +++ /dev/null @@ -1,598 +0,0 @@ -#### -# Copyright 2000 by Timothy O'Malley <[email protected]> -# -# All Rights Reserved -# -# Permission to use, copy, modify, and distribute this software -# and its documentation for any purpose and without fee is hereby -# granted, provided that the above copyright notice appear in all -# copies and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of -# Timothy O'Malley not be used in advertising or publicity -# pertaining to distribution of the software without specific, written -# prior permission. -# -# Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS -# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY -# AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR -# ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS -# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR -# PERFORMANCE OF THIS SOFTWARE. -# -#### -# -# Id: Cookie.py,v 2.29 2000/08/23 05:28:49 timo Exp -# by Timothy O'Malley <[email protected]> -# -# Cookie.py is a Python module for the handling of HTTP -# cookies as a Python dictionary. See RFC 2109 for more -# information on cookies. -# -# The original idea to treat Cookies as a dictionary came from -# Dave Mitchell ([email protected]) in 1995, when he released the -# first version of nscookie.py. 
-# -#### - -r""" -http.cookies module ported to python-future from Py3.3 - -Here's a sample session to show how to use this module. -At the moment, this is the only documentation. - -The Basics ----------- - -Importing is easy... - - >>> from http import cookies - -Most of the time you start by creating a cookie. - - >>> C = cookies.SimpleCookie() - -Once you've created your Cookie, you can add values just as if it were -a dictionary. - - >>> C = cookies.SimpleCookie() - >>> C["fig"] = "newton" - >>> C["sugar"] = "wafer" - >>> C.output() - 'Set-Cookie: fig=newton\r\nSet-Cookie: sugar=wafer' - -Notice that the printable representation of a Cookie is the -appropriate format for a Set-Cookie: header. This is the -default behavior. You can change the header and printed -attributes by using the .output() function - - >>> C = cookies.SimpleCookie() - >>> C["rocky"] = "road" - >>> C["rocky"]["path"] = "/cookie" - >>> print(C.output(header="Cookie:")) - Cookie: rocky=road; Path=/cookie - >>> print(C.output(attrs=[], header="Cookie:")) - Cookie: rocky=road - -The load() method of a Cookie extracts cookies from a string. In a -CGI script, you would use this method to extract the cookies from the -HTTP_COOKIE environment variable. - - >>> C = cookies.SimpleCookie() - >>> C.load("chips=ahoy; vienna=finger") - >>> C.output() - 'Set-Cookie: chips=ahoy\r\nSet-Cookie: vienna=finger' - -The load() method is darn-tootin smart about identifying cookies -within a string. Escaped quotation marks, nested semicolons, and other -such trickeries do not confuse it. - - >>> C = cookies.SimpleCookie() - >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=\\012;";') - >>> print(C) - Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;" - -Each element of the Cookie also supports all of the RFC 2109 -Cookie attributes. Here's an example which sets the Path -attribute. 
- - >>> C = cookies.SimpleCookie() - >>> C["oreo"] = "doublestuff" - >>> C["oreo"]["path"] = "/" - >>> print(C) - Set-Cookie: oreo=doublestuff; Path=/ - -Each dictionary element has a 'value' attribute, which gives you -back the value associated with the key. - - >>> C = cookies.SimpleCookie() - >>> C["twix"] = "none for you" - >>> C["twix"].value - 'none for you' - -The SimpleCookie expects that all values should be standard strings. -Just to be sure, SimpleCookie invokes the str() builtin to convert -the value to a string, when the values are set dictionary-style. - - >>> C = cookies.SimpleCookie() - >>> C["number"] = 7 - >>> C["string"] = "seven" - >>> C["number"].value - '7' - >>> C["string"].value - 'seven' - >>> C.output() - 'Set-Cookie: number=7\r\nSet-Cookie: string=seven' - -Finis. -""" -from __future__ import unicode_literals -from __future__ import print_function -from __future__ import division -from __future__ import absolute_import -from future.builtins import chr, dict, int, str -from future.utils import PY2, as_native_str - -# -# Import our required modules -# -import re -if PY2: - re.ASCII = 0 # for py2 compatibility -import string - -__all__ = ["CookieError", "BaseCookie", "SimpleCookie"] - -_nulljoin = ''.join -_semispacejoin = '; '.join -_spacejoin = ' '.join - -# -# Define an exception visible to External modules -# -class CookieError(Exception): - pass - - -# These quoting routines conform to the RFC2109 specification, which in -# turn references the character definitions from RFC2068. They provide -# a two-way quoting algorithm. Any non-text character is translated -# into a 4 character sequence: a forward-slash followed by the -# three-digit octal equivalent of the character. Any '\' or '"' is -# quoted with a preceeding '\' slash. -# -# These are taken from RFC2068 and RFC2109. 
-# _LegalChars is the list of chars which don't require "'s -# _Translator hash-table for fast quoting -# -_LegalChars = string.ascii_letters + string.digits + "!#$%&'*+-.^_`|~:" -_Translator = { - '\000' : '\\000', '\001' : '\\001', '\002' : '\\002', - '\003' : '\\003', '\004' : '\\004', '\005' : '\\005', - '\006' : '\\006', '\007' : '\\007', '\010' : '\\010', - '\011' : '\\011', '\012' : '\\012', '\013' : '\\013', - '\014' : '\\014', '\015' : '\\015', '\016' : '\\016', - '\017' : '\\017', '\020' : '\\020', '\021' : '\\021', - '\022' : '\\022', '\023' : '\\023', '\024' : '\\024', - '\025' : '\\025', '\026' : '\\026', '\027' : '\\027', - '\030' : '\\030', '\031' : '\\031', '\032' : '\\032', - '\033' : '\\033', '\034' : '\\034', '\035' : '\\035', - '\036' : '\\036', '\037' : '\\037', - - # Because of the way browsers really handle cookies (as opposed - # to what the RFC says) we also encode , and ; - - ',' : '\\054', ';' : '\\073', - - '"' : '\\"', '\\' : '\\\\', - - '\177' : '\\177', '\200' : '\\200', '\201' : '\\201', - '\202' : '\\202', '\203' : '\\203', '\204' : '\\204', - '\205' : '\\205', '\206' : '\\206', '\207' : '\\207', - '\210' : '\\210', '\211' : '\\211', '\212' : '\\212', - '\213' : '\\213', '\214' : '\\214', '\215' : '\\215', - '\216' : '\\216', '\217' : '\\217', '\220' : '\\220', - '\221' : '\\221', '\222' : '\\222', '\223' : '\\223', - '\224' : '\\224', '\225' : '\\225', '\226' : '\\226', - '\227' : '\\227', '\230' : '\\230', '\231' : '\\231', - '\232' : '\\232', '\233' : '\\233', '\234' : '\\234', - '\235' : '\\235', '\236' : '\\236', '\237' : '\\237', - '\240' : '\\240', '\241' : '\\241', '\242' : '\\242', - '\243' : '\\243', '\244' : '\\244', '\245' : '\\245', - '\246' : '\\246', '\247' : '\\247', '\250' : '\\250', - '\251' : '\\251', '\252' : '\\252', '\253' : '\\253', - '\254' : '\\254', '\255' : '\\255', '\256' : '\\256', - '\257' : '\\257', '\260' : '\\260', '\261' : '\\261', - '\262' : '\\262', '\263' : '\\263', '\264' : '\\264', - '\265' : 
'\\265', '\266' : '\\266', '\267' : '\\267', - '\270' : '\\270', '\271' : '\\271', '\272' : '\\272', - '\273' : '\\273', '\274' : '\\274', '\275' : '\\275', - '\276' : '\\276', '\277' : '\\277', '\300' : '\\300', - '\301' : '\\301', '\302' : '\\302', '\303' : '\\303', - '\304' : '\\304', '\305' : '\\305', '\306' : '\\306', - '\307' : '\\307', '\310' : '\\310', '\311' : '\\311', - '\312' : '\\312', '\313' : '\\313', '\314' : '\\314', - '\315' : '\\315', '\316' : '\\316', '\317' : '\\317', - '\320' : '\\320', '\321' : '\\321', '\322' : '\\322', - '\323' : '\\323', '\324' : '\\324', '\325' : '\\325', - '\326' : '\\326', '\327' : '\\327', '\330' : '\\330', - '\331' : '\\331', '\332' : '\\332', '\333' : '\\333', - '\334' : '\\334', '\335' : '\\335', '\336' : '\\336', - '\337' : '\\337', '\340' : '\\340', '\341' : '\\341', - '\342' : '\\342', '\343' : '\\343', '\344' : '\\344', - '\345' : '\\345', '\346' : '\\346', '\347' : '\\347', - '\350' : '\\350', '\351' : '\\351', '\352' : '\\352', - '\353' : '\\353', '\354' : '\\354', '\355' : '\\355', - '\356' : '\\356', '\357' : '\\357', '\360' : '\\360', - '\361' : '\\361', '\362' : '\\362', '\363' : '\\363', - '\364' : '\\364', '\365' : '\\365', '\366' : '\\366', - '\367' : '\\367', '\370' : '\\370', '\371' : '\\371', - '\372' : '\\372', '\373' : '\\373', '\374' : '\\374', - '\375' : '\\375', '\376' : '\\376', '\377' : '\\377' - } - -def _quote(str, LegalChars=_LegalChars): - r"""Quote a string for use in a cookie header. - - If the string does not need to be double-quoted, then just return the - string. Otherwise, surround the string in doublequotes and quote - (with a \) special characters. - """ - if all(c in LegalChars for c in str): - return str - else: - return '"' + _nulljoin(_Translator.get(s, s) for s in str) + '"' - - -_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]") -_QuotePatt = re.compile(r"[\\].") - -def _unquote(mystr): - # If there aren't any doublequotes, - # then there can't be any special characters. 
See RFC 2109. - if len(mystr) < 2: - return mystr - if mystr[0] != '"' or mystr[-1] != '"': - return mystr - - # We have to assume that we must decode this string. - # Down to work. - - # Remove the "s - mystr = mystr[1:-1] - - # Check for special sequences. Examples: - # \012 --> \n - # \" --> " - # - i = 0 - n = len(mystr) - res = [] - while 0 <= i < n: - o_match = _OctalPatt.search(mystr, i) - q_match = _QuotePatt.search(mystr, i) - if not o_match and not q_match: # Neither matched - res.append(mystr[i:]) - break - # else: - j = k = -1 - if o_match: - j = o_match.start(0) - if q_match: - k = q_match.start(0) - if q_match and (not o_match or k < j): # QuotePatt matched - res.append(mystr[i:k]) - res.append(mystr[k+1]) - i = k + 2 - else: # OctalPatt matched - res.append(mystr[i:j]) - res.append(chr(int(mystr[j+1:j+4], 8))) - i = j + 4 - return _nulljoin(res) - -# The _getdate() routine is used to set the expiration time in the cookie's HTTP -# header. By default, _getdate() returns the current time in the appropriate -# "expires" format for a Set-Cookie header. The one optional argument is an -# offset from now, in seconds. For example, an offset of -3600 means "one hour -# ago". The offset may be a floating point number. -# - -_weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] - -_monthname = [None, - 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', - 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] - -def _getdate(future=0, weekdayname=_weekdayname, monthname=_monthname): - from time import gmtime, time - now = time() - year, month, day, hh, mm, ss, wd, y, z = gmtime(now + future) - return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % \ - (weekdayname[wd], day, monthname[month], year, hh, mm, ss) - - -class Morsel(dict): - """A class to hold ONE (key, value) pair. - - In a cookie, each such pair may have several attributes, so this class is - used to keep the attributes associated with the appropriate key,value pair. 
- This class also includes a coded_value attribute, which is used to hold - the network representation of the value. This is most useful when Python - objects are pickled for network transit. - """ - # RFC 2109 lists these attributes as reserved: - # path comment domain - # max-age secure version - # - # For historical reasons, these attributes are also reserved: - # expires - # - # This is an extension from Microsoft: - # httponly - # - # This dictionary provides a mapping from the lowercase - # variant on the left to the appropriate traditional - # formatting on the right. - _reserved = { - "expires" : "expires", - "path" : "Path", - "comment" : "Comment", - "domain" : "Domain", - "max-age" : "Max-Age", - "secure" : "secure", - "httponly" : "httponly", - "version" : "Version", - } - - _flags = set(['secure', 'httponly']) - - def __init__(self): - # Set defaults - self.key = self.value = self.coded_value = None - - # Set default attributes - for key in self._reserved: - dict.__setitem__(self, key, "") - - def __setitem__(self, K, V): - K = K.lower() - if not K in self._reserved: - raise CookieError("Invalid Attribute %s" % K) - dict.__setitem__(self, K, V) - - def isReservedKey(self, K): - return K.lower() in self._reserved - - def set(self, key, val, coded_val, LegalChars=_LegalChars): - # First we verify that the key isn't a reserved word - # Second we make sure it only contains legal characters - if key.lower() in self._reserved: - raise CookieError("Attempt to set a reserved key: %s" % key) - if any(c not in LegalChars for c in key): - raise CookieError("Illegal key value: %s" % key) - - # It's a good key, so save it. 
- self.key = key - self.value = val - self.coded_value = coded_val - - def output(self, attrs=None, header="Set-Cookie:"): - return "%s %s" % (header, self.OutputString(attrs)) - - __str__ = output - - @as_native_str() - def __repr__(self): - if PY2 and isinstance(self.value, unicode): - val = str(self.value) # make it a newstr to remove the u prefix - else: - val = self.value - return '<%s: %s=%s>' % (self.__class__.__name__, - str(self.key), repr(val)) - - def js_output(self, attrs=None): - # Print javascript - return """ - <script type="text/javascript"> - <!-- begin hiding - document.cookie = \"%s\"; - // end hiding --> - </script> - """ % (self.OutputString(attrs).replace('"', r'\"')) - - def OutputString(self, attrs=None): - # Build up our result - # - result = [] - append = result.append - - # First, the key=value pair - append("%s=%s" % (self.key, self.coded_value)) - - # Now add any defined attributes - if attrs is None: - attrs = self._reserved - items = sorted(self.items()) - for key, value in items: - if value == "": - continue - if key not in attrs: - continue - if key == "expires" and isinstance(value, int): - append("%s=%s" % (self._reserved[key], _getdate(value))) - elif key == "max-age" and isinstance(value, int): - append("%s=%d" % (self._reserved[key], value)) - elif key == "secure": - append(str(self._reserved[key])) - elif key == "httponly": - append(str(self._reserved[key])) - else: - append("%s=%s" % (self._reserved[key], value)) - - # Return the result - return _semispacejoin(result) - - -# -# Pattern for finding cookie -# -# This used to be strict parsing based on the RFC2109 and RFC2068 -# specifications. I have since discovered that MSIE 3.0x doesn't -# follow the character rules outlined in those specs. As a -# result, the parsing rules here are less strict. 
-# - -_LegalCharsPatt = r"[\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=]" -_CookiePattern = re.compile(r""" - (?x) # This is a verbose pattern - (?P<key> # Start of group 'key' - """ + _LegalCharsPatt + r"""+? # Any word of at least one letter - ) # End of group 'key' - ( # Optional group: there may not be a value. - \s*=\s* # Equal Sign - (?P<val> # Start of group 'val' - "(?:[^\\"]|\\.)*" # Any doublequoted string - | # or - \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr - | # or - """ + _LegalCharsPatt + r"""* # Any word or empty string - ) # End of group 'val' - )? # End of optional value group - \s* # Any number of spaces. - (\s+|;|$) # Ending either at space, semicolon, or EOS. - """, re.ASCII) # May be removed if safe. - - -# At long last, here is the cookie class. Using this class is almost just like -# using a dictionary. See this module's docstring for example usage. -# -class BaseCookie(dict): - """A container class for a set of Morsels.""" - - def value_decode(self, val): - """real_value, coded_value = value_decode(STRING) - Called prior to setting a cookie's value from the network - representation. The VALUE is the value read from HTTP - header. - Override this function to modify the behavior of cookies. - """ - return val, val - - def value_encode(self, val): - """real_value, coded_value = value_encode(VALUE) - Called prior to setting a cookie's value from the dictionary - representation. The VALUE is the value being assigned. - Override this function to modify the behavior of cookies. 
- """ - strval = str(val) - return strval, strval - - def __init__(self, input=None): - if input: - self.load(input) - - def __set(self, key, real_value, coded_value): - """Private method for setting a cookie's value""" - M = self.get(key, Morsel()) - M.set(key, real_value, coded_value) - dict.__setitem__(self, key, M) - - def __setitem__(self, key, value): - """Dictionary style assignment.""" - rval, cval = self.value_encode(value) - self.__set(key, rval, cval) - - def output(self, attrs=None, header="Set-Cookie:", sep="\015\012"): - """Return a string suitable for HTTP.""" - result = [] - items = sorted(self.items()) - for key, value in items: - result.append(value.output(attrs, header)) - return sep.join(result) - - __str__ = output - - @as_native_str() - def __repr__(self): - l = [] - items = sorted(self.items()) - for key, value in items: - if PY2 and isinstance(value.value, unicode): - val = str(value.value) # make it a newstr to remove the u prefix - else: - val = value.value - l.append('%s=%s' % (str(key), repr(val))) - return '<%s: %s>' % (self.__class__.__name__, _spacejoin(l)) - - def js_output(self, attrs=None): - """Return a string suitable for JavaScript.""" - result = [] - items = sorted(self.items()) - for key, value in items: - result.append(value.js_output(attrs)) - return _nulljoin(result) - - def load(self, rawdata): - """Load cookies from a string (presumably HTTP_COOKIE) or - from a dictionary. 
Loading cookies from a dictionary 'd' - is equivalent to calling: - map(Cookie.__setitem__, d.keys(), d.values()) - """ - if isinstance(rawdata, str): - self.__parse_string(rawdata) - else: - # self.update() wouldn't call our custom __setitem__ - for key, value in rawdata.items(): - self[key] = value - return - - def __parse_string(self, mystr, patt=_CookiePattern): - i = 0 # Our starting point - n = len(mystr) # Length of string - M = None # current morsel - - while 0 <= i < n: - # Start looking for a cookie - match = patt.search(mystr, i) - if not match: - # No more cookies - break - - key, value = match.group("key"), match.group("val") - - i = match.end(0) - - # Parse the key, value in case it's metainfo - if key[0] == "$": - # We ignore attributes which pertain to the cookie - # mechanism as a whole. See RFC 2109. - # (Does anyone care?) - if M: - M[key[1:]] = value - elif key.lower() in Morsel._reserved: - if M: - if value is None: - if key.lower() in Morsel._flags: - M[key] = True - else: - M[key] = _unquote(value) - elif value is not None: - rval, cval = self.value_decode(value) - self.__set(key, rval, cval) - M = self[key] - - -class SimpleCookie(BaseCookie): - """ - SimpleCookie supports strings as cookie values. When setting - the value using the dictionary assignment notation, SimpleCookie - calls the builtin str() to convert the value to a string. Values - received from HTTP are kept as strings. - """ - def value_decode(self, val): - return _unquote(val), val - - def value_encode(self, val): - strval = str(val) - return strval, _quote(strval) diff --git a/contrib/python/future/future/backports/http/server.py b/contrib/python/future/future/backports/http/server.py deleted file mode 100644 index b1c11e0c73a..00000000000 --- a/contrib/python/future/future/backports/http/server.py +++ /dev/null @@ -1,1226 +0,0 @@ -"""HTTP server classes. 
- -From Python 3.3 - -Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see -SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST, -and CGIHTTPRequestHandler for CGI scripts. - -It does, however, optionally implement HTTP/1.1 persistent connections, -as of version 0.3. - -Notes on CGIHTTPRequestHandler ------------------------------- - -This class implements GET and POST requests to cgi-bin scripts. - -If the os.fork() function is not present (e.g. on Windows), -subprocess.Popen() is used as a fallback, with slightly altered semantics. - -In all cases, the implementation is intentionally naive -- all -requests are executed synchronously. - -SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL --- it may execute arbitrary Python code or external programs. - -Note that status code 200 is sent prior to execution of a CGI script, so -scripts cannot send other status codes such as 302 (redirect). - -XXX To do: - -- log requests even later (to capture byte count) -- log user-agent header and other interesting goodies -- send error log to separate file -""" - -from __future__ import (absolute_import, division, - print_function, unicode_literals) -from future import utils -from future.builtins import * - - -# See also: -# -# HTTP Working Group T. Berners-Lee -# INTERNET-DRAFT R. T. Fielding -# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen -# Expires September 8, 1995 March 8, 1995 -# -# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt -# -# and -# -# Network Working Group R. Fielding -# Request for Comments: 2616 et al -# Obsoletes: 2068 June 1999 -# Category: Standards Track -# -# URL: http://www.faqs.org/rfcs/rfc2616.html - -# Log files -# --------- -# -# Here's a quote from the NCSA httpd docs about log file format. -# -# | The logfile format is as follows. 
Each line consists of: -# | -# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb -# | -# | host: Either the DNS name or the IP number of the remote client -# | rfc931: Any information returned by identd for this person, -# | - otherwise. -# | authuser: If user sent a userid for authentication, the user name, -# | - otherwise. -# | DD: Day -# | Mon: Month (calendar name) -# | YYYY: Year -# | hh: hour (24-hour format, the machine's timezone) -# | mm: minutes -# | ss: seconds -# | request: The first line of the HTTP request as sent by the client. -# | ddd: the status code returned by the server, - if not available. -# | bbbb: the total number of bytes sent, -# | *not including the HTTP/1.0 header*, - if not available -# | -# | You can determine the name of the file accessed through request. -# -# (Actually, the latter is only true if you know the server configuration -# at the time the request was made!) - -__version__ = "0.6" - -__all__ = ["HTTPServer", "BaseHTTPRequestHandler"] - -from future.backports import html -from future.backports.http import client as http_client -from future.backports.urllib import parse as urllib_parse -from future.backports import socketserver - -import io -import mimetypes -import os -import posixpath -import select -import shutil -import socket # For gethostbyaddr() -import sys -import time -import copy -import argparse - - -# Default error message template -DEFAULT_ERROR_MESSAGE = """\ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> - <head> - <meta http-equiv="Content-Type" content="text/html;charset=utf-8"> - <title>Error response</title> - </head> - <body> - <h1>Error response</h1> - <p>Error code: %(code)d</p> - <p>Message: %(message)s.</p> - <p>Error code explanation: %(code)s - %(explain)s.</p> - </body> -</html> -""" - -DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8" - -def _quote_html(html): - return html.replace("&", "&amp;").replace("<", "&lt;").replace(">",
"&gt;") - -class HTTPServer(socketserver.TCPServer): - - allow_reuse_address = 1 # Seems to make sense in testing environment - - def server_bind(self): - """Override server_bind to store the server name.""" - socketserver.TCPServer.server_bind(self) - host, port = self.socket.getsockname()[:2] - self.server_name = socket.getfqdn(host) - self.server_port = port - - -class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): - - """HTTP request handler base class. - - The following explanation of HTTP serves to guide you through the - code as well as to expose any misunderstandings I may have about - HTTP (so you don't need to read the code to figure out I'm wrong - :-). - - HTTP (HyperText Transfer Protocol) is an extensible protocol on - top of a reliable stream transport (e.g. TCP/IP). The protocol - recognizes three parts to a request: - - 1. One line identifying the request type and path - 2. An optional set of RFC-822-style headers - 3. An optional data part - - The headers and data are separated by a blank line. - - The first line of the request has the form - - <command> <path> <version> - - where <command> is a (case-sensitive) keyword such as GET or POST, - <path> is a string containing path information for the request, - and <version> should be the string "HTTP/1.0" or "HTTP/1.1". - <path> is encoded using the URL encoding scheme (using %xx to signify - the ASCII character with hex code xx). - - The specification specifies that lines are separated by CRLF but - for compatibility with the widest range of clients recommends - servers also handle LF. Similarly, whitespace in the request line - is treated sensibly (allowing multiple spaces between components - and allowing trailing whitespace). - - Similarly, for output, lines ought to be separated by CRLF pairs - but most clients grok LF characters just fine. - - If the first line of the request has the form - - <command> <path> - - (i.e.
<version> is left out) then this is assumed to be an HTTP - 0.9 request; this form has no optional headers and data part and - the reply consists of just the data. - - The reply form of the HTTP 1.x protocol again has three parts: - - 1. One line giving the response code - 2. An optional set of RFC-822-style headers - 3. The data - - Again, the headers and data are separated by a blank line. - - The response code line has the form - - <version> <responsecode> <responsestring> - - where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"), - <responsecode> is a 3-digit response code indicating success or - failure of the request, and <responsestring> is an optional - human-readable string explaining what the response code means. - - This server parses the request and the headers, and then calls a - function specific to the request type (<command>). Specifically, - a request SPAM will be handled by a method do_SPAM(). If no - such method exists the server sends an error response to the - client. If it exists, it is called with no arguments: - - do_SPAM() - - Note that the request name is case sensitive (i.e. SPAM and spam - are different requests). - - The various request details are stored in instance variables: - - - client_address is the client IP address in the form (host, - port); - - - command, path and version are the broken-down request line; - - - headers is an instance of email.message.Message (or a derived - class) containing the header information; - - - rfile is a file object open for reading positioned at the - start of the optional input data part; - - - wfile is a file object open for writing. - - IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING! - - The first thing to be written must be the response line. Then - follow 0 or more header lines, then a blank line, and then the - actual data (if any). 
The meaning of the header lines depends on - the command executed by the server; in most cases, when data is - returned, there should be at least one header line of the form - - Content-type: <type>/<subtype> - - where <type> and <subtype> should be registered MIME types, - e.g. "text/html" or "text/plain". - - """ - - # The Python system version, truncated to its first component. - sys_version = "Python/" + sys.version.split()[0] - - # The server software version. You may want to override this. - # The format is multiple whitespace-separated strings, - # where each string is of the form name[/version]. - server_version = "BaseHTTP/" + __version__ - - error_message_format = DEFAULT_ERROR_MESSAGE - error_content_type = DEFAULT_ERROR_CONTENT_TYPE - - # The default request version. This only affects responses up until - # the point where the request line is parsed, so it mainly decides what - # the client gets back when sending a malformed request line. - # Most web servers default to HTTP 0.9, i.e. don't send a status line. - default_request_version = "HTTP/0.9" - - def parse_request(self): - """Parse a request (internal). - - The request should be stored in self.raw_requestline; the results - are in self.command, self.path, self.request_version and - self.headers. - - Return True for success, False for failure; on failure, an - error is sent back. 
- - """ - self.command = None # set in case of error on the first line - self.request_version = version = self.default_request_version - self.close_connection = 1 - requestline = str(self.raw_requestline, 'iso-8859-1') - requestline = requestline.rstrip('\r\n') - self.requestline = requestline - words = requestline.split() - if len(words) == 3: - command, path, version = words - if version[:5] != 'HTTP/': - self.send_error(400, "Bad request version (%r)" % version) - return False - try: - base_version_number = version.split('/', 1)[1] - version_number = base_version_number.split(".") - # RFC 2145 section 3.1 says there can be only one "." and - # - major and minor numbers MUST be treated as - # separate integers; - # - HTTP/2.4 is a lower version than HTTP/2.13, which in - # turn is lower than HTTP/12.3; - # - Leading zeros MUST be ignored by recipients. - if len(version_number) != 2: - raise ValueError - version_number = int(version_number[0]), int(version_number[1]) - except (ValueError, IndexError): - self.send_error(400, "Bad request version (%r)" % version) - return False - if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1": - self.close_connection = 0 - if version_number >= (2, 0): - self.send_error(505, - "Invalid HTTP Version (%s)" % base_version_number) - return False - elif len(words) == 2: - command, path = words - self.close_connection = 1 - if command != 'GET': - self.send_error(400, - "Bad HTTP/0.9 request type (%r)" % command) - return False - elif not words: - return False - else: - self.send_error(400, "Bad request syntax (%r)" % requestline) - return False - self.command, self.path, self.request_version = command, path, version - - # Examine the headers and look for a Connection directive. 
- try: - self.headers = http_client.parse_headers(self.rfile, - _class=self.MessageClass) - except http_client.LineTooLong: - self.send_error(400, "Line too long") - return False - - conntype = self.headers.get('Connection', "") - if conntype.lower() == 'close': - self.close_connection = 1 - elif (conntype.lower() == 'keep-alive' and - self.protocol_version >= "HTTP/1.1"): - self.close_connection = 0 - # Examine the headers and look for an Expect directive - expect = self.headers.get('Expect', "") - if (expect.lower() == "100-continue" and - self.protocol_version >= "HTTP/1.1" and - self.request_version >= "HTTP/1.1"): - if not self.handle_expect_100(): - return False - return True - - def handle_expect_100(self): - """Decide what to do with an "Expect: 100-continue" header. - - If the client is expecting a 100 Continue response, we must - respond with either a 100 Continue or a final response before - waiting for the request body. The default is to always respond - with a 100 Continue. You can behave differently (for example, - reject unauthorized requests) by overriding this method. - - This method should either return True (possibly after sending - a 100 Continue response) or send an error response and return - False. - - """ - self.send_response_only(100) - self.flush_headers() - return True - - def handle_one_request(self): - """Handle a single HTTP request. - - You normally don't need to override this method; see the class - __doc__ string for information on how to handle specific HTTP - commands such as GET and POST. 
- - """ - try: - self.raw_requestline = self.rfile.readline(65537) - if len(self.raw_requestline) > 65536: - self.requestline = '' - self.request_version = '' - self.command = '' - self.send_error(414) - return - if not self.raw_requestline: - self.close_connection = 1 - return - if not self.parse_request(): - # An error code has been sent, just exit - return - mname = 'do_' + self.command - if not hasattr(self, mname): - self.send_error(501, "Unsupported method (%r)" % self.command) - return - method = getattr(self, mname) - method() - self.wfile.flush() #actually send the response if not already done. - except socket.timeout as e: - #a read or a write timed out. Discard this connection - self.log_error("Request timed out: %r", e) - self.close_connection = 1 - return - - def handle(self): - """Handle multiple requests if necessary.""" - self.close_connection = 1 - - self.handle_one_request() - while not self.close_connection: - self.handle_one_request() - - def send_error(self, code, message=None): - """Send and log an error reply. - - Arguments are the error code, and a detailed message. - The detailed message defaults to the short entry matching the - response code. - - This sends an error response (so it must be called before any - output has been generated), logs the error, and finally sends - a piece of HTML explaining the error to the user. - - """ - - try: - shortmsg, longmsg = self.responses[code] - except KeyError: - shortmsg, longmsg = '???', '???' 
- if message is None: - message = shortmsg - explain = longmsg - self.log_error("code %d, message %s", code, message) - # using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201) - content = (self.error_message_format % - {'code': code, 'message': _quote_html(message), 'explain': explain}) - self.send_response(code, message) - self.send_header("Content-Type", self.error_content_type) - self.send_header('Connection', 'close') - self.end_headers() - if self.command != 'HEAD' and code >= 200 and code not in (204, 304): - self.wfile.write(content.encode('UTF-8', 'replace')) - - def send_response(self, code, message=None): - """Add the response header to the headers buffer and log the - response code. - - Also send two standard headers with the server software - version and the current date. - - """ - self.log_request(code) - self.send_response_only(code, message) - self.send_header('Server', self.version_string()) - self.send_header('Date', self.date_time_string()) - - def send_response_only(self, code, message=None): - """Send the response header only.""" - if message is None: - if code in self.responses: - message = self.responses[code][0] - else: - message = '' - if self.request_version != 'HTTP/0.9': - if not hasattr(self, '_headers_buffer'): - self._headers_buffer = [] - self._headers_buffer.append(("%s %d %s\r\n" % - (self.protocol_version, code, message)).encode( - 'latin-1', 'strict')) - - def send_header(self, keyword, value): - """Send a MIME header to the headers buffer.""" - if self.request_version != 'HTTP/0.9': - if not hasattr(self, '_headers_buffer'): - self._headers_buffer = [] - self._headers_buffer.append( - ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict')) - - if keyword.lower() == 'connection': - if value.lower() == 'close': - self.close_connection = 1 - elif value.lower() == 'keep-alive': - self.close_connection = 0 - - def end_headers(self): - """Send the blank line ending the MIME headers.""" - if 
self.request_version != 'HTTP/0.9': - self._headers_buffer.append(b"\r\n") - self.flush_headers() - - def flush_headers(self): - if hasattr(self, '_headers_buffer'): - self.wfile.write(b"".join(self._headers_buffer)) - self._headers_buffer = [] - - def log_request(self, code='-', size='-'): - """Log an accepted request. - - This is called by send_response(). - - """ - - self.log_message('"%s" %s %s', - self.requestline, str(code), str(size)) - - def log_error(self, format, *args): - """Log an error. - - This is called when a request cannot be fulfilled. By - default it passes the message on to log_message(). - - Arguments are the same as for log_message(). - - XXX This should go to the separate error log. - - """ - - self.log_message(format, *args) - - def log_message(self, format, *args): - """Log an arbitrary message. - - This is used by all other logging functions. Override - it if you have specific logging wishes. - - The first argument, FORMAT, is a format string for the - message to be logged. If the format string contains - any % escapes requiring parameters, they should be - specified as subsequent arguments (it's just like - printf!). - - The client ip and current date/time are prefixed to - every message. 
- - """ - - sys.stderr.write("%s - - [%s] %s\n" % - (self.address_string(), - self.log_date_time_string(), - format%args)) - - def version_string(self): - """Return the server software version string.""" - return self.server_version + ' ' + self.sys_version - - def date_time_string(self, timestamp=None): - """Return the current date and time formatted for a message header.""" - if timestamp is None: - timestamp = time.time() - year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp) - s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % ( - self.weekdayname[wd], - day, self.monthname[month], year, - hh, mm, ss) - return s - - def log_date_time_string(self): - """Return the current time formatted for logging.""" - now = time.time() - year, month, day, hh, mm, ss, x, y, z = time.localtime(now) - s = "%02d/%3s/%04d %02d:%02d:%02d" % ( - day, self.monthname[month], year, hh, mm, ss) - return s - - weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] - - monthname = [None, - 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', - 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] - - def address_string(self): - """Return the client address.""" - - return self.client_address[0] - - # Essentially static class variables - - # The version of the HTTP protocol we support. - # Set this to HTTP/1.1 to enable automatic keepalive - protocol_version = "HTTP/1.0" - - # MessageClass used to parse headers - MessageClass = http_client.HTTPMessage - - # Table mapping response codes to messages; entries have the - # form {code: (shortmessage, longmessage)}. - # See RFC 2616 and 6585. 
- responses = { - 100: ('Continue', 'Request received, please continue'), - 101: ('Switching Protocols', - 'Switching to new protocol; obey Upgrade header'), - - 200: ('OK', 'Request fulfilled, document follows'), - 201: ('Created', 'Document created, URL follows'), - 202: ('Accepted', - 'Request accepted, processing continues off-line'), - 203: ('Non-Authoritative Information', 'Request fulfilled from cache'), - 204: ('No Content', 'Request fulfilled, nothing follows'), - 205: ('Reset Content', 'Clear input form for further input.'), - 206: ('Partial Content', 'Partial content follows.'), - - 300: ('Multiple Choices', - 'Object has several resources -- see URI list'), - 301: ('Moved Permanently', 'Object moved permanently -- see URI list'), - 302: ('Found', 'Object moved temporarily -- see URI list'), - 303: ('See Other', 'Object moved -- see Method and URL list'), - 304: ('Not Modified', - 'Document has not changed since given time'), - 305: ('Use Proxy', - 'You must use proxy specified in Location to access this ' - 'resource.'), - 307: ('Temporary Redirect', - 'Object moved temporarily -- see URI list'), - - 400: ('Bad Request', - 'Bad request syntax or unsupported method'), - 401: ('Unauthorized', - 'No permission -- see authorization schemes'), - 402: ('Payment Required', - 'No payment -- see charging schemes'), - 403: ('Forbidden', - 'Request forbidden -- authorization will not help'), - 404: ('Not Found', 'Nothing matches the given URI'), - 405: ('Method Not Allowed', - 'Specified method is invalid for this resource.'), - 406: ('Not Acceptable', 'URI not available in preferred format.'), - 407: ('Proxy Authentication Required', 'You must authenticate with ' - 'this proxy before proceeding.'), - 408: ('Request Timeout', 'Request timed out; try again later.'), - 409: ('Conflict', 'Request conflict.'), - 410: ('Gone', - 'URI no longer exists and has been permanently removed.'), - 411: ('Length Required', 'Client must specify Content-Length.'), - 412: 
('Precondition Failed', 'Precondition in headers is false.'), - 413: ('Request Entity Too Large', 'Entity is too large.'), - 414: ('Request-URI Too Long', 'URI is too long.'), - 415: ('Unsupported Media Type', 'Entity body in unsupported format.'), - 416: ('Requested Range Not Satisfiable', - 'Cannot satisfy request range.'), - 417: ('Expectation Failed', - 'Expect condition could not be satisfied.'), - 428: ('Precondition Required', - 'The origin server requires the request to be conditional.'), - 429: ('Too Many Requests', 'The user has sent too many requests ' - 'in a given amount of time ("rate limiting").'), - 431: ('Request Header Fields Too Large', 'The server is unwilling to ' - 'process the request because its header fields are too large.'), - - 500: ('Internal Server Error', 'Server got itself in trouble'), - 501: ('Not Implemented', - 'Server does not support this operation'), - 502: ('Bad Gateway', 'Invalid responses from another server/proxy.'), - 503: ('Service Unavailable', - 'The server cannot process the request due to a high load'), - 504: ('Gateway Timeout', - 'The gateway server did not receive a timely response'), - 505: ('HTTP Version Not Supported', 'Cannot fulfill request.'), - 511: ('Network Authentication Required', - 'The client needs to authenticate to gain network access.'), - } - - -class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): - - """Simple HTTP request handler with GET and HEAD commands. - - This serves files from the current directory and any of its - subdirectories. The MIME type for files is determined by - calling the .guess_type() method. - - The GET and HEAD requests are identical except that the HEAD - request omits the actual contents of the file. 
- - """ - - server_version = "SimpleHTTP/" + __version__ - - def do_GET(self): - """Serve a GET request.""" - f = self.send_head() - if f: - self.copyfile(f, self.wfile) - f.close() - - def do_HEAD(self): - """Serve a HEAD request.""" - f = self.send_head() - if f: - f.close() - - def send_head(self): - """Common code for GET and HEAD commands. - - This sends the response code and MIME headers. - - Return value is either a file object (which has to be copied - to the outputfile by the caller unless the command was HEAD, - and must be closed by the caller under all circumstances), or - None, in which case the caller has nothing further to do. - - """ - path = self.translate_path(self.path) - f = None - if os.path.isdir(path): - if not self.path.endswith('/'): - # redirect browser - doing basically what apache does - self.send_response(301) - self.send_header("Location", self.path + "/") - self.end_headers() - return None - for index in "index.html", "index.htm": - index = os.path.join(path, index) - if os.path.exists(index): - path = index - break - else: - return self.list_directory(path) - ctype = self.guess_type(path) - try: - f = open(path, 'rb') - except IOError: - self.send_error(404, "File not found") - return None - self.send_response(200) - self.send_header("Content-type", ctype) - fs = os.fstat(f.fileno()) - self.send_header("Content-Length", str(fs[6])) - self.send_header("Last-Modified", self.date_time_string(fs.st_mtime)) - self.end_headers() - return f - - def list_directory(self, path): - """Helper to produce a directory listing (absent index.html). - - Return value is either a file object, or None (indicating an - error). In either case, the headers are sent, making the - interface the same as for send_head(). 
- - """ - try: - list = os.listdir(path) - except os.error: - self.send_error(404, "No permission to list directory") - return None - list.sort(key=lambda a: a.lower()) - r = [] - displaypath = html.escape(urllib_parse.unquote(self.path)) - enc = sys.getfilesystemencoding() - title = 'Directory listing for %s' % displaypath - r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" ' - '"http://www.w3.org/TR/html4/strict.dtd">') - r.append('<html>\n<head>') - r.append('<meta http-equiv="Content-Type" ' - 'content="text/html; charset=%s">' % enc) - r.append('<title>%s</title>\n</head>' % title) - r.append('<body>\n<h1>%s</h1>' % title) - r.append('<hr>\n<ul>') - for name in list: - fullname = os.path.join(path, name) - displayname = linkname = name - # Append / for directories or @ for symbolic links - if os.path.isdir(fullname): - displayname = name + "/" - linkname = name + "/" - if os.path.islink(fullname): - displayname = name + "@" - # Note: a link to a directory displays with @ and links with / - r.append('<li><a href="%s">%s</a></li>' - % (urllib_parse.quote(linkname), html.escape(displayname))) - # # Use this instead: - # r.append('<li><a href="%s">%s</a></li>' - # % (urllib.quote(linkname), cgi.escape(displayname))) - r.append('</ul>\n<hr>\n</body>\n</html>\n') - encoded = '\n'.join(r).encode(enc) - f = io.BytesIO() - f.write(encoded) - f.seek(0) - self.send_response(200) - self.send_header("Content-type", "text/html; charset=%s" % enc) - self.send_header("Content-Length", str(len(encoded))) - self.end_headers() - return f - - def translate_path(self, path): - """Translate a /-separated PATH to the local filename syntax. - - Components that mean special things to the local file system - (e.g. drive or directory names) are ignored. (XXX They should - probably be diagnosed.) 
- - """ - # abandon query parameters - path = path.split('?',1)[0] - path = path.split('#',1)[0] - path = posixpath.normpath(urllib_parse.unquote(path)) - words = path.split('/') - words = filter(None, words) - path = os.getcwd() - for word in words: - drive, word = os.path.splitdrive(word) - head, word = os.path.split(word) - if word in (os.curdir, os.pardir): continue - path = os.path.join(path, word) - return path - - def copyfile(self, source, outputfile): - """Copy all data between two file objects. - - The SOURCE argument is a file object open for reading - (or anything with a read() method) and the DESTINATION - argument is a file object open for writing (or - anything with a write() method). - - The only reason for overriding this would be to change - the block size or perhaps to replace newlines by CRLF - -- note however that this the default server uses this - to copy binary data as well. - - """ - shutil.copyfileobj(source, outputfile) - - def guess_type(self, path): - """Guess the type of a file. - - Argument is a PATH (a filename). - - Return value is a string of the form type/subtype, - usable for a MIME Content-type header. - - The default implementation looks the file's extension - up in the table self.extensions_map, using application/octet-stream - as a default; however it would be permissible (if - slow) to look inside the data to make a better guess. 
- - """ - - base, ext = posixpath.splitext(path) - if ext in self.extensions_map: - return self.extensions_map[ext] - ext = ext.lower() - if ext in self.extensions_map: - return self.extensions_map[ext] - else: - return self.extensions_map[''] - - if not mimetypes.inited: - mimetypes.init() # try to read system mime.types - extensions_map = mimetypes.types_map.copy() - extensions_map.update({ - '': 'application/octet-stream', # Default - '.py': 'text/plain', - '.c': 'text/plain', - '.h': 'text/plain', - }) - - -# Utilities for CGIHTTPRequestHandler - -def _url_collapse_path(path): - """ - Given a URL path, remove extra '/'s and '.' path elements and collapse - any '..' references and returns a colllapsed path. - - Implements something akin to RFC-2396 5.2 step 6 to parse relative paths. - The utility of this function is limited to is_cgi method and helps - preventing some security attacks. - - Returns: A tuple of (head, tail) where tail is everything after the final / - and head is everything before it. Head will always start with a '/' and, - if it contains anything else, never have a trailing '/'. - - Raises: IndexError if too many '..' occur within the path. - - """ - # Similar to os.path.split(os.path.normpath(path)) but specific to URL - # path semantics rather than local operating system semantics. - path_parts = path.split('/') - head_parts = [] - for part in path_parts[:-1]: - if part == '..': - head_parts.pop() # IndexError if more '..' 
than prior parts - elif part and part != '.': - head_parts.append( part ) - if path_parts: - tail_part = path_parts.pop() - if tail_part: - if tail_part == '..': - head_parts.pop() - tail_part = '' - elif tail_part == '.': - tail_part = '' - else: - tail_part = '' - - splitpath = ('/' + '/'.join(head_parts), tail_part) - collapsed_path = "/".join(splitpath) - - return collapsed_path - - - -nobody = None - -def nobody_uid(): - """Internal routine to get nobody's uid""" - global nobody - if nobody: - return nobody - try: - import pwd - except ImportError: - return -1 - try: - nobody = pwd.getpwnam('nobody')[2] - except KeyError: - nobody = 1 + max(x[2] for x in pwd.getpwall()) - return nobody - - -def executable(path): - """Test for executable file.""" - return os.access(path, os.X_OK) - - -class CGIHTTPRequestHandler(SimpleHTTPRequestHandler): - - """Complete HTTP server with GET, HEAD and POST commands. - - GET and HEAD also support running CGI scripts. - - The POST command is *only* implemented for CGI scripts. - - """ - - # Determine platform specifics - have_fork = hasattr(os, 'fork') - - # Make rfile unbuffered -- we need to read one line and then pass - # the rest to a subprocess, so we can't use buffered input. - rbufsize = 0 - - def do_POST(self): - """Serve a POST request. - - This is only implemented for CGI scripts. - - """ - - if self.is_cgi(): - self.run_cgi() - else: - self.send_error(501, "Can only POST to CGI scripts") - - def send_head(self): - """Version of send_head that support CGI scripts""" - if self.is_cgi(): - return self.run_cgi() - else: - return SimpleHTTPRequestHandler.send_head(self) - - def is_cgi(self): - """Test whether self.path corresponds to a CGI script. - - Returns True and updates the cgi_info attribute to the tuple - (dir, rest) if self.path requires running a CGI script. - Returns False otherwise. - - If any exception is raised, the caller should assume that - self.path was rejected as invalid and act accordingly. 
- - The default implementation tests whether the normalized url - path begins with one of the strings in self.cgi_directories - (and the next character is a '/' or the end of the string). - - """ - collapsed_path = _url_collapse_path(self.path) - dir_sep = collapsed_path.find('/', 1) - head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:] - if head in self.cgi_directories: - self.cgi_info = head, tail - return True - return False - - - cgi_directories = ['/cgi-bin', '/htbin'] - - def is_executable(self, path): - """Test whether argument path is an executable file.""" - return executable(path) - - def is_python(self, path): - """Test whether argument path is a Python script.""" - head, tail = os.path.splitext(path) - return tail.lower() in (".py", ".pyw") - - def run_cgi(self): - """Execute a CGI script.""" - path = self.path - dir, rest = self.cgi_info - - i = path.find('/', len(dir) + 1) - while i >= 0: - nextdir = path[:i] - nextrest = path[i+1:] - - scriptdir = self.translate_path(nextdir) - if os.path.isdir(scriptdir): - dir, rest = nextdir, nextrest - i = path.find('/', len(dir) + 1) - else: - break - - # find an explicit query string, if present. - i = rest.rfind('?') - if i >= 0: - rest, query = rest[:i], rest[i+1:] - else: - query = '' - - # dissect the part after the directory name into a script name & - # a possible additional path, to be stored in PATH_INFO. 
- i = rest.find('/') - if i >= 0: - script, rest = rest[:i], rest[i:] - else: - script, rest = rest, '' - - scriptname = dir + '/' + script - scriptfile = self.translate_path(scriptname) - if not os.path.exists(scriptfile): - self.send_error(404, "No such CGI script (%r)" % scriptname) - return - if not os.path.isfile(scriptfile): - self.send_error(403, "CGI script is not a plain file (%r)" % - scriptname) - return - ispy = self.is_python(scriptname) - if self.have_fork or not ispy: - if not self.is_executable(scriptfile): - self.send_error(403, "CGI script is not executable (%r)" % - scriptname) - return - - # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html - # XXX Much of the following could be prepared ahead of time! - env = copy.deepcopy(os.environ) - env['SERVER_SOFTWARE'] = self.version_string() - env['SERVER_NAME'] = self.server.server_name - env['GATEWAY_INTERFACE'] = 'CGI/1.1' - env['SERVER_PROTOCOL'] = self.protocol_version - env['SERVER_PORT'] = str(self.server.server_port) - env['REQUEST_METHOD'] = self.command - uqrest = urllib_parse.unquote(rest) - env['PATH_INFO'] = uqrest - env['PATH_TRANSLATED'] = self.translate_path(uqrest) - env['SCRIPT_NAME'] = scriptname - if query: - env['QUERY_STRING'] = query - env['REMOTE_ADDR'] = self.client_address[0] - authorization = self.headers.get("authorization") - if authorization: - authorization = authorization.split() - if len(authorization) == 2: - import base64, binascii - env['AUTH_TYPE'] = authorization[0] - if authorization[0].lower() == "basic": - try: - authorization = authorization[1].encode('ascii') - if utils.PY3: - # In Py3.3, was: - authorization = base64.decodebytes(authorization).\ - decode('ascii') - else: - # Backport to Py2.7: - authorization = base64.decodestring(authorization).\ - decode('ascii') - except (binascii.Error, UnicodeError): - pass - else: - authorization = authorization.split(':') - if len(authorization) == 2: - env['REMOTE_USER'] = authorization[0] - # XXX REMOTE_IDENT - if 
self.headers.get('content-type') is None: - env['CONTENT_TYPE'] = self.headers.get_content_type() - else: - env['CONTENT_TYPE'] = self.headers['content-type'] - length = self.headers.get('content-length') - if length: - env['CONTENT_LENGTH'] = length - referer = self.headers.get('referer') - if referer: - env['HTTP_REFERER'] = referer - accept = [] - for line in self.headers.getallmatchingheaders('accept'): - if line[:1] in "\t\n\r ": - accept.append(line.strip()) - else: - accept = accept + line[7:].split(',') - env['HTTP_ACCEPT'] = ','.join(accept) - ua = self.headers.get('user-agent') - if ua: - env['HTTP_USER_AGENT'] = ua - co = filter(None, self.headers.get_all('cookie', [])) - cookie_str = ', '.join(co) - if cookie_str: - env['HTTP_COOKIE'] = cookie_str - # XXX Other HTTP_* headers - # Since we're setting the env in the parent, provide empty - # values to override previously set values - for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH', - 'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'): - env.setdefault(k, "") - - self.send_response(200, "Script output follows") - self.flush_headers() - - decoded_query = query.replace('+', ' ') - - if self.have_fork: - # Unix -- fork as we should - args = [script] - if '=' not in decoded_query: - args.append(decoded_query) - nobody = nobody_uid() - self.wfile.flush() # Always flush before forking - pid = os.fork() - if pid != 0: - # Parent - pid, sts = os.waitpid(pid, 0) - # throw away additional data [see bug #427345] - while select.select([self.rfile], [], [], 0)[0]: - if not self.rfile.read(1): - break - if sts: - self.log_error("CGI script exit status %#x", sts) - return - # Child - try: - try: - os.setuid(nobody) - except os.error: - pass - os.dup2(self.rfile.fileno(), 0) - os.dup2(self.wfile.fileno(), 1) - os.execve(scriptfile, args, env) - except: - self.server.handle_error(self.request, self.client_address) - os._exit(127) - - else: - # Non-Unix -- use subprocess - import subprocess - cmdline = [scriptfile] 
- if self.is_python(scriptfile): - interp = sys.executable - if interp.lower().endswith("w.exe"): - # On Windows, use python.exe, not pythonw.exe - interp = interp[:-5] + interp[-4:] - cmdline = [interp, '-u'] + cmdline - if '=' not in query: - cmdline.append(query) - self.log_message("command: %s", subprocess.list2cmdline(cmdline)) - try: - nbytes = int(length) - except (TypeError, ValueError): - nbytes = 0 - p = subprocess.Popen(cmdline, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - env = env - ) - if self.command.lower() == "post" and nbytes > 0: - data = self.rfile.read(nbytes) - else: - data = None - # throw away additional data [see bug #427345] - while select.select([self.rfile._sock], [], [], 0)[0]: - if not self.rfile._sock.recv(1): - break - stdout, stderr = p.communicate(data) - self.wfile.write(stdout) - if stderr: - self.log_error('%s', stderr) - p.stderr.close() - p.stdout.close() - status = p.returncode - if status: - self.log_error("CGI script exit status %#x", status) - else: - self.log_message("CGI script exited OK") - - -def test(HandlerClass = BaseHTTPRequestHandler, - ServerClass = HTTPServer, protocol="HTTP/1.0", port=8000): - """Test the HTTP request handler class. - - This runs an HTTP server on port 8000 (or the first command line - argument). 
- - """ - server_address = ('', port) - - HandlerClass.protocol_version = protocol - httpd = ServerClass(server_address, HandlerClass) - - sa = httpd.socket.getsockname() - print("Serving HTTP on", sa[0], "port", sa[1], "...") - try: - httpd.serve_forever() - except KeyboardInterrupt: - print("\nKeyboard interrupt received, exiting.") - httpd.server_close() - sys.exit(0) - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--cgi', action='store_true', - help='Run as CGI Server') - parser.add_argument('port', action='store', - default=8000, type=int, - nargs='?', - help='Specify alternate port [default: 8000]') - args = parser.parse_args() - if args.cgi: - test(HandlerClass=CGIHTTPRequestHandler, port=args.port) - else: - test(HandlerClass=SimpleHTTPRequestHandler, port=args.port) diff --git a/contrib/python/future/future/backports/misc.py b/contrib/python/future/future/backports/misc.py deleted file mode 100644 index 098a0667e86..00000000000 --- a/contrib/python/future/future/backports/misc.py +++ /dev/null @@ -1,944 +0,0 @@ -""" -Miscellaneous function (re)definitions from the Py3.4+ standard library -for Python 2.6/2.7. 
- -- math.ceil (for Python 2.7) -- collections.OrderedDict (for Python 2.6) -- collections.Counter (for Python 2.6) -- collections.ChainMap (for all versions prior to Python 3.3) -- itertools.count (for Python 2.6, with step parameter) -- subprocess.check_output (for Python 2.6) -- reprlib.recursive_repr (for Python 2.6+) -- functools.cmp_to_key (for Python 2.6) -""" - -from __future__ import absolute_import - -import subprocess -from math import ceil as oldceil - -from operator import itemgetter as _itemgetter, eq as _eq -import sys -import heapq as _heapq -from _weakref import proxy as _proxy -from itertools import repeat as _repeat, chain as _chain, starmap as _starmap -from socket import getaddrinfo, SOCK_STREAM, error, socket - -from future.utils import iteritems, itervalues, PY2, PY26, PY3 - -if PY2: - from collections import Mapping, MutableMapping -else: - from collections.abc import Mapping, MutableMapping - - -def ceil(x): - """ - Return the ceiling of x as an int. - This is the smallest integral value >= x. 
- """ - return int(oldceil(x)) - - -######################################################################## -### reprlib.recursive_repr decorator from Py3.4 -######################################################################## - -from itertools import islice - -if PY3: - try: - from _thread import get_ident - except ImportError: - from _dummy_thread import get_ident -else: - try: - from thread import get_ident - except ImportError: - from dummy_thread import get_ident - - -def recursive_repr(fillvalue='...'): - 'Decorator to make a repr function return fillvalue for a recursive call' - - def decorating_function(user_function): - repr_running = set() - - def wrapper(self): - key = id(self), get_ident() - if key in repr_running: - return fillvalue - repr_running.add(key) - try: - result = user_function(self) - finally: - repr_running.discard(key) - return result - - # Can't use functools.wraps() here because of bootstrap issues - wrapper.__module__ = getattr(user_function, '__module__') - wrapper.__doc__ = getattr(user_function, '__doc__') - wrapper.__name__ = getattr(user_function, '__name__') - wrapper.__annotations__ = getattr(user_function, '__annotations__', {}) - return wrapper - - return decorating_function - - -################################################################################ -### OrderedDict -################################################################################ - -class _Link(object): - __slots__ = 'prev', 'next', 'key', '__weakref__' - -class OrderedDict(dict): - 'Dictionary that remembers insertion order' - # An inherited dict maps keys to values. - # The inherited dict provides __getitem__, __len__, __contains__, and get. - # The remaining methods are order-aware. - # Big-O running times for all methods are the same as regular dictionaries. - - # The internal self.__map dict maps keys to links in a doubly linked list. - # The circular doubly linked list starts and ends with a sentinel element. 
- # The sentinel element never gets deleted (this simplifies the algorithm). - # The sentinel is in self.__hardroot with a weakref proxy in self.__root. - # The prev links are weakref proxies (to prevent circular references). - # Individual links are kept alive by the hard reference in self.__map. - # Those hard references disappear when a key is deleted from an OrderedDict. - - def __init__(*args, **kwds): - '''Initialize an ordered dictionary. The signature is the same as - regular dictionaries, but keyword arguments are not recommended because - their insertion order is arbitrary. - - ''' - if not args: - raise TypeError("descriptor '__init__' of 'OrderedDict' object " - "needs an argument") - self = args[0] - args = args[1:] - if len(args) > 1: - raise TypeError('expected at most 1 arguments, got %d' % len(args)) - try: - self.__root - except AttributeError: - self.__hardroot = _Link() - self.__root = root = _proxy(self.__hardroot) - root.prev = root.next = root - self.__map = {} - self.__update(*args, **kwds) - - def __setitem__(self, key, value, - dict_setitem=dict.__setitem__, proxy=_proxy, Link=_Link): - 'od.__setitem__(i, y) <==> od[i]=y' - # Setting a new item creates a new link at the end of the linked list, - # and the inherited dictionary is updated with the new key/value pair. - if key not in self: - self.__map[key] = link = Link() - root = self.__root - last = root.prev - link.prev, link.next, link.key = last, root, key - last.next = link - root.prev = proxy(link) - dict_setitem(self, key, value) - - def __delitem__(self, key, dict_delitem=dict.__delitem__): - 'od.__delitem__(y) <==> del od[y]' - # Deleting an existing item uses self.__map to find the link which gets - # removed by updating the links in the predecessor and successor nodes. 
- dict_delitem(self, key) - link = self.__map.pop(key) - link_prev = link.prev - link_next = link.next - link_prev.next = link_next - link_next.prev = link_prev - - def __iter__(self): - 'od.__iter__() <==> iter(od)' - # Traverse the linked list in order. - root = self.__root - curr = root.next - while curr is not root: - yield curr.key - curr = curr.next - - def __reversed__(self): - 'od.__reversed__() <==> reversed(od)' - # Traverse the linked list in reverse order. - root = self.__root - curr = root.prev - while curr is not root: - yield curr.key - curr = curr.prev - - def clear(self): - 'od.clear() -> None. Remove all items from od.' - root = self.__root - root.prev = root.next = root - self.__map.clear() - dict.clear(self) - - def popitem(self, last=True): - '''od.popitem() -> (k, v), return and remove a (key, value) pair. - Pairs are returned in LIFO order if last is true or FIFO order if false. - - ''' - if not self: - raise KeyError('dictionary is empty') - root = self.__root - if last: - link = root.prev - link_prev = link.prev - link_prev.next = root - root.prev = link_prev - else: - link = root.next - link_next = link.next - root.next = link_next - link_next.prev = root - key = link.key - del self.__map[key] - value = dict.pop(self, key) - return key, value - - def move_to_end(self, key, last=True): - '''Move an existing element to the end (or beginning if last==False). - - Raises KeyError if the element does not exist. - When last=True, acts like a fast version of self[key]=self.pop(key). 
- - ''' - link = self.__map[key] - link_prev = link.prev - link_next = link.next - link_prev.next = link_next - link_next.prev = link_prev - root = self.__root - if last: - last = root.prev - link.prev = last - link.next = root - last.next = root.prev = link - else: - first = root.next - link.prev = root - link.next = first - root.next = first.prev = link - - def __sizeof__(self): - sizeof = sys.getsizeof - n = len(self) + 1 # number of links including root - size = sizeof(self.__dict__) # instance dictionary - size += sizeof(self.__map) * 2 # internal dict and inherited dict - size += sizeof(self.__hardroot) * n # link objects - size += sizeof(self.__root) * n # proxy objects - return size - - update = __update = MutableMapping.update - keys = MutableMapping.keys - values = MutableMapping.values - items = MutableMapping.items - __ne__ = MutableMapping.__ne__ - - __marker = object() - - def pop(self, key, default=__marker): - '''od.pop(k[,d]) -> v, remove specified key and return the corresponding - value. If key is not found, d is returned if given, otherwise KeyError - is raised. 
- - ''' - if key in self: - result = self[key] - del self[key] - return result - if default is self.__marker: - raise KeyError(key) - return default - - def setdefault(self, key, default=None): - 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od' - if key in self: - return self[key] - self[key] = default - return default - - @recursive_repr() - def __repr__(self): - 'od.__repr__() <==> repr(od)' - if not self: - return '%s()' % (self.__class__.__name__,) - return '%s(%r)' % (self.__class__.__name__, list(self.items())) - - def __reduce__(self): - 'Return state information for pickling' - inst_dict = vars(self).copy() - for k in vars(OrderedDict()): - inst_dict.pop(k, None) - return self.__class__, (), inst_dict or None, None, iter(self.items()) - - def copy(self): - 'od.copy() -> a shallow copy of od' - return self.__class__(self) - - @classmethod - def fromkeys(cls, iterable, value=None): - '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S. - If not specified, the value defaults to None. - - ''' - self = cls() - for key in iterable: - self[key] = value - return self - - def __eq__(self, other): - '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive - while comparison to a regular mapping is order-insensitive. - - ''' - if isinstance(other, OrderedDict): - return dict.__eq__(self, other) and all(map(_eq, self, other)) - return dict.__eq__(self, other) - - -# {{{ http://code.activestate.com/recipes/576611/ (r11) - -try: - from operator import itemgetter - from heapq import nlargest -except ImportError: - pass - -######################################################################## -### Counter -######################################################################## - -def _count_elements(mapping, iterable): - 'Tally elements from the iterable.' - mapping_get = mapping.get - for elem in iterable: - mapping[elem] = mapping_get(elem, 0) + 1 - -class Counter(dict): - '''Dict subclass for counting hashable items. 
Sometimes called a bag - or multiset. Elements are stored as dictionary keys and their counts - are stored as dictionary values. - - >>> c = Counter('abcdeabcdabcaba') # count elements from a string - - >>> c.most_common(3) # three most common elements - [('a', 5), ('b', 4), ('c', 3)] - >>> sorted(c) # list all unique elements - ['a', 'b', 'c', 'd', 'e'] - >>> ''.join(sorted(c.elements())) # list elements with repetitions - 'aaaaabbbbcccdde' - >>> sum(c.values()) # total of all counts - 15 - - >>> c['a'] # count of letter 'a' - 5 - >>> for elem in 'shazam': # update counts from an iterable - ... c[elem] += 1 # by adding 1 to each element's count - >>> c['a'] # now there are seven 'a' - 7 - >>> del c['b'] # remove all 'b' - >>> c['b'] # now there are zero 'b' - 0 - - >>> d = Counter('simsalabim') # make another counter - >>> c.update(d) # add in the second counter - >>> c['a'] # now there are nine 'a' - 9 - - >>> c.clear() # empty the counter - >>> c - Counter() - - Note: If a count is set to zero or reduced to zero, it will remain - in the counter until the entry is deleted or the counter is cleared: - - >>> c = Counter('aaabbc') - >>> c['b'] -= 2 # reduce the count of 'b' by two - >>> c.most_common() # 'b' is still in, but its count is zero - [('a', 3), ('c', 1), ('b', 0)] - - ''' - # References: - # http://en.wikipedia.org/wiki/Multiset - # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html - # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm - # http://code.activestate.com/recipes/259174/ - # Knuth, TAOCP Vol. II section 4.6.3 - - def __init__(*args, **kwds): - '''Create a new, empty Counter object. And if given, count elements - from an input iterable. Or, initialize the count from another mapping - of elements to their counts. 
- - >>> c = Counter() # a new, empty counter - >>> c = Counter('gallahad') # a new counter from an iterable - >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping - >>> c = Counter(a=4, b=2) # a new counter from keyword args - - ''' - if not args: - raise TypeError("descriptor '__init__' of 'Counter' object " - "needs an argument") - self = args[0] - args = args[1:] - if len(args) > 1: - raise TypeError('expected at most 1 arguments, got %d' % len(args)) - super(Counter, self).__init__() - self.update(*args, **kwds) - - def __missing__(self, key): - 'The count of elements not in the Counter is zero.' - # Needed so that self[missing_item] does not raise KeyError - return 0 - - def most_common(self, n=None): - '''List the n most common elements and their counts from the most - common to the least. If n is None, then list all element counts. - - >>> Counter('abcdeabcdabcaba').most_common(3) - [('a', 5), ('b', 4), ('c', 3)] - - ''' - # Emulate Bag.sortedByCount from Smalltalk - if n is None: - return sorted(self.items(), key=_itemgetter(1), reverse=True) - return _heapq.nlargest(n, self.items(), key=_itemgetter(1)) - - def elements(self): - '''Iterator over elements repeating each as many times as its count. - - >>> c = Counter('ABCABC') - >>> sorted(c.elements()) - ['A', 'A', 'B', 'B', 'C', 'C'] - - # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1 - >>> prime_factors = Counter({2: 2, 3: 3, 17: 1}) - >>> product = 1 - >>> for factor in prime_factors.elements(): # loop over factors - ... product *= factor # and multiply them - >>> product - 1836 - - Note, if an element's count has been set to zero or is a negative - number, elements() will ignore it. - - ''' - # Emulate Bag.do from Smalltalk and Multiset.begin from C++. 
- return _chain.from_iterable(_starmap(_repeat, self.items())) - - # Override dict methods where necessary - - @classmethod - def fromkeys(cls, iterable, v=None): - # There is no equivalent method for counters because setting v=1 - # means that no element can have a count greater than one. - raise NotImplementedError( - 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.') - - def update(*args, **kwds): - '''Like dict.update() but add counts instead of replacing them. - - Source can be an iterable, a dictionary, or another Counter instance. - - >>> c = Counter('which') - >>> c.update('witch') # add elements from another iterable - >>> d = Counter('watch') - >>> c.update(d) # add elements from another counter - >>> c['h'] # four 'h' in which, witch, and watch - 4 - - ''' - # The regular dict.update() operation makes no sense here because the - # replace behavior results in the some of original untouched counts - # being mixed-in with all of the other counts for a mismash that - # doesn't have a straight-forward interpretation in most counting - # contexts. Instead, we implement straight-addition. Both the inputs - # and outputs are allowed to contain zero and negative counts. - - if not args: - raise TypeError("descriptor 'update' of 'Counter' object " - "needs an argument") - self = args[0] - args = args[1:] - if len(args) > 1: - raise TypeError('expected at most 1 arguments, got %d' % len(args)) - iterable = args[0] if args else None - if iterable is not None: - if isinstance(iterable, Mapping): - if self: - self_get = self.get - for elem, count in iterable.items(): - self[elem] = count + self_get(elem, 0) - else: - super(Counter, self).update(iterable) # fast path when counter is empty - else: - _count_elements(self, iterable) - if kwds: - self.update(kwds) - - def subtract(*args, **kwds): - '''Like dict.update() but subtracts counts instead of replacing them. - Counts can be reduced below zero. 
Both the inputs and outputs are - allowed to contain zero and negative counts. - - Source can be an iterable, a dictionary, or another Counter instance. - - >>> c = Counter('which') - >>> c.subtract('witch') # subtract elements from another iterable - >>> c.subtract(Counter('watch')) # subtract elements from another counter - >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch - 0 - >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch - -1 - - ''' - if not args: - raise TypeError("descriptor 'subtract' of 'Counter' object " - "needs an argument") - self = args[0] - args = args[1:] - if len(args) > 1: - raise TypeError('expected at most 1 arguments, got %d' % len(args)) - iterable = args[0] if args else None - if iterable is not None: - self_get = self.get - if isinstance(iterable, Mapping): - for elem, count in iterable.items(): - self[elem] = self_get(elem, 0) - count - else: - for elem in iterable: - self[elem] = self_get(elem, 0) - 1 - if kwds: - self.subtract(kwds) - - def copy(self): - 'Return a shallow copy.' - return self.__class__(self) - - def __reduce__(self): - return self.__class__, (dict(self),) - - def __delitem__(self, elem): - 'Like dict.__delitem__() but does not raise KeyError for missing values.' - if elem in self: - super(Counter, self).__delitem__(elem) - - def __repr__(self): - if not self: - return '%s()' % self.__class__.__name__ - try: - items = ', '.join(map('%r: %r'.__mod__, self.most_common())) - return '%s({%s})' % (self.__class__.__name__, items) - except TypeError: - # handle case where values are not orderable - return '{0}({1!r})'.format(self.__class__.__name__, dict(self)) - - # Multiset-style mathematical operations discussed in: - # Knuth TAOCP Volume II section 4.6.3 exercise 19 - # and at http://en.wikipedia.org/wiki/Multiset - # - # Outputs guaranteed to only include positive counts. 
- # - # To strip negative and zero counts, add-in an empty counter: - # c += Counter() - - def __add__(self, other): - '''Add counts from two counters. - - >>> Counter('abbb') + Counter('bcc') - Counter({'b': 4, 'c': 2, 'a': 1}) - - ''' - if not isinstance(other, Counter): - return NotImplemented - result = Counter() - for elem, count in self.items(): - newcount = count + other[elem] - if newcount > 0: - result[elem] = newcount - for elem, count in other.items(): - if elem not in self and count > 0: - result[elem] = count - return result - - def __sub__(self, other): - ''' Subtract count, but keep only results with positive counts. - - >>> Counter('abbbc') - Counter('bccd') - Counter({'b': 2, 'a': 1}) - - ''' - if not isinstance(other, Counter): - return NotImplemented - result = Counter() - for elem, count in self.items(): - newcount = count - other[elem] - if newcount > 0: - result[elem] = newcount - for elem, count in other.items(): - if elem not in self and count < 0: - result[elem] = 0 - count - return result - - def __or__(self, other): - '''Union is the maximum of value in either of the input counters. - - >>> Counter('abbb') | Counter('bcc') - Counter({'b': 3, 'c': 2, 'a': 1}) - - ''' - if not isinstance(other, Counter): - return NotImplemented - result = Counter() - for elem, count in self.items(): - other_count = other[elem] - newcount = other_count if count < other_count else count - if newcount > 0: - result[elem] = newcount - for elem, count in other.items(): - if elem not in self and count > 0: - result[elem] = count - return result - - def __and__(self, other): - ''' Intersection is the minimum of corresponding counts. 
- - >>> Counter('abbb') & Counter('bcc') - Counter({'b': 1}) - - ''' - if not isinstance(other, Counter): - return NotImplemented - result = Counter() - for elem, count in self.items(): - other_count = other[elem] - newcount = count if count < other_count else other_count - if newcount > 0: - result[elem] = newcount - return result - - def __pos__(self): - 'Adds an empty counter, effectively stripping negative and zero counts' - return self + Counter() - - def __neg__(self): - '''Subtracts from an empty counter. Strips positive and zero counts, - and flips the sign on negative counts. - - ''' - return Counter() - self - - def _keep_positive(self): - '''Internal method to strip elements with a negative or zero count''' - nonpositive = [elem for elem, count in self.items() if not count > 0] - for elem in nonpositive: - del self[elem] - return self - - def __iadd__(self, other): - '''Inplace add from another counter, keeping only positive counts. - - >>> c = Counter('abbb') - >>> c += Counter('bcc') - >>> c - Counter({'b': 4, 'c': 2, 'a': 1}) - - ''' - for elem, count in other.items(): - self[elem] += count - return self._keep_positive() - - def __isub__(self, other): - '''Inplace subtract counter, but keep only results with positive counts. - - >>> c = Counter('abbbc') - >>> c -= Counter('bccd') - >>> c - Counter({'b': 2, 'a': 1}) - - ''' - for elem, count in other.items(): - self[elem] -= count - return self._keep_positive() - - def __ior__(self, other): - '''Inplace union is the maximum of value from either counter. - - >>> c = Counter('abbb') - >>> c |= Counter('bcc') - >>> c - Counter({'b': 3, 'c': 2, 'a': 1}) - - ''' - for elem, other_count in other.items(): - count = self[elem] - if other_count > count: - self[elem] = other_count - return self._keep_positive() - - def __iand__(self, other): - '''Inplace intersection is the minimum of corresponding counts. 
- - >>> c = Counter('abbb') - >>> c &= Counter('bcc') - >>> c - Counter({'b': 1}) - - ''' - for elem, count in self.items(): - other_count = other[elem] - if other_count < count: - self[elem] = other_count - return self._keep_positive() - - -def check_output(*popenargs, **kwargs): - """ - For Python 2.6 compatibility: see - http://stackoverflow.com/questions/4814970/ - """ - - if 'stdout' in kwargs: - raise ValueError('stdout argument not allowed, it will be overridden.') - process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs) - output, unused_err = process.communicate() - retcode = process.poll() - if retcode: - cmd = kwargs.get("args") - if cmd is None: - cmd = popenargs[0] - raise subprocess.CalledProcessError(retcode, cmd) - return output - - -def count(start=0, step=1): - """ - ``itertools.count`` in Py 2.6 doesn't accept a step - parameter. This is an enhanced version of ``itertools.count`` - for Py2.6 equivalent to ``itertools.count`` in Python 2.7+. - """ - while True: - yield start - start += step - - -######################################################################## -### ChainMap (helper for configparser and string.Template) -### From the Py3.4 source code. See also: -### https://github.com/kkxue/Py2ChainMap/blob/master/py2chainmap.py -######################################################################## - -class ChainMap(MutableMapping): - ''' A ChainMap groups multiple dicts (or other mappings) together - to create a single, updateable view. - - The underlying mappings are stored in a list. That list is public and can - accessed or updated using the *maps* attribute. There is no other state. - - Lookups search the underlying mappings successively until a key is found. - In contrast, writes, updates, and deletions only operate on the first - mapping. - - ''' - - def __init__(self, *maps): - '''Initialize a ChainMap by setting *maps* to the given mappings. - If no mappings are provided, a single empty dictionary is used. 
- - ''' - self.maps = list(maps) or [{}] # always at least one map - - def __missing__(self, key): - raise KeyError(key) - - def __getitem__(self, key): - for mapping in self.maps: - try: - return mapping[key] # can't use 'key in mapping' with defaultdict - except KeyError: - pass - return self.__missing__(key) # support subclasses that define __missing__ - - def get(self, key, default=None): - return self[key] if key in self else default - - def __len__(self): - return len(set().union(*self.maps)) # reuses stored hash values if possible - - def __iter__(self): - return iter(set().union(*self.maps)) - - def __contains__(self, key): - return any(key in m for m in self.maps) - - def __bool__(self): - return any(self.maps) - - # Py2 compatibility: - __nonzero__ = __bool__ - - @recursive_repr() - def __repr__(self): - return '{0.__class__.__name__}({1})'.format( - self, ', '.join(map(repr, self.maps))) - - @classmethod - def fromkeys(cls, iterable, *args): - 'Create a ChainMap with a single dict created from the iterable.' - return cls(dict.fromkeys(iterable, *args)) - - def copy(self): - 'New ChainMap or subclass with a new copy of maps[0] and refs to maps[1:]' - return self.__class__(self.maps[0].copy(), *self.maps[1:]) - - __copy__ = copy - - def new_child(self, m=None): # like Django's Context.push() - ''' - New ChainMap with a new map followed by all previous maps. If no - map is provided, an empty dict is used. - ''' - if m is None: - m = {} - return self.__class__(m, *self.maps) - - @property - def parents(self): # like Django's Context.pop() - 'New ChainMap from maps[1:].' - return self.__class__(*self.maps[1:]) - - def __setitem__(self, key, value): - self.maps[0][key] = value - - def __delitem__(self, key): - try: - del self.maps[0][key] - except KeyError: - raise KeyError('Key not found in the first mapping: {0!r}'.format(key)) - - def popitem(self): - 'Remove and return an item pair from maps[0]. Raise KeyError is maps[0] is empty.' 
- try: - return self.maps[0].popitem() - except KeyError: - raise KeyError('No keys found in the first mapping.') - - def pop(self, key, *args): - 'Remove *key* from maps[0] and return its value. Raise KeyError if *key* not in maps[0].' - try: - return self.maps[0].pop(key, *args) - except KeyError: - raise KeyError('Key not found in the first mapping: {0!r}'.format(key)) - - def clear(self): - 'Clear maps[0], leaving maps[1:] intact.' - self.maps[0].clear() - - -# Re-use the same sentinel as in the Python stdlib socket module: -from socket import _GLOBAL_DEFAULT_TIMEOUT -# Was: _GLOBAL_DEFAULT_TIMEOUT = object() - - -def create_connection(address, timeout=_GLOBAL_DEFAULT_TIMEOUT, - source_address=None): - """Backport of 3-argument create_connection() for Py2.6. - - Connect to *address* and return the socket object. - - Convenience function. Connect to *address* (a 2-tuple ``(host, - port)``) and return the socket object. Passing the optional - *timeout* parameter will set the timeout on the socket instance - before attempting to connect. If no *timeout* is supplied, the - global default timeout setting returned by :func:`getdefaulttimeout` - is used. If *source_address* is set it must be a tuple of (host, port) - for the socket to bind as a source address before making the connection. - An host of '' or port 0 tells the OS to use the default. 
- """ - - host, port = address - err = None - for res in getaddrinfo(host, port, 0, SOCK_STREAM): - af, socktype, proto, canonname, sa = res - sock = None - try: - sock = socket(af, socktype, proto) - if timeout is not _GLOBAL_DEFAULT_TIMEOUT: - sock.settimeout(timeout) - if source_address: - sock.bind(source_address) - sock.connect(sa) - return sock - - except error as _: - err = _ - if sock is not None: - sock.close() - - if err is not None: - raise err - else: - raise error("getaddrinfo returns an empty list") - -# Backport from Py2.7 for Py2.6: -def cmp_to_key(mycmp): - """Convert a cmp= function into a key= function""" - class K(object): - __slots__ = ['obj'] - def __init__(self, obj, *args): - self.obj = obj - def __lt__(self, other): - return mycmp(self.obj, other.obj) < 0 - def __gt__(self, other): - return mycmp(self.obj, other.obj) > 0 - def __eq__(self, other): - return mycmp(self.obj, other.obj) == 0 - def __le__(self, other): - return mycmp(self.obj, other.obj) <= 0 - def __ge__(self, other): - return mycmp(self.obj, other.obj) >= 0 - def __ne__(self, other): - return mycmp(self.obj, other.obj) != 0 - def __hash__(self): - raise TypeError('hash not implemented') - return K - -# Back up our definitions above in case they're useful -_OrderedDict = OrderedDict -_Counter = Counter -_check_output = check_output -_count = count -_ceil = ceil -__count_elements = _count_elements -_recursive_repr = recursive_repr -_ChainMap = ChainMap -_create_connection = create_connection -_cmp_to_key = cmp_to_key - -# Overwrite the definitions above with the usual ones -# from the standard library: -if sys.version_info >= (2, 7): - from collections import OrderedDict, Counter - from itertools import count - from functools import cmp_to_key - try: - from subprocess import check_output - except ImportError: - # Not available. 
This happens with Google App Engine: see issue #231 - pass - from socket import create_connection - -if sys.version_info >= (3, 0): - from math import ceil - from collections import _count_elements - -if sys.version_info >= (3, 3): - from reprlib import recursive_repr - from collections import ChainMap diff --git a/contrib/python/future/future/backports/socket.py b/contrib/python/future/future/backports/socket.py deleted file mode 100644 index eea20f7fd0a..00000000000 --- a/contrib/python/future/future/backports/socket.py +++ /dev/null @@ -1,454 +0,0 @@ -# Wrapper module for _socket, providing some additional facilities -# implemented in Python. - -"""\ -This module provides socket operations and some related functions. -On Unix, it supports IP (Internet Protocol) and Unix domain sockets. -On other systems, it only supports IP. Functions specific for a -socket are available as methods of the socket object. - -Functions: - -socket() -- create a new socket object -socketpair() -- create a pair of new socket objects [*] -fromfd() -- create a socket object from an open file descriptor [*] -fromshare() -- create a socket object from data received from socket.share() [*] -gethostname() -- return the current hostname -gethostbyname() -- map a hostname to its IP number -gethostbyaddr() -- map an IP number or hostname to DNS info -getservbyname() -- map a service name and a protocol name to a port number -getprotobyname() -- map a protocol name (e.g. 
'tcp') to a number -ntohs(), ntohl() -- convert 16, 32 bit int from network to host byte order -htons(), htonl() -- convert 16, 32 bit int from host to network byte order -inet_aton() -- convert IP addr string (123.45.67.89) to 32-bit packed format -inet_ntoa() -- convert 32-bit packed format IP to string (123.45.67.89) -socket.getdefaulttimeout() -- get the default timeout value -socket.setdefaulttimeout() -- set the default timeout value -create_connection() -- connects to an address, with an optional timeout and - optional source address. - - [*] not available on all platforms! - -Special objects: - -SocketType -- type object for socket objects -error -- exception raised for I/O errors -has_ipv6 -- boolean value indicating if IPv6 is supported - -Integer constants: - -AF_INET, AF_UNIX -- socket domains (first argument to socket() call) -SOCK_STREAM, SOCK_DGRAM, SOCK_RAW -- socket types (second argument) - -Many other constants may be defined; these may be used in calls to -the setsockopt() and getsockopt() methods. -""" - -from __future__ import unicode_literals -from __future__ import print_function -from __future__ import division -from __future__ import absolute_import -from future.builtins import super - -import _socket -from _socket import * - -import os, sys, io - -try: - import errno -except ImportError: - errno = None -EBADF = getattr(errno, 'EBADF', 9) -EAGAIN = getattr(errno, 'EAGAIN', 11) -EWOULDBLOCK = getattr(errno, 'EWOULDBLOCK', 11) - -__all__ = ["getfqdn", "create_connection"] -__all__.extend(os._get_exports_list(_socket)) - - -_realsocket = socket - -# WSA error codes -if sys.platform.lower().startswith("win"): - errorTab = {} - errorTab[10004] = "The operation was interrupted." - errorTab[10009] = "A bad file handle was passed." - errorTab[10013] = "Permission denied." - errorTab[10014] = "A fault occurred on the network??" # WSAEFAULT - errorTab[10022] = "An invalid operation was attempted." 
- errorTab[10035] = "The socket operation would block" - errorTab[10036] = "A blocking operation is already in progress." - errorTab[10048] = "The network address is in use." - errorTab[10054] = "The connection has been reset." - errorTab[10058] = "The network has been shut down." - errorTab[10060] = "The operation timed out." - errorTab[10061] = "Connection refused." - errorTab[10063] = "The name is too long." - errorTab[10064] = "The host is down." - errorTab[10065] = "The host is unreachable." - __all__.append("errorTab") - - -class socket(_socket.socket): - - """A subclass of _socket.socket adding the makefile() method.""" - - __slots__ = ["_io_refs", "_closed"] - - def __init__(self, family=AF_INET, type=SOCK_STREAM, proto=0, fileno=None): - if fileno is None: - _socket.socket.__init__(self, family, type, proto) - else: - _socket.socket.__init__(self, family, type, proto, fileno) - self._io_refs = 0 - self._closed = False - - def __enter__(self): - return self - - def __exit__(self, *args): - if not self._closed: - self.close() - - def __repr__(self): - """Wrap __repr__() to reveal the real class name.""" - s = _socket.socket.__repr__(self) - if s.startswith("<socket object"): - s = "<%s.%s%s%s" % (self.__class__.__module__, - self.__class__.__name__, - getattr(self, '_closed', False) and " [closed] " or "", - s[7:]) - return s - - def __getstate__(self): - raise TypeError("Cannot serialize socket object") - - def dup(self): - """dup() -> socket object - - Return a new socket object connected to the same system resource. - """ - fd = dup(self.fileno()) - sock = self.__class__(self.family, self.type, self.proto, fileno=fd) - sock.settimeout(self.gettimeout()) - return sock - - def accept(self): - """accept() -> (socket object, address info) - - Wait for an incoming connection. Return a new socket - representing the connection, and the address of the client. - For IP sockets, the address info is a pair (hostaddr, port). 
- """ - fd, addr = self._accept() - sock = socket(self.family, self.type, self.proto, fileno=fd) - # Issue #7995: if no default timeout is set and the listening - # socket had a (non-zero) timeout, force the new socket in blocking - # mode to override platform-specific socket flags inheritance. - if getdefaulttimeout() is None and self.gettimeout(): - sock.setblocking(True) - return sock, addr - - def makefile(self, mode="r", buffering=None, **_3to2kwargs): - """makefile(...) -> an I/O stream connected to the socket - - The arguments are as for io.open() after the filename, - except the only mode characters supported are 'r', 'w' and 'b'. - The semantics are similar too. (XXX refactor to share code?) - """ - if 'newline' in _3to2kwargs: newline = _3to2kwargs['newline']; del _3to2kwargs['newline'] - else: newline = None - if 'errors' in _3to2kwargs: errors = _3to2kwargs['errors']; del _3to2kwargs['errors'] - else: errors = None - if 'encoding' in _3to2kwargs: encoding = _3to2kwargs['encoding']; del _3to2kwargs['encoding'] - else: encoding = None - for c in mode: - if c not in ("r", "w", "b"): - raise ValueError("invalid mode %r (only r, w, b allowed)") - writing = "w" in mode - reading = "r" in mode or not writing - assert reading or writing - binary = "b" in mode - rawmode = "" - if reading: - rawmode += "r" - if writing: - rawmode += "w" - raw = SocketIO(self, rawmode) - self._io_refs += 1 - if buffering is None: - buffering = -1 - if buffering < 0: - buffering = io.DEFAULT_BUFFER_SIZE - if buffering == 0: - if not binary: - raise ValueError("unbuffered streams must be binary") - return raw - if reading and writing: - buffer = io.BufferedRWPair(raw, raw, buffering) - elif reading: - buffer = io.BufferedReader(raw, buffering) - else: - assert writing - buffer = io.BufferedWriter(raw, buffering) - if binary: - return buffer - text = io.TextIOWrapper(buffer, encoding, errors, newline) - text.mode = mode - return text - - def _decref_socketios(self): - if 
self._io_refs > 0: - self._io_refs -= 1 - if self._closed: - self.close() - - def _real_close(self, _ss=_socket.socket): - # This function should not reference any globals. See issue #808164. - _ss.close(self) - - def close(self): - # This function should not reference any globals. See issue #808164. - self._closed = True - if self._io_refs <= 0: - self._real_close() - - def detach(self): - """detach() -> file descriptor - - Close the socket object without closing the underlying file descriptor. - The object cannot be used after this call, but the file descriptor - can be reused for other purposes. The file descriptor is returned. - """ - self._closed = True - return super().detach() - -def fromfd(fd, family, type, proto=0): - """ fromfd(fd, family, type[, proto]) -> socket object - - Create a socket object from a duplicate of the given file - descriptor. The remaining arguments are the same as for socket(). - """ - nfd = dup(fd) - return socket(family, type, proto, nfd) - -if hasattr(_socket.socket, "share"): - def fromshare(info): - """ fromshare(info) -> socket object - - Create a socket object from a the bytes object returned by - socket.share(pid). - """ - return socket(0, 0, 0, info) - -if hasattr(_socket, "socketpair"): - - def socketpair(family=None, type=SOCK_STREAM, proto=0): - """socketpair([family[, type[, proto]]]) -> (socket object, socket object) - - Create a pair of socket objects from the sockets returned by the platform - socketpair() function. - The arguments are the same as for socket() except the default family is - AF_UNIX if defined on the platform; otherwise, the default is AF_INET. 
- """ - if family is None: - try: - family = AF_UNIX - except NameError: - family = AF_INET - a, b = _socket.socketpair(family, type, proto) - a = socket(family, type, proto, a.detach()) - b = socket(family, type, proto, b.detach()) - return a, b - - -_blocking_errnos = set([EAGAIN, EWOULDBLOCK]) - -class SocketIO(io.RawIOBase): - - """Raw I/O implementation for stream sockets. - - This class supports the makefile() method on sockets. It provides - the raw I/O interface on top of a socket object. - """ - - # One might wonder why not let FileIO do the job instead. There are two - # main reasons why FileIO is not adapted: - # - it wouldn't work under Windows (where you can't used read() and - # write() on a socket handle) - # - it wouldn't work with socket timeouts (FileIO would ignore the - # timeout and consider the socket non-blocking) - - # XXX More docs - - def __init__(self, sock, mode): - if mode not in ("r", "w", "rw", "rb", "wb", "rwb"): - raise ValueError("invalid mode: %r" % mode) - io.RawIOBase.__init__(self) - self._sock = sock - if "b" not in mode: - mode += "b" - self._mode = mode - self._reading = "r" in mode - self._writing = "w" in mode - self._timeout_occurred = False - - def readinto(self, b): - """Read up to len(b) bytes into the writable buffer *b* and return - the number of bytes read. If the socket is non-blocking and no bytes - are available, None is returned. - - If *b* is non-empty, a 0 return value indicates that the connection - was shutdown at the other end. 
- """ - self._checkClosed() - self._checkReadable() - if self._timeout_occurred: - raise IOError("cannot read from timed out object") - while True: - try: - return self._sock.recv_into(b) - except timeout: - self._timeout_occurred = True - raise - # except InterruptedError: - # continue - except error as e: - if e.args[0] in _blocking_errnos: - return None - raise - - def write(self, b): - """Write the given bytes or bytearray object *b* to the socket - and return the number of bytes written. This can be less than - len(b) if not all data could be written. If the socket is - non-blocking and no bytes could be written None is returned. - """ - self._checkClosed() - self._checkWritable() - try: - return self._sock.send(b) - except error as e: - # XXX what about EINTR? - if e.args[0] in _blocking_errnos: - return None - raise - - def readable(self): - """True if the SocketIO is open for reading. - """ - if self.closed: - raise ValueError("I/O operation on closed socket.") - return self._reading - - def writable(self): - """True if the SocketIO is open for writing. - """ - if self.closed: - raise ValueError("I/O operation on closed socket.") - return self._writing - - def seekable(self): - """True if the SocketIO is open for seeking. - """ - if self.closed: - raise ValueError("I/O operation on closed socket.") - return super().seekable() - - def fileno(self): - """Return the file descriptor of the underlying socket. - """ - self._checkClosed() - return self._sock.fileno() - - @property - def name(self): - if not self.closed: - return self.fileno() - else: - return -1 - - @property - def mode(self): - return self._mode - - def close(self): - """Close the SocketIO object. This doesn't close the underlying - socket, except if all references to it have disappeared. - """ - if self.closed: - return - io.RawIOBase.close(self) - self._sock._decref_socketios() - self._sock = None - - -def getfqdn(name=''): - """Get fully qualified domain name from name. 
- - An empty argument is interpreted as meaning the local host. - - First the hostname returned by gethostbyaddr() is checked, then - possibly existing aliases. In case no FQDN is available, hostname - from gethostname() is returned. - """ - name = name.strip() - if not name or name == '0.0.0.0': - name = gethostname() - try: - hostname, aliases, ipaddrs = gethostbyaddr(name) - except error: - pass - else: - aliases.insert(0, hostname) - for name in aliases: - if '.' in name: - break - else: - name = hostname - return name - - -# Re-use the same sentinel as in the Python stdlib socket module: -from socket import _GLOBAL_DEFAULT_TIMEOUT -# Was: _GLOBAL_DEFAULT_TIMEOUT = object() - - -def create_connection(address, timeout=_GLOBAL_DEFAULT_TIMEOUT, - source_address=None): - """Connect to *address* and return the socket object. - - Convenience function. Connect to *address* (a 2-tuple ``(host, - port)``) and return the socket object. Passing the optional - *timeout* parameter will set the timeout on the socket instance - before attempting to connect. If no *timeout* is supplied, the - global default timeout setting returned by :func:`getdefaulttimeout` - is used. If *source_address* is set it must be a tuple of (host, port) - for the socket to bind as a source address before making the connection. - An host of '' or port 0 tells the OS to use the default. 
- """ - - host, port = address - err = None - for res in getaddrinfo(host, port, 0, SOCK_STREAM): - af, socktype, proto, canonname, sa = res - sock = None - try: - sock = socket(af, socktype, proto) - if timeout is not _GLOBAL_DEFAULT_TIMEOUT: - sock.settimeout(timeout) - if source_address: - sock.bind(source_address) - sock.connect(sa) - return sock - - except error as _: - err = _ - if sock is not None: - sock.close() - - if err is not None: - raise err - else: - raise error("getaddrinfo returns an empty list") diff --git a/contrib/python/future/future/backports/socketserver.py b/contrib/python/future/future/backports/socketserver.py deleted file mode 100644 index d1e24a6dd0b..00000000000 --- a/contrib/python/future/future/backports/socketserver.py +++ /dev/null @@ -1,747 +0,0 @@ -"""Generic socket server classes. - -This module tries to capture the various aspects of defining a server: - -For socket-based servers: - -- address family: - - AF_INET{,6}: IP (Internet Protocol) sockets (default) - - AF_UNIX: Unix domain sockets - - others, e.g. AF_DECNET are conceivable (see <socket.h> -- socket type: - - SOCK_STREAM (reliable stream, e.g. TCP) - - SOCK_DGRAM (datagrams, e.g. UDP) - -For request-based servers (including socket-based): - -- client address verification before further looking at the request - (This is actually a hook for any processing that needs to look - at the request before anything else, e.g. logging) -- how to handle multiple requests: - - synchronous (one request is handled at a time) - - forking (each request is handled by a new process) - - threading (each request is handled by a new thread) - -The classes in this module favor the server type that is simplest to -write: a synchronous TCP/IP server. This is bad class design, but -save some typing. (There's also the issue that a deep class hierarchy -slows down method lookups.) 
- -There are five classes in an inheritance diagram, four of which represent -synchronous servers of four types: - - +------------+ - | BaseServer | - +------------+ - | - v - +-----------+ +------------------+ - | TCPServer |------->| UnixStreamServer | - +-----------+ +------------------+ - | - v - +-----------+ +--------------------+ - | UDPServer |------->| UnixDatagramServer | - +-----------+ +--------------------+ - -Note that UnixDatagramServer derives from UDPServer, not from -UnixStreamServer -- the only difference between an IP and a Unix -stream server is the address family, which is simply repeated in both -unix server classes. - -Forking and threading versions of each type of server can be created -using the ForkingMixIn and ThreadingMixIn mix-in classes. For -instance, a threading UDP server class is created as follows: - - class ThreadingUDPServer(ThreadingMixIn, UDPServer): pass - -The Mix-in class must come first, since it overrides a method defined -in UDPServer! Setting the various member variables also changes -the behavior of the underlying server mechanism. - -To implement a service, you must derive a class from -BaseRequestHandler and redefine its handle() method. You can then run -various versions of the service by combining one of the server classes -with your request handler class. - -The request handler class must be different for datagram or stream -services. This can be hidden by using the request handler -subclasses StreamRequestHandler or DatagramRequestHandler. - -Of course, you still have to use your head! - -For instance, it makes no sense to use a forking server if the service -contains state in memory that can be modified by requests (since the -modifications in the child process would never reach the initial state -kept in the parent process and passed to each child). 
In this case, -you can use a threading server, but you will probably have to use -locks to avoid two requests that come in nearly simultaneous to apply -conflicting changes to the server state. - -On the other hand, if you are building e.g. an HTTP server, where all -data is stored externally (e.g. in the file system), a synchronous -class will essentially render the service "deaf" while one request is -being handled -- which may be for a very long time if a client is slow -to read all the data it has requested. Here a threading or forking -server is appropriate. - -In some cases, it may be appropriate to process part of a request -synchronously, but to finish processing in a forked child depending on -the request data. This can be implemented by using a synchronous -server and doing an explicit fork in the request handler class -handle() method. - -Another approach to handling multiple simultaneous requests in an -environment that supports neither threads nor fork (or where these are -too expensive or inappropriate for the service) is to maintain an -explicit table of partially finished requests and to use select() to -decide which request to work on next (or whether to handle a new -incoming request). This is particularly important for stream services -where each client can potentially be connected for a long time (if -threads or subprocesses cannot be used). - -Future work: -- Standard classes for Sun RPC (which uses either UDP or TCP) -- Standard mix-in classes to implement various authentication - and encryption schemes -- Standard framework for select-based multiplexing - -XXX Open problems: -- What to do with out-of-band data? - -BaseServer: -- split generic "request" functionality out into BaseServer class. - Copyright (C) 2000 Luke Kenneth Casson Leighton <[email protected]> - - example: read entries from a SQL database (requires overriding - get_request() to return a table entry from the database). - entry is processed by a RequestHandlerClass. 
- -""" - -# Author of the BaseServer patch: Luke Kenneth Casson Leighton - -# XXX Warning! -# There is a test suite for this module, but it cannot be run by the -# standard regression test. -# To run it manually, run Lib/test/test_socketserver.py. - -from __future__ import (absolute_import, print_function) - -__version__ = "0.4" - - -import socket -import select -import sys -import os -import errno -try: - import threading -except ImportError: - import dummy_threading as threading - -__all__ = ["TCPServer","UDPServer","ForkingUDPServer","ForkingTCPServer", - "ThreadingUDPServer","ThreadingTCPServer","BaseRequestHandler", - "StreamRequestHandler","DatagramRequestHandler", - "ThreadingMixIn", "ForkingMixIn"] -if hasattr(socket, "AF_UNIX"): - __all__.extend(["UnixStreamServer","UnixDatagramServer", - "ThreadingUnixStreamServer", - "ThreadingUnixDatagramServer"]) - -def _eintr_retry(func, *args): - """restart a system call interrupted by EINTR""" - while True: - try: - return func(*args) - except OSError as e: - if e.errno != errno.EINTR: - raise - -class BaseServer(object): - - """Base class for server classes. 
- - Methods for the caller: - - - __init__(server_address, RequestHandlerClass) - - serve_forever(poll_interval=0.5) - - shutdown() - - handle_request() # if you do not use serve_forever() - - fileno() -> int # for select() - - Methods that may be overridden: - - - server_bind() - - server_activate() - - get_request() -> request, client_address - - handle_timeout() - - verify_request(request, client_address) - - server_close() - - process_request(request, client_address) - - shutdown_request(request) - - close_request(request) - - service_actions() - - handle_error() - - Methods for derived classes: - - - finish_request(request, client_address) - - Class variables that may be overridden by derived classes or - instances: - - - timeout - - address_family - - socket_type - - allow_reuse_address - - Instance variables: - - - RequestHandlerClass - - socket - - """ - - timeout = None - - def __init__(self, server_address, RequestHandlerClass): - """Constructor. May be extended, do not override.""" - self.server_address = server_address - self.RequestHandlerClass = RequestHandlerClass - self.__is_shut_down = threading.Event() - self.__shutdown_request = False - - def server_activate(self): - """Called by constructor to activate the server. - - May be overridden. - - """ - pass - - def serve_forever(self, poll_interval=0.5): - """Handle one request at a time until shutdown. - - Polls for shutdown every poll_interval seconds. Ignores - self.timeout. If you need to do periodic tasks, do them in - another thread. - """ - self.__is_shut_down.clear() - try: - while not self.__shutdown_request: - # XXX: Consider using another file descriptor or - # connecting to the socket to wake this up instead of - # polling. Polling reduces our responsiveness to a - # shutdown request and wastes cpu at all other times. 
- r, w, e = _eintr_retry(select.select, [self], [], [], - poll_interval) - if self in r: - self._handle_request_noblock() - - self.service_actions() - finally: - self.__shutdown_request = False - self.__is_shut_down.set() - - def shutdown(self): - """Stops the serve_forever loop. - - Blocks until the loop has finished. This must be called while - serve_forever() is running in another thread, or it will - deadlock. - """ - self.__shutdown_request = True - self.__is_shut_down.wait() - - def service_actions(self): - """Called by the serve_forever() loop. - - May be overridden by a subclass / Mixin to implement any code that - needs to be run during the loop. - """ - pass - - # The distinction between handling, getting, processing and - # finishing a request is fairly arbitrary. Remember: - # - # - handle_request() is the top-level call. It calls - # select, get_request(), verify_request() and process_request() - # - get_request() is different for stream or datagram sockets - # - process_request() is the place that may fork a new process - # or create a new thread to finish the request - # - finish_request() instantiates the request handler class; - # this constructor will handle the request all by itself - - def handle_request(self): - """Handle one request, possibly blocking. - - Respects self.timeout. - """ - # Support people who used socket.settimeout() to escape - # handle_request before self.timeout was available. - timeout = self.socket.gettimeout() - if timeout is None: - timeout = self.timeout - elif self.timeout is not None: - timeout = min(timeout, self.timeout) - fd_sets = _eintr_retry(select.select, [self], [], [], timeout) - if not fd_sets[0]: - self.handle_timeout() - return - self._handle_request_noblock() - - def _handle_request_noblock(self): - """Handle one request, without blocking. - - I assume that select.select has returned that the socket is - readable before this function was called, so there should be - no risk of blocking in get_request(). 
- """ - try: - request, client_address = self.get_request() - except socket.error: - return - if self.verify_request(request, client_address): - try: - self.process_request(request, client_address) - except: - self.handle_error(request, client_address) - self.shutdown_request(request) - - def handle_timeout(self): - """Called if no new request arrives within self.timeout. - - Overridden by ForkingMixIn. - """ - pass - - def verify_request(self, request, client_address): - """Verify the request. May be overridden. - - Return True if we should proceed with this request. - - """ - return True - - def process_request(self, request, client_address): - """Call finish_request. - - Overridden by ForkingMixIn and ThreadingMixIn. - - """ - self.finish_request(request, client_address) - self.shutdown_request(request) - - def server_close(self): - """Called to clean-up the server. - - May be overridden. - - """ - pass - - def finish_request(self, request, client_address): - """Finish one request by instantiating RequestHandlerClass.""" - self.RequestHandlerClass(request, client_address, self) - - def shutdown_request(self, request): - """Called to shutdown and close an individual request.""" - self.close_request(request) - - def close_request(self, request): - """Called to clean up an individual request.""" - pass - - def handle_error(self, request, client_address): - """Handle an error gracefully. May be overridden. - - The default is to print a traceback and continue. - - """ - print('-'*40) - print('Exception happened during processing of request from', end=' ') - print(client_address) - import traceback - traceback.print_exc() # XXX But this goes to stderr! - print('-'*40) - - -class TCPServer(BaseServer): - - """Base class for various socket-based server classes. - - Defaults to synchronous IP stream (i.e., TCP). 
- - Methods for the caller: - - - __init__(server_address, RequestHandlerClass, bind_and_activate=True) - - serve_forever(poll_interval=0.5) - - shutdown() - - handle_request() # if you don't use serve_forever() - - fileno() -> int # for select() - - Methods that may be overridden: - - - server_bind() - - server_activate() - - get_request() -> request, client_address - - handle_timeout() - - verify_request(request, client_address) - - process_request(request, client_address) - - shutdown_request(request) - - close_request(request) - - handle_error() - - Methods for derived classes: - - - finish_request(request, client_address) - - Class variables that may be overridden by derived classes or - instances: - - - timeout - - address_family - - socket_type - - request_queue_size (only for stream sockets) - - allow_reuse_address - - Instance variables: - - - server_address - - RequestHandlerClass - - socket - - """ - - address_family = socket.AF_INET - - socket_type = socket.SOCK_STREAM - - request_queue_size = 5 - - allow_reuse_address = False - - def __init__(self, server_address, RequestHandlerClass, bind_and_activate=True): - """Constructor. May be extended, do not override.""" - BaseServer.__init__(self, server_address, RequestHandlerClass) - self.socket = socket.socket(self.address_family, - self.socket_type) - if bind_and_activate: - self.server_bind() - self.server_activate() - - def server_bind(self): - """Called by constructor to bind the socket. - - May be overridden. - - """ - if self.allow_reuse_address: - self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - self.socket.bind(self.server_address) - self.server_address = self.socket.getsockname() - - def server_activate(self): - """Called by constructor to activate the server. - - May be overridden. - - """ - self.socket.listen(self.request_queue_size) - - def server_close(self): - """Called to clean-up the server. - - May be overridden. 
- - """ - self.socket.close() - - def fileno(self): - """Return socket file number. - - Interface required by select(). - - """ - return self.socket.fileno() - - def get_request(self): - """Get the request and client address from the socket. - - May be overridden. - - """ - return self.socket.accept() - - def shutdown_request(self, request): - """Called to shutdown and close an individual request.""" - try: - #explicitly shutdown. socket.close() merely releases - #the socket and waits for GC to perform the actual close. - request.shutdown(socket.SHUT_WR) - except socket.error: - pass #some platforms may raise ENOTCONN here - self.close_request(request) - - def close_request(self, request): - """Called to clean up an individual request.""" - request.close() - - -class UDPServer(TCPServer): - - """UDP server class.""" - - allow_reuse_address = False - - socket_type = socket.SOCK_DGRAM - - max_packet_size = 8192 - - def get_request(self): - data, client_addr = self.socket.recvfrom(self.max_packet_size) - return (data, self.socket), client_addr - - def server_activate(self): - # No need to call listen() for UDP. - pass - - def shutdown_request(self, request): - # No need to shutdown anything. - self.close_request(request) - - def close_request(self, request): - # No need to close anything. - pass - -class ForkingMixIn(object): - - """Mix-in class to handle each request in a new process.""" - - timeout = 300 - active_children = None - max_children = 40 - - def collect_children(self): - """Internal routine to wait for children that have exited.""" - if self.active_children is None: return - while len(self.active_children) >= self.max_children: - # XXX: This will wait for any child process, not just ones - # spawned by this library. This could confuse other - # libraries that expect to be able to wait for their own - # children. 
- try: - pid, status = os.waitpid(0, 0) - except os.error: - pid = None - if pid not in self.active_children: continue - self.active_children.remove(pid) - - # XXX: This loop runs more system calls than it ought - # to. There should be a way to put the active_children into a - # process group and then use os.waitpid(-pgid) to wait for any - # of that set, but I couldn't find a way to allocate pgids - # that couldn't collide. - for child in self.active_children: - try: - pid, status = os.waitpid(child, os.WNOHANG) - except os.error: - pid = None - if not pid: continue - try: - self.active_children.remove(pid) - except ValueError as e: - raise ValueError('%s. x=%d and list=%r' % (e.message, pid, - self.active_children)) - - def handle_timeout(self): - """Wait for zombies after self.timeout seconds of inactivity. - - May be extended, do not override. - """ - self.collect_children() - - def service_actions(self): - """Collect the zombie child processes regularly in the ForkingMixIn. - - service_actions is called in the BaseServer's serve_forver loop. - """ - self.collect_children() - - def process_request(self, request, client_address): - """Fork a new subprocess to process the request.""" - pid = os.fork() - if pid: - # Parent process - if self.active_children is None: - self.active_children = [] - self.active_children.append(pid) - self.close_request(request) - return - else: - # Child process. - # This must never return, hence os._exit()! - try: - self.finish_request(request, client_address) - self.shutdown_request(request) - os._exit(0) - except: - try: - self.handle_error(request, client_address) - self.shutdown_request(request) - finally: - os._exit(1) - - -class ThreadingMixIn(object): - """Mix-in class to handle each request in a new thread.""" - - # Decides how threads will act upon termination of the - # main process - daemon_threads = False - - def process_request_thread(self, request, client_address): - """Same as in BaseServer but as a thread. 
- - In addition, exception handling is done here. - - """ - try: - self.finish_request(request, client_address) - self.shutdown_request(request) - except: - self.handle_error(request, client_address) - self.shutdown_request(request) - - def process_request(self, request, client_address): - """Start a new thread to process the request.""" - t = threading.Thread(target = self.process_request_thread, - args = (request, client_address)) - t.daemon = self.daemon_threads - t.start() - - -class ForkingUDPServer(ForkingMixIn, UDPServer): pass -class ForkingTCPServer(ForkingMixIn, TCPServer): pass - -class ThreadingUDPServer(ThreadingMixIn, UDPServer): pass -class ThreadingTCPServer(ThreadingMixIn, TCPServer): pass - -if hasattr(socket, 'AF_UNIX'): - - class UnixStreamServer(TCPServer): - address_family = socket.AF_UNIX - - class UnixDatagramServer(UDPServer): - address_family = socket.AF_UNIX - - class ThreadingUnixStreamServer(ThreadingMixIn, UnixStreamServer): pass - - class ThreadingUnixDatagramServer(ThreadingMixIn, UnixDatagramServer): pass - -class BaseRequestHandler(object): - - """Base class for request handler classes. - - This class is instantiated for each request to be handled. The - constructor sets the instance variables request, client_address - and server, and then calls the handle() method. To implement a - specific service, all you need to do is to derive a class which - defines a handle() method. - - The handle() method can find the request as self.request, the - client address as self.client_address, and the server (in case it - needs access to per-server information) as self.server. Since a - separate instance is created for each request, the handle() method - can define arbitrary other instance variariables. 
- - """ - - def __init__(self, request, client_address, server): - self.request = request - self.client_address = client_address - self.server = server - self.setup() - try: - self.handle() - finally: - self.finish() - - def setup(self): - pass - - def handle(self): - pass - - def finish(self): - pass - - -# The following two classes make it possible to use the same service -# class for stream or datagram servers. -# Each class sets up these instance variables: -# - rfile: a file object from which receives the request is read -# - wfile: a file object to which the reply is written -# When the handle() method returns, wfile is flushed properly - - -class StreamRequestHandler(BaseRequestHandler): - - """Define self.rfile and self.wfile for stream sockets.""" - - # Default buffer sizes for rfile, wfile. - # We default rfile to buffered because otherwise it could be - # really slow for large data (a getc() call per byte); we make - # wfile unbuffered because (a) often after a write() we want to - # read and we need to flush the line; (b) big writes to unbuffered - # files are typically optimized by stdio even when big reads - # aren't. - rbufsize = -1 - wbufsize = 0 - - # A timeout to apply to the request socket, if not None. - timeout = None - - # Disable nagle algorithm for this socket, if True. - # Use only when wbufsize != 0, to avoid small packets. - disable_nagle_algorithm = False - - def setup(self): - self.connection = self.request - if self.timeout is not None: - self.connection.settimeout(self.timeout) - if self.disable_nagle_algorithm: - self.connection.setsockopt(socket.IPPROTO_TCP, - socket.TCP_NODELAY, True) - self.rfile = self.connection.makefile('rb', self.rbufsize) - self.wfile = self.connection.makefile('wb', self.wbufsize) - - def finish(self): - if not self.wfile.closed: - try: - self.wfile.flush() - except socket.error: - # An final socket error may have occurred here, such as - # the local error ECONNABORTED. 
- pass - self.wfile.close() - self.rfile.close() - - -class DatagramRequestHandler(BaseRequestHandler): - - # XXX Regrettably, I cannot get this working on Linux; - # s.recvfrom() doesn't return a meaningful client address. - - """Define self.rfile and self.wfile for datagram sockets.""" - - def setup(self): - from io import BytesIO - self.packet, self.socket = self.request - self.rfile = BytesIO(self.packet) - self.wfile = BytesIO() - - def finish(self): - self.socket.sendto(self.wfile.getvalue(), self.client_address) diff --git a/contrib/python/future/future/backports/total_ordering.py b/contrib/python/future/future/backports/total_ordering.py deleted file mode 100644 index 760f06d6c35..00000000000 --- a/contrib/python/future/future/backports/total_ordering.py +++ /dev/null @@ -1,38 +0,0 @@ -""" -For Python < 2.7.2. total_ordering in versions prior to 2.7.2 is buggy. -See http://bugs.python.org/issue10042 for details. For these versions use -code borrowed from Python 2.7.3. - -From django.utils. 
-""" - -import sys -if sys.version_info >= (2, 7, 2): - from functools import total_ordering -else: - def total_ordering(cls): - """Class decorator that fills in missing ordering methods""" - convert = { - '__lt__': [('__gt__', lambda self, other: not (self < other or self == other)), - ('__le__', lambda self, other: self < other or self == other), - ('__ge__', lambda self, other: not self < other)], - '__le__': [('__ge__', lambda self, other: not self <= other or self == other), - ('__lt__', lambda self, other: self <= other and not self == other), - ('__gt__', lambda self, other: not self <= other)], - '__gt__': [('__lt__', lambda self, other: not (self > other or self == other)), - ('__ge__', lambda self, other: self > other or self == other), - ('__le__', lambda self, other: not self > other)], - '__ge__': [('__le__', lambda self, other: (not self >= other) or self == other), - ('__gt__', lambda self, other: self >= other and not self == other), - ('__lt__', lambda self, other: not self >= other)] - } - roots = set(dir(cls)) & set(convert) - if not roots: - raise ValueError('must define at least one ordering operation: < > <= >=') - root = max(roots) # prefer __lt__ to __le__ to __gt__ to __ge__ - for opname, opfunc in convert[root]: - if opname not in roots: - opfunc.__name__ = opname - opfunc.__doc__ = getattr(int, opname).__doc__ - setattr(cls, opname, opfunc) - return cls diff --git a/contrib/python/future/future/backports/urllib/__init__.py b/contrib/python/future/future/backports/urllib/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 --- a/contrib/python/future/future/backports/urllib/__init__.py +++ /dev/null diff --git a/contrib/python/future/future/backports/urllib/error.py b/contrib/python/future/future/backports/urllib/error.py deleted file mode 100644 index a473e4453d7..00000000000 --- a/contrib/python/future/future/backports/urllib/error.py +++ /dev/null @@ -1,75 +0,0 @@ -"""Exception classes raised by urllib. 
- -The base exception class is URLError, which inherits from IOError. It -doesn't define any behavior of its own, but is the base class for all -exceptions defined in this package. - -HTTPError is an exception class that is also a valid HTTP response -instance. It behaves this way because HTTP protocol errors are valid -responses, with a status code, headers, and a body. In some contexts, -an application may want to handle an exception like a regular -response. -""" -from __future__ import absolute_import, division, unicode_literals -from future import standard_library - -from future.backports.urllib import response as urllib_response - - -__all__ = ['URLError', 'HTTPError', 'ContentTooShortError'] - - -# do these error classes make sense? -# make sure all of the IOError stuff is overridden. we just want to be -# subtypes. - -class URLError(IOError): - # URLError is a sub-type of IOError, but it doesn't share any of - # the implementation. need to override __init__ and __str__. - # It sets self.args for compatibility with other EnvironmentError - # subclasses, but args doesn't have the typical format with errno in - # slot 0 and strerror in slot 1. This may be better than nothing. - def __init__(self, reason, filename=None): - self.args = reason, - self.reason = reason - if filename is not None: - self.filename = filename - - def __str__(self): - return '<urlopen error %s>' % self.reason - -class HTTPError(URLError, urllib_response.addinfourl): - """Raised when HTTP error occurs, but also acts like non-error return""" - __super_init = urllib_response.addinfourl.__init__ - - def __init__(self, url, code, msg, hdrs, fp): - self.code = code - self.msg = msg - self.hdrs = hdrs - self.fp = fp - self.filename = url - # The addinfourl classes depend on fp being a valid file - # object. In some cases, the HTTPError may not have a valid - # file object. If this happens, the simplest workaround is to - # not initialize the base classes. 
- if fp is not None: - self.__super_init(fp, hdrs, url, code) - - def __str__(self): - return 'HTTP Error %s: %s' % (self.code, self.msg) - - # since URLError specifies a .reason attribute, HTTPError should also - # provide this attribute. See issue13211 for discussion. - @property - def reason(self): - return self.msg - - def info(self): - return self.hdrs - - -# exception raised when downloaded size does not match content-length -class ContentTooShortError(URLError): - def __init__(self, message, content): - URLError.__init__(self, message) - self.content = content diff --git a/contrib/python/future/future/backports/urllib/parse.py b/contrib/python/future/future/backports/urllib/parse.py deleted file mode 100644 index 04e52d49258..00000000000 --- a/contrib/python/future/future/backports/urllib/parse.py +++ /dev/null @@ -1,991 +0,0 @@ -""" -Ported using Python-Future from the Python 3.3 standard library. - -Parse (absolute and relative) URLs. - -urlparse module is based upon the following RFC specifications. - -RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding -and L. Masinter, January 2005. - -RFC 2732 : "Format for Literal IPv6 Addresses in URL's by R.Hinden, B.Carpenter -and L.Masinter, December 1999. - -RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T. -Berners-Lee, R. Fielding, and L. Masinter, August 1998. - -RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zawinski, July 1998. - -RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June -1995. - -RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M. -McCahill, December 1994 - -RFC 3986 is considered the current standard and any future changes to -urlparse module should conform with it. The urlparse module is -currently not entirely compliant with this RFC due to defacto -scenarios for parsing, and for backward compatibility purposes, some -parsing quirks from older RFCs are retained. 
The testcases in -test_urlparse.py provides a good indicator of parsing behavior. -""" -from __future__ import absolute_import, division, unicode_literals -from future.builtins import bytes, chr, dict, int, range, str -from future.utils import raise_with_traceback - -import re -import sys -import collections - -__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag", - "urlsplit", "urlunsplit", "urlencode", "parse_qs", - "parse_qsl", "quote", "quote_plus", "quote_from_bytes", - "unquote", "unquote_plus", "unquote_to_bytes"] - -# A classification of schemes ('' means apply by default) -uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap', - 'wais', 'file', 'https', 'shttp', 'mms', - 'prospero', 'rtsp', 'rtspu', '', 'sftp', - 'svn', 'svn+ssh'] -uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet', - 'imap', 'wais', 'file', 'mms', 'https', 'shttp', - 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '', - 'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh'] -uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap', - 'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips', - 'mms', '', 'sftp', 'tel'] - -# These are not actually used anymore, but should stay for backwards -# compatibility. (They are undocumented, but have a public-looking name.) 
-non_hierarchical = ['gopher', 'hdl', 'mailto', 'news', - 'telnet', 'wais', 'imap', 'snews', 'sip', 'sips'] -uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms', - 'gopher', 'rtsp', 'rtspu', 'sip', 'sips', ''] -uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', - 'nntp', 'wais', 'https', 'shttp', 'snews', - 'file', 'prospero', ''] - -# Characters valid in scheme names -scheme_chars = ('abcdefghijklmnopqrstuvwxyz' - 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' - '0123456789' - '+-.') - -# XXX: Consider replacing with functools.lru_cache -MAX_CACHE_SIZE = 20 -_parse_cache = {} - -def clear_cache(): - """Clear the parse cache and the quoters cache.""" - _parse_cache.clear() - _safe_quoters.clear() - - -# Helpers for bytes handling -# For 3.2, we deliberately require applications that -# handle improperly quoted URLs to do their own -# decoding and encoding. If valid use cases are -# presented, we may relax this by using latin-1 -# decoding internally for 3.3 -_implicit_encoding = 'ascii' -_implicit_errors = 'strict' - -def _noop(obj): - return obj - -def _encode_result(obj, encoding=_implicit_encoding, - errors=_implicit_errors): - return obj.encode(encoding, errors) - -def _decode_args(args, encoding=_implicit_encoding, - errors=_implicit_errors): - return tuple(x.decode(encoding, errors) if x else '' for x in args) - -def _coerce_args(*args): - # Invokes decode if necessary to create str args - # and returns the coerced inputs along with - # an appropriate result coercion function - # - noop for str inputs - # - encoding function otherwise - str_input = isinstance(args[0], str) - for arg in args[1:]: - # We special-case the empty string to support the - # "scheme=''" default argument to some functions - if arg and isinstance(arg, str) != str_input: - raise TypeError("Cannot mix str and non-str arguments") - if str_input: - return args + (_noop,) - return _decode_args(args) + (_encode_result,) - -# Result objects are more helpful than simple tuples -class 
_ResultMixinStr(object): - """Standard approach to encoding parsed results from str to bytes""" - __slots__ = () - - def encode(self, encoding='ascii', errors='strict'): - return self._encoded_counterpart(*(x.encode(encoding, errors) for x in self)) - - -class _ResultMixinBytes(object): - """Standard approach to decoding parsed results from bytes to str""" - __slots__ = () - - def decode(self, encoding='ascii', errors='strict'): - return self._decoded_counterpart(*(x.decode(encoding, errors) for x in self)) - - -class _NetlocResultMixinBase(object): - """Shared methods for the parsed result objects containing a netloc element""" - __slots__ = () - - @property - def username(self): - return self._userinfo[0] - - @property - def password(self): - return self._userinfo[1] - - @property - def hostname(self): - hostname = self._hostinfo[0] - if not hostname: - hostname = None - elif hostname is not None: - hostname = hostname.lower() - return hostname - - @property - def port(self): - port = self._hostinfo[1] - if port is not None: - port = int(port, 10) - # Return None on an illegal port - if not ( 0 <= port <= 65535): - return None - return port - - -class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr): - __slots__ = () - - @property - def _userinfo(self): - netloc = self.netloc - userinfo, have_info, hostinfo = netloc.rpartition('@') - if have_info: - username, have_password, password = userinfo.partition(':') - if not have_password: - password = None - else: - username = password = None - return username, password - - @property - def _hostinfo(self): - netloc = self.netloc - _, _, hostinfo = netloc.rpartition('@') - _, have_open_br, bracketed = hostinfo.partition('[') - if have_open_br: - hostname, _, port = bracketed.partition(']') - _, have_port, port = port.partition(':') - else: - hostname, have_port, port = hostinfo.partition(':') - if not have_port: - port = None - return hostname, port - - -class _NetlocResultMixinBytes(_NetlocResultMixinBase, 
_ResultMixinBytes): - __slots__ = () - - @property - def _userinfo(self): - netloc = self.netloc - userinfo, have_info, hostinfo = netloc.rpartition(b'@') - if have_info: - username, have_password, password = userinfo.partition(b':') - if not have_password: - password = None - else: - username = password = None - return username, password - - @property - def _hostinfo(self): - netloc = self.netloc - _, _, hostinfo = netloc.rpartition(b'@') - _, have_open_br, bracketed = hostinfo.partition(b'[') - if have_open_br: - hostname, _, port = bracketed.partition(b']') - _, have_port, port = port.partition(b':') - else: - hostname, have_port, port = hostinfo.partition(b':') - if not have_port: - port = None - return hostname, port - - -from collections import namedtuple - -_DefragResultBase = namedtuple('DefragResult', 'url fragment') -_SplitResultBase = namedtuple('SplitResult', 'scheme netloc path query fragment') -_ParseResultBase = namedtuple('ParseResult', 'scheme netloc path params query fragment') - -# For backwards compatibility, alias _NetlocResultMixinStr -# ResultBase is no longer part of the documented API, but it is -# retained since deprecating it isn't worth the hassle -ResultBase = _NetlocResultMixinStr - -# Structured result objects for string data -class DefragResult(_DefragResultBase, _ResultMixinStr): - __slots__ = () - def geturl(self): - if self.fragment: - return self.url + '#' + self.fragment - else: - return self.url - -class SplitResult(_SplitResultBase, _NetlocResultMixinStr): - __slots__ = () - def geturl(self): - return urlunsplit(self) - -class ParseResult(_ParseResultBase, _NetlocResultMixinStr): - __slots__ = () - def geturl(self): - return urlunparse(self) - -# Structured result objects for bytes data -class DefragResultBytes(_DefragResultBase, _ResultMixinBytes): - __slots__ = () - def geturl(self): - if self.fragment: - return self.url + b'#' + self.fragment - else: - return self.url - -class SplitResultBytes(_SplitResultBase, 
_NetlocResultMixinBytes): - __slots__ = () - def geturl(self): - return urlunsplit(self) - -class ParseResultBytes(_ParseResultBase, _NetlocResultMixinBytes): - __slots__ = () - def geturl(self): - return urlunparse(self) - -# Set up the encode/decode result pairs -def _fix_result_transcoding(): - _result_pairs = ( - (DefragResult, DefragResultBytes), - (SplitResult, SplitResultBytes), - (ParseResult, ParseResultBytes), - ) - for _decoded, _encoded in _result_pairs: - _decoded._encoded_counterpart = _encoded - _encoded._decoded_counterpart = _decoded - -_fix_result_transcoding() -del _fix_result_transcoding - -def urlparse(url, scheme='', allow_fragments=True): - """Parse a URL into 6 components: - <scheme>://<netloc>/<path>;<params>?<query>#<fragment> - Return a 6-tuple: (scheme, netloc, path, params, query, fragment). - Note that we don't break the components up in smaller bits - (e.g. netloc is a single string) and we don't expand % escapes.""" - url, scheme, _coerce_result = _coerce_args(url, scheme) - splitresult = urlsplit(url, scheme, allow_fragments) - scheme, netloc, url, query, fragment = splitresult - if scheme in uses_params and ';' in url: - url, params = _splitparams(url) - else: - params = '' - result = ParseResult(scheme, netloc, url, params, query, fragment) - return _coerce_result(result) - -def _splitparams(url): - if '/' in url: - i = url.find(';', url.rfind('/')) - if i < 0: - return url, '' - else: - i = url.find(';') - return url[:i], url[i+1:] - -def _splitnetloc(url, start=0): - delim = len(url) # position of end of domain part of url, default is end - for c in '/?#': # look for delimiters; the order is NOT important - wdelim = url.find(c, start) # find first of this delim - if wdelim >= 0: # if found - delim = min(delim, wdelim) # use earliest delim position - return url[start:delim], url[delim:] # return (domain, rest) - -def urlsplit(url, scheme='', allow_fragments=True): - """Parse a URL into 5 components: - 
<scheme>://<netloc>/<path>?<query>#<fragment> - Return a 5-tuple: (scheme, netloc, path, query, fragment). - Note that we don't break the components up in smaller bits - (e.g. netloc is a single string) and we don't expand % escapes.""" - url, scheme, _coerce_result = _coerce_args(url, scheme) - allow_fragments = bool(allow_fragments) - key = url, scheme, allow_fragments, type(url), type(scheme) - cached = _parse_cache.get(key, None) - if cached: - return _coerce_result(cached) - if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth - clear_cache() - netloc = query = fragment = '' - i = url.find(':') - if i > 0: - if url[:i] == 'http': # optimize the common case - scheme = url[:i].lower() - url = url[i+1:] - if url[:2] == '//': - netloc, url = _splitnetloc(url, 2) - if (('[' in netloc and ']' not in netloc) or - (']' in netloc and '[' not in netloc)): - raise ValueError("Invalid IPv6 URL") - if allow_fragments and '#' in url: - url, fragment = url.split('#', 1) - if '?' in url: - url, query = url.split('?', 1) - v = SplitResult(scheme, netloc, url, query, fragment) - _parse_cache[key] = v - return _coerce_result(v) - for c in url[:i]: - if c not in scheme_chars: - break - else: - # make sure "url" is not actually a port number (in which case - # "scheme" is really part of the path) - rest = url[i+1:] - if not rest or any(c not in '0123456789' for c in rest): - # not a port number - scheme, url = url[:i].lower(), rest - - if url[:2] == '//': - netloc, url = _splitnetloc(url, 2) - if (('[' in netloc and ']' not in netloc) or - (']' in netloc and '[' not in netloc)): - raise ValueError("Invalid IPv6 URL") - if allow_fragments and '#' in url: - url, fragment = url.split('#', 1) - if '?' in url: - url, query = url.split('?', 1) - v = SplitResult(scheme, netloc, url, query, fragment) - _parse_cache[key] = v - return _coerce_result(v) - -def urlunparse(components): - """Put a parsed URL back together again. 
This may result in a - slightly different, but equivalent URL, if the URL that was parsed - originally had redundant delimiters, e.g. a ? with an empty query - (the draft states that these are equivalent).""" - scheme, netloc, url, params, query, fragment, _coerce_result = ( - _coerce_args(*components)) - if params: - url = "%s;%s" % (url, params) - return _coerce_result(urlunsplit((scheme, netloc, url, query, fragment))) - -def urlunsplit(components): - """Combine the elements of a tuple as returned by urlsplit() into a - complete URL as a string. The data argument can be any five-item iterable. - This may result in a slightly different, but equivalent URL, if the URL that - was parsed originally had unnecessary delimiters (for example, a ? with an - empty query; the RFC states that these are equivalent).""" - scheme, netloc, url, query, fragment, _coerce_result = ( - _coerce_args(*components)) - if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'): - if url and url[:1] != '/': url = '/' + url - url = '//' + (netloc or '') + url - if scheme: - url = scheme + ':' + url - if query: - url = url + '?' 
+ query - if fragment: - url = url + '#' + fragment - return _coerce_result(url) - -def urljoin(base, url, allow_fragments=True): - """Join a base URL and a possibly relative URL to form an absolute - interpretation of the latter.""" - if not base: - return url - if not url: - return base - base, url, _coerce_result = _coerce_args(base, url) - bscheme, bnetloc, bpath, bparams, bquery, bfragment = \ - urlparse(base, '', allow_fragments) - scheme, netloc, path, params, query, fragment = \ - urlparse(url, bscheme, allow_fragments) - if scheme != bscheme or scheme not in uses_relative: - return _coerce_result(url) - if scheme in uses_netloc: - if netloc: - return _coerce_result(urlunparse((scheme, netloc, path, - params, query, fragment))) - netloc = bnetloc - if path[:1] == '/': - return _coerce_result(urlunparse((scheme, netloc, path, - params, query, fragment))) - if not path and not params: - path = bpath - params = bparams - if not query: - query = bquery - return _coerce_result(urlunparse((scheme, netloc, path, - params, query, fragment))) - segments = bpath.split('/')[:-1] + path.split('/') - # XXX The stuff below is bogus in various ways... - if segments[-1] == '.': - segments[-1] = '' - while '.' in segments: - segments.remove('.') - while 1: - i = 1 - n = len(segments) - 1 - while i < n: - if (segments[i] == '..' - and segments[i-1] not in ('', '..')): - del segments[i-1:i+1] - break - i = i+1 - else: - break - if segments == ['', '..']: - segments[-1] = '' - elif len(segments) >= 2 and segments[-1] == '..': - segments[-2:] = [''] - return _coerce_result(urlunparse((scheme, netloc, '/'.join(segments), - params, query, fragment))) - -def urldefrag(url): - """Removes any existing fragment from URL. - - Returns a tuple of the defragmented URL and the fragment. If - the URL contained no fragments, the second element is the - empty string. 
- """ - url, _coerce_result = _coerce_args(url) - if '#' in url: - s, n, p, a, q, frag = urlparse(url) - defrag = urlunparse((s, n, p, a, q, '')) - else: - frag = '' - defrag = url - return _coerce_result(DefragResult(defrag, frag)) - -_hexdig = '0123456789ABCDEFabcdef' -_hextobyte = dict(((a + b).encode(), bytes([int(a + b, 16)])) - for a in _hexdig for b in _hexdig) - -def unquote_to_bytes(string): - """unquote_to_bytes('abc%20def') -> b'abc def'.""" - # Note: strings are encoded as UTF-8. This is only an issue if it contains - # unescaped non-ASCII characters, which URIs should not. - if not string: - # Is it a string-like object? - string.split - return bytes(b'') - if isinstance(string, str): - string = string.encode('utf-8') - ### For Python-Future: - # It is already a byte-string object, but force it to be newbytes here on - # Py2: - string = bytes(string) - ### - bits = string.split(b'%') - if len(bits) == 1: - return string - res = [bits[0]] - append = res.append - for item in bits[1:]: - try: - append(_hextobyte[item[:2]]) - append(item[2:]) - except KeyError: - append(b'%') - append(item) - return bytes(b'').join(res) - -_asciire = re.compile('([\x00-\x7f]+)') - -def unquote(string, encoding='utf-8', errors='replace'): - """Replace %xx escapes by their single-character equivalent. The optional - encoding and errors parameters specify how to decode percent-encoded - sequences into Unicode characters, as accepted by the bytes.decode() - method. - By default, percent-encoded sequences are decoded with UTF-8, and invalid - sequences are replaced by a placeholder character. - - unquote('abc%20def') -> 'abc def'. 
- """ - if '%' not in string: - string.split - return string - if encoding is None: - encoding = 'utf-8' - if errors is None: - errors = 'replace' - bits = _asciire.split(string) - res = [bits[0]] - append = res.append - for i in range(1, len(bits), 2): - append(unquote_to_bytes(bits[i]).decode(encoding, errors)) - append(bits[i + 1]) - return ''.join(res) - -def parse_qs(qs, keep_blank_values=False, strict_parsing=False, - encoding='utf-8', errors='replace'): - """Parse a query given as a string argument. - - Arguments: - - qs: percent-encoded query string to be parsed - - keep_blank_values: flag indicating whether blank values in - percent-encoded queries should be treated as blank strings. - A true value indicates that blanks should be retained as - blank strings. The default false value indicates that - blank values are to be ignored and treated as if they were - not included. - - strict_parsing: flag indicating what to do with parsing errors. - If false (the default), errors are silently ignored. - If true, errors raise a ValueError exception. - - encoding and errors: specify how to decode percent-encoded sequences - into Unicode characters, as accepted by the bytes.decode() method. - """ - parsed_result = {} - pairs = parse_qsl(qs, keep_blank_values, strict_parsing, - encoding=encoding, errors=errors) - for name, value in pairs: - if name in parsed_result: - parsed_result[name].append(value) - else: - parsed_result[name] = [value] - return parsed_result - -def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, - encoding='utf-8', errors='replace'): - """Parse a query given as a string argument. - - Arguments: - - qs: percent-encoded query string to be parsed - - keep_blank_values: flag indicating whether blank values in - percent-encoded queries should be treated as blank strings. A - true value indicates that blanks should be retained as blank - strings. 
The default false value indicates that blank values - are to be ignored and treated as if they were not included. - - strict_parsing: flag indicating what to do with parsing errors. If - false (the default), errors are silently ignored. If true, - errors raise a ValueError exception. - - encoding and errors: specify how to decode percent-encoded sequences - into Unicode characters, as accepted by the bytes.decode() method. - - Returns a list, as G-d intended. - """ - qs, _coerce_result = _coerce_args(qs) - pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] - r = [] - for name_value in pairs: - if not name_value and not strict_parsing: - continue - nv = name_value.split('=', 1) - if len(nv) != 2: - if strict_parsing: - raise ValueError("bad query field: %r" % (name_value,)) - # Handle case of a control-name with no equal sign - if keep_blank_values: - nv.append('') - else: - continue - if len(nv[1]) or keep_blank_values: - name = nv[0].replace('+', ' ') - name = unquote(name, encoding=encoding, errors=errors) - name = _coerce_result(name) - value = nv[1].replace('+', ' ') - value = unquote(value, encoding=encoding, errors=errors) - value = _coerce_result(value) - r.append((name, value)) - return r - -def unquote_plus(string, encoding='utf-8', errors='replace'): - """Like unquote(), but also replace plus signs by spaces, as required for - unquoting HTML form values. - - unquote_plus('%7e/abc+def') -> '~/abc def' - """ - string = string.replace('+', ' ') - return unquote(string, encoding, errors) - -_ALWAYS_SAFE = frozenset(bytes(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ' - b'abcdefghijklmnopqrstuvwxyz' - b'0123456789' - b'_.-')) -_ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE) -_safe_quoters = {} - -class Quoter(collections.defaultdict): - """A mapping from bytes (in range(0,256)) to strings. - - String values are percent-encoded byte values, unless the key < 128, and - in the "safe" set (either the specified safe set, or default set). 
- """ - # Keeps a cache internally, using defaultdict, for efficiency (lookups - # of cached keys don't call Python code at all). - def __init__(self, safe): - """safe: bytes object.""" - self.safe = _ALWAYS_SAFE.union(bytes(safe)) - - def __repr__(self): - # Without this, will just display as a defaultdict - return "<Quoter %r>" % dict(self) - - def __missing__(self, b): - # Handle a cache miss. Store quoted string in cache and return. - res = chr(b) if b in self.safe else '%{0:02X}'.format(b) - self[b] = res - return res - -def quote(string, safe='/', encoding=None, errors=None): - """quote('abc def') -> 'abc%20def' - - Each part of a URL, e.g. the path info, the query, etc., has a - different set of reserved characters that must be quoted. - - RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists - the following reserved characters. - - reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | - "$" | "," - - Each of these characters is reserved in some component of a URL, - but not necessarily in all of them. - - By default, the quote function is intended for quoting the path - section of a URL. Thus, it will not encode '/'. This character - is reserved, but in typical usage the quote function is being - called on a path where the existing slash characters are used as - reserved characters. - - string and safe may be either str or bytes objects. encoding must - not be specified if string is a str. - - The optional encoding and errors parameters specify how to deal with - non-ASCII characters, as accepted by the str.encode method. - By default, encoding='utf-8' (characters are encoded with UTF-8), and - errors='strict' (unsupported characters raise a UnicodeEncodeError). 
- """ - if isinstance(string, str): - if not string: - return string - if encoding is None: - encoding = 'utf-8' - if errors is None: - errors = 'strict' - string = string.encode(encoding, errors) - else: - if encoding is not None: - raise TypeError("quote() doesn't support 'encoding' for bytes") - if errors is not None: - raise TypeError("quote() doesn't support 'errors' for bytes") - return quote_from_bytes(string, safe) - -def quote_plus(string, safe='', encoding=None, errors=None): - """Like quote(), but also replace ' ' with '+', as required for quoting - HTML form values. Plus signs in the original string are escaped unless - they are included in safe. It also does not have safe default to '/'. - """ - # Check if ' ' in string, where string may either be a str or bytes. If - # there are no spaces, the regular quote will produce the right answer. - if ((isinstance(string, str) and ' ' not in string) or - (isinstance(string, bytes) and b' ' not in string)): - return quote(string, safe, encoding, errors) - if isinstance(safe, str): - space = str(' ') - else: - space = bytes(b' ') - string = quote(string, safe + space, encoding, errors) - return string.replace(' ', '+') - -def quote_from_bytes(bs, safe='/'): - """Like quote(), but accepts a bytes object rather than a str, and does - not perform string-to-bytes encoding. It always returns an ASCII string. 
- quote_from_bytes(b'abc def\x3f') -> 'abc%20def%3f' - """ - if not isinstance(bs, (bytes, bytearray)): - raise TypeError("quote_from_bytes() expected bytes") - if not bs: - return str('') - ### For Python-Future: - bs = bytes(bs) - ### - if isinstance(safe, str): - # Normalize 'safe' by converting to bytes and removing non-ASCII chars - safe = str(safe).encode('ascii', 'ignore') - else: - ### For Python-Future: - safe = bytes(safe) - ### - safe = bytes([c for c in safe if c < 128]) - if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe): - return bs.decode() - try: - quoter = _safe_quoters[safe] - except KeyError: - _safe_quoters[safe] = quoter = Quoter(safe).__getitem__ - return str('').join([quoter(char) for char in bs]) - -def urlencode(query, doseq=False, safe='', encoding=None, errors=None): - """Encode a sequence of two-element tuples or dictionary into a URL query string. - - If any values in the query arg are sequences and doseq is true, each - sequence element is converted to a separate parameter. - - If the query arg is a sequence of two-element tuples, the order of the - parameters in the output will match the order of parameters in the - input. - - The query arg may be either a string or a bytes type. When query arg is a - string, the safe, encoding and error parameters are sent the quote_plus for - encoding. - """ - - if hasattr(query, "items"): - query = query.items() - else: - # It's a bother at times that strings and string-like objects are - # sequences. - try: - # non-sequence items should not work with len() - # non-empty strings will fail this - if len(query) and not isinstance(query[0], tuple): - raise TypeError - # Zero-length sequences of all types will get here and succeed, - # but that's a minor nit. 
Since the original implementation - # allowed empty dicts that type of behavior probably should be - # preserved for consistency - except TypeError: - ty, va, tb = sys.exc_info() - raise_with_traceback(TypeError("not a valid non-string sequence " - "or mapping object"), tb) - - l = [] - if not doseq: - for k, v in query: - if isinstance(k, bytes): - k = quote_plus(k, safe) - else: - k = quote_plus(str(k), safe, encoding, errors) - - if isinstance(v, bytes): - v = quote_plus(v, safe) - else: - v = quote_plus(str(v), safe, encoding, errors) - l.append(k + '=' + v) - else: - for k, v in query: - if isinstance(k, bytes): - k = quote_plus(k, safe) - else: - k = quote_plus(str(k), safe, encoding, errors) - - if isinstance(v, bytes): - v = quote_plus(v, safe) - l.append(k + '=' + v) - elif isinstance(v, str): - v = quote_plus(v, safe, encoding, errors) - l.append(k + '=' + v) - else: - try: - # Is this a sufficient test for sequence-ness? - x = len(v) - except TypeError: - # not a sequence - v = quote_plus(str(v), safe, encoding, errors) - l.append(k + '=' + v) - else: - # loop over the sequence - for elt in v: - if isinstance(elt, bytes): - elt = quote_plus(elt, safe) - else: - elt = quote_plus(str(elt), safe, encoding, errors) - l.append(k + '=' + elt) - return str('&').join(l) - -# Utilities to parse URLs (most of these return None for missing parts): -# unwrap('<URL:type://host/path>') --> 'type://host/path' -# splittype('type:opaquestring') --> 'type', 'opaquestring' -# splithost('//host[:port]/path') --> 'host[:port]', '/path' -# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]' -# splitpasswd('user:passwd') -> 'user', 'passwd' -# splitport('host:port') --> 'host', 'port' -# splitquery('/path?query') --> '/path', 'query' -# splittag('/path#tag') --> '/path', 'tag' -# splitattr('/path;attr1=value1;attr2=value2;...') -> -# '/path', ['attr1=value1', 'attr2=value2', ...] 
-# splitvalue('attr=value') --> 'attr', 'value' -# urllib.parse.unquote('abc%20def') -> 'abc def' -# quote('abc def') -> 'abc%20def') - -def to_bytes(url): - """to_bytes(u"URL") --> 'URL'.""" - # Most URL schemes require ASCII. If that changes, the conversion - # can be relaxed. - # XXX get rid of to_bytes() - if isinstance(url, str): - try: - url = url.encode("ASCII").decode() - except UnicodeError: - raise UnicodeError("URL " + repr(url) + - " contains non-ASCII characters") - return url - -def unwrap(url): - """unwrap('<URL:type://host/path>') --> 'type://host/path'.""" - url = str(url).strip() - if url[:1] == '<' and url[-1:] == '>': - url = url[1:-1].strip() - if url[:4] == 'URL:': url = url[4:].strip() - return url - -_typeprog = None -def splittype(url): - """splittype('type:opaquestring') --> 'type', 'opaquestring'.""" - global _typeprog - if _typeprog is None: - import re - _typeprog = re.compile('^([^/:]+):') - - match = _typeprog.match(url) - if match: - scheme = match.group(1) - return scheme.lower(), url[len(scheme) + 1:] - return None, url - -_hostprog = None -def splithost(url): - """splithost('//host[:port]/path') --> 'host[:port]', '/path'.""" - global _hostprog - if _hostprog is None: - import re - _hostprog = re.compile('^//([^/?]*)(.*)$') - - match = _hostprog.match(url) - if match: - host_port = match.group(1) - path = match.group(2) - if path and not path.startswith('/'): - path = '/' + path - return host_port, path - return None, url - -_userprog = None -def splituser(host): - """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.""" - global _userprog - if _userprog is None: - import re - _userprog = re.compile('^(.*)@(.*)$') - - match = _userprog.match(host) - if match: return match.group(1, 2) - return None, host - -_passwdprog = None -def splitpasswd(user): - """splitpasswd('user:passwd') -> 'user', 'passwd'.""" - global _passwdprog - if _passwdprog is None: - import re - _passwdprog = 
re.compile('^([^:]*):(.*)$',re.S) - - match = _passwdprog.match(user) - if match: return match.group(1, 2) - return user, None - -# splittag('/path#tag') --> '/path', 'tag' -_portprog = None -def splitport(host): - """splitport('host:port') --> 'host', 'port'.""" - global _portprog - if _portprog is None: - import re - _portprog = re.compile('^(.*):([0-9]+)$') - - match = _portprog.match(host) - if match: return match.group(1, 2) - return host, None - -_nportprog = None -def splitnport(host, defport=-1): - """Split host and port, returning numeric port. - Return given default port if no ':' found; defaults to -1. - Return numerical port if a valid number are found after ':'. - Return None if ':' but not a valid number.""" - global _nportprog - if _nportprog is None: - import re - _nportprog = re.compile('^(.*):(.*)$') - - match = _nportprog.match(host) - if match: - host, port = match.group(1, 2) - try: - if not port: raise ValueError("no digits") - nport = int(port) - except ValueError: - nport = None - return host, nport - return host, defport - -_queryprog = None -def splitquery(url): - """splitquery('/path?query') --> '/path', 'query'.""" - global _queryprog - if _queryprog is None: - import re - _queryprog = re.compile('^(.*)\?([^?]*)$') - - match = _queryprog.match(url) - if match: return match.group(1, 2) - return url, None - -_tagprog = None -def splittag(url): - """splittag('/path#tag') --> '/path', 'tag'.""" - global _tagprog - if _tagprog is None: - import re - _tagprog = re.compile('^(.*)#([^#]*)$') - - match = _tagprog.match(url) - if match: return match.group(1, 2) - return url, None - -def splitattr(url): - """splitattr('/path;attr1=value1;attr2=value2;...') -> - '/path', ['attr1=value1', 'attr2=value2', ...].""" - words = url.split(';') - return words[0], words[1:] - -_valueprog = None -def splitvalue(attr): - """splitvalue('attr=value') --> 'attr', 'value'.""" - global _valueprog - if _valueprog is None: - import re - _valueprog = 
re.compile('^([^=]*)=(.*)$') - - match = _valueprog.match(attr) - if match: return match.group(1, 2) - return attr, None diff --git a/contrib/python/future/future/backports/urllib/request.py b/contrib/python/future/future/backports/urllib/request.py deleted file mode 100644 index baee5401aaa..00000000000 --- a/contrib/python/future/future/backports/urllib/request.py +++ /dev/null @@ -1,2647 +0,0 @@ -""" -Ported using Python-Future from the Python 3.3 standard library. - -An extensible library for opening URLs using a variety of protocols - -The simplest way to use this module is to call the urlopen function, -which accepts a string containing a URL or a Request object (described -below). It opens the URL and returns the results as file-like -object; the returned object has some extra methods described below. - -The OpenerDirector manages a collection of Handler objects that do -all the actual work. Each Handler implements a particular protocol or -option. The OpenerDirector is a composite object that invokes the -Handlers needed to open the requested URL. For example, the -HTTPHandler performs HTTP GET and POST requests and deals with -non-error returns. The HTTPRedirectHandler automatically deals with -HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler -deals with digest authentication. - -urlopen(url, data=None) -- Basic usage is the same as original -urllib. pass the url and optionally data to post to an HTTP URL, and -get a file-like object back. One difference is that you can also pass -a Request instance instead of URL. Raises a URLError (subclass of -IOError); for HTTP errors, raises an HTTPError, which can also be -treated as a valid response. - -build_opener -- Function that creates a new OpenerDirector instance. -Will install the default handlers. Accepts one or more Handlers as -arguments, either instances or Handler classes that it will -instantiate. 
If one of the argument is a subclass of the default -handler, the argument will be installed instead of the default. - -install_opener -- Installs a new opener as the default opener. - -objects of interest: - -OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages -the Handler classes, while dealing with requests and responses. - -Request -- An object that encapsulates the state of a request. The -state can be as simple as the URL. It can also include extra HTTP -headers, e.g. a User-Agent. - -BaseHandler -- - -internals: -BaseHandler and parent -_call_chain conventions - -Example usage: - -import urllib.request - -# set up authentication info -authinfo = urllib.request.HTTPBasicAuthHandler() -authinfo.add_password(realm='PDQ Application', - uri='https://mahler:8092/site-updates.py', - user='klem', - passwd='geheim$parole') - -proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"}) - -# build a new opener that adds authentication and caching FTP handlers -opener = urllib.request.build_opener(proxy_support, authinfo, - urllib.request.CacheFTPHandler) - -# install it -urllib.request.install_opener(opener) - -f = urllib.request.urlopen('http://www.python.org/') -""" - -# XXX issues: -# If an authentication error handler that tries to perform -# authentication for some reason but fails, how should the error be -# signalled? The client needs to know the HTTP error code. But if -# the handler knows that the problem was, e.g., that it didn't know -# that hash algo that requested in the challenge, it would be good to -# pass that information along to the client, too. -# ftp errors aren't handled cleanly -# check digest against correct (i.e. 
non-apache) implementation - -# Possible extensions: -# complex proxies XXX not sure what exactly was meant by this -# abstract factory for opener - -from __future__ import absolute_import, division, print_function, unicode_literals -from future.builtins import bytes, dict, filter, input, int, map, open, str -from future.utils import PY2, PY3, raise_with_traceback - -import base64 -import bisect -import hashlib -import array - -from future.backports import email -from future.backports.http import client as http_client -from .error import URLError, HTTPError, ContentTooShortError -from .parse import ( - urlparse, urlsplit, urljoin, unwrap, quote, unquote, - splittype, splithost, splitport, splituser, splitpasswd, - splitattr, splitquery, splitvalue, splittag, to_bytes, urlunparse) -from .response import addinfourl, addclosehook - -import io -import os -import posixpath -import re -import socket -import sys -import time -import tempfile -import contextlib -import warnings - -from future.utils import PY2 - -if PY2: - from collections import Iterable -else: - from collections.abc import Iterable - -# check for SSL -try: - import ssl - # Not available in the SSL module in Py2: - from ssl import SSLContext -except ImportError: - _have_ssl = False -else: - _have_ssl = True - -__all__ = [ - # Classes - 'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler', - 'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler', - 'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm', - 'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler', - 'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', - 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler', - 'UnknownHandler', 'HTTPErrorProcessor', - # Functions - 'urlopen', 'install_opener', 'build_opener', - 'pathname2url', 'url2pathname', 'getproxies', - # Legacy interface - 'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener', -] - -# used in User-Agent 
header sent -__version__ = sys.version[:3] - -_opener = None -def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, **_3to2kwargs): - if 'cadefault' in _3to2kwargs: cadefault = _3to2kwargs['cadefault']; del _3to2kwargs['cadefault'] - else: cadefault = False - if 'capath' in _3to2kwargs: capath = _3to2kwargs['capath']; del _3to2kwargs['capath'] - else: capath = None - if 'cafile' in _3to2kwargs: cafile = _3to2kwargs['cafile']; del _3to2kwargs['cafile'] - else: cafile = None - global _opener - if cafile or capath or cadefault: - if not _have_ssl: - raise ValueError('SSL support not available') - context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) - context.options |= ssl.OP_NO_SSLv2 - context.verify_mode = ssl.CERT_REQUIRED - if cafile or capath: - context.load_verify_locations(cafile, capath) - else: - context.set_default_verify_paths() - https_handler = HTTPSHandler(context=context, check_hostname=True) - opener = build_opener(https_handler) - elif _opener is None: - _opener = opener = build_opener() - else: - opener = _opener - return opener.open(url, data, timeout) - -def install_opener(opener): - global _opener - _opener = opener - -_url_tempfiles = [] -def urlretrieve(url, filename=None, reporthook=None, data=None): - """ - Retrieve a URL into a temporary location on disk. - - Requires a URL argument. If a filename is passed, it is used as - the temporary file location. The reporthook argument should be - a callable that accepts a block number, a read size, and the - total file size of the URL target. The data argument should be - valid URL encoded data. - - If a filename is passed and the URL points to a local resource, - the result is a copy from local file to new file. - - Returns a tuple containing the path to the newly created - data file as well as the resulting HTTPMessage object. 
- """ - url_type, path = splittype(url) - - with contextlib.closing(urlopen(url, data)) as fp: - headers = fp.info() - - # Just return the local path and the "headers" for file:// - # URLs. No sense in performing a copy unless requested. - if url_type == "file" and not filename: - return os.path.normpath(path), headers - - # Handle temporary file setup. - if filename: - tfp = open(filename, 'wb') - else: - tfp = tempfile.NamedTemporaryFile(delete=False) - filename = tfp.name - _url_tempfiles.append(filename) - - with tfp: - result = filename, headers - bs = 1024*8 - size = -1 - read = 0 - blocknum = 0 - if "content-length" in headers: - size = int(headers["Content-Length"]) - - if reporthook: - reporthook(blocknum, bs, size) - - while True: - block = fp.read(bs) - if not block: - break - read += len(block) - tfp.write(block) - blocknum += 1 - if reporthook: - reporthook(blocknum, bs, size) - - if size >= 0 and read < size: - raise ContentTooShortError( - "retrieval incomplete: got only %i out of %i bytes" - % (read, size), result) - - return result - -def urlcleanup(): - for temp_file in _url_tempfiles: - try: - os.unlink(temp_file) - except EnvironmentError: - pass - - del _url_tempfiles[:] - global _opener - if _opener: - _opener = None - -if PY3: - _cut_port_re = re.compile(r":\d+$", re.ASCII) -else: - _cut_port_re = re.compile(r":\d+$") - -def request_host(request): - - """Return request-host, as defined by RFC 2965. - - Variation from RFC: returned value is lowercased, for convenient - comparison. 
- - """ - url = request.full_url - host = urlparse(url)[1] - if host == "": - host = request.get_header("Host", "") - - # remove port, if present - host = _cut_port_re.sub("", host, 1) - return host.lower() - -class Request(object): - - def __init__(self, url, data=None, headers={}, - origin_req_host=None, unverifiable=False, - method=None): - # unwrap('<URL:type://host/path>') --> 'type://host/path' - self.full_url = unwrap(url) - self.full_url, self.fragment = splittag(self.full_url) - self.data = data - self.headers = {} - self._tunnel_host = None - for key, value in headers.items(): - self.add_header(key, value) - self.unredirected_hdrs = {} - if origin_req_host is None: - origin_req_host = request_host(self) - self.origin_req_host = origin_req_host - self.unverifiable = unverifiable - self.method = method - self._parse() - - def _parse(self): - self.type, rest = splittype(self.full_url) - if self.type is None: - raise ValueError("unknown url type: %r" % self.full_url) - self.host, self.selector = splithost(rest) - if self.host: - self.host = unquote(self.host) - - def get_method(self): - """Return a string indicating the HTTP request method.""" - if self.method is not None: - return self.method - elif self.data is not None: - return "POST" - else: - return "GET" - - def get_full_url(self): - if self.fragment: - return '%s#%s' % (self.full_url, self.fragment) - else: - return self.full_url - - # Begin deprecated methods - - def add_data(self, data): - msg = "Request.add_data method is deprecated." - warnings.warn(msg, DeprecationWarning, stacklevel=1) - self.data = data - - def has_data(self): - msg = "Request.has_data method is deprecated." - warnings.warn(msg, DeprecationWarning, stacklevel=1) - return self.data is not None - - def get_data(self): - msg = "Request.get_data method is deprecated." - warnings.warn(msg, DeprecationWarning, stacklevel=1) - return self.data - - def get_type(self): - msg = "Request.get_type method is deprecated." 
- warnings.warn(msg, DeprecationWarning, stacklevel=1) - return self.type - - def get_host(self): - msg = "Request.get_host method is deprecated." - warnings.warn(msg, DeprecationWarning, stacklevel=1) - return self.host - - def get_selector(self): - msg = "Request.get_selector method is deprecated." - warnings.warn(msg, DeprecationWarning, stacklevel=1) - return self.selector - - def is_unverifiable(self): - msg = "Request.is_unverifiable method is deprecated." - warnings.warn(msg, DeprecationWarning, stacklevel=1) - return self.unverifiable - - def get_origin_req_host(self): - msg = "Request.get_origin_req_host method is deprecated." - warnings.warn(msg, DeprecationWarning, stacklevel=1) - return self.origin_req_host - - # End deprecated methods - - def set_proxy(self, host, type): - if self.type == 'https' and not self._tunnel_host: - self._tunnel_host = self.host - else: - self.type= type - self.selector = self.full_url - self.host = host - - def has_proxy(self): - return self.selector == self.full_url - - def add_header(self, key, val): - # useful for something like authentication - self.headers[key.capitalize()] = val - - def add_unredirected_header(self, key, val): - # will not be added to a redirected request - self.unredirected_hdrs[key.capitalize()] = val - - def has_header(self, header_name): - return (header_name in self.headers or - header_name in self.unredirected_hdrs) - - def get_header(self, header_name, default=None): - return self.headers.get( - header_name, - self.unredirected_hdrs.get(header_name, default)) - - def header_items(self): - hdrs = self.unredirected_hdrs.copy() - hdrs.update(self.headers) - return list(hdrs.items()) - -class OpenerDirector(object): - def __init__(self): - client_version = "Python-urllib/%s" % __version__ - self.addheaders = [('User-agent', client_version)] - # self.handlers is retained only for backward compatibility - self.handlers = [] - # manage the individual handlers - self.handle_open = {} - self.handle_error 
= {} - self.process_response = {} - self.process_request = {} - - def add_handler(self, handler): - if not hasattr(handler, "add_parent"): - raise TypeError("expected BaseHandler instance, got %r" % - type(handler)) - - added = False - for meth in dir(handler): - if meth in ["redirect_request", "do_open", "proxy_open"]: - # oops, coincidental match - continue - - i = meth.find("_") - protocol = meth[:i] - condition = meth[i+1:] - - if condition.startswith("error"): - j = condition.find("_") + i + 1 - kind = meth[j+1:] - try: - kind = int(kind) - except ValueError: - pass - lookup = self.handle_error.get(protocol, {}) - self.handle_error[protocol] = lookup - elif condition == "open": - kind = protocol - lookup = self.handle_open - elif condition == "response": - kind = protocol - lookup = self.process_response - elif condition == "request": - kind = protocol - lookup = self.process_request - else: - continue - - handlers = lookup.setdefault(kind, []) - if handlers: - bisect.insort(handlers, handler) - else: - handlers.append(handler) - added = True - - if added: - bisect.insort(self.handlers, handler) - handler.add_parent(self) - - def close(self): - # Only exists for backwards compatibility. - pass - - def _call_chain(self, chain, kind, meth_name, *args): - # Handlers raise an exception if no one else should try to handle - # the request, or return None if they can't but another handler - # could. Otherwise, they return the response. - handlers = chain.get(kind, ()) - for handler in handlers: - func = getattr(handler, meth_name) - result = func(*args) - if result is not None: - return result - - def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT): - """ - Accept a URL or a Request object - - Python-Future: if the URL is passed as a byte-string, decode it first. 
- """ - if isinstance(fullurl, bytes): - fullurl = fullurl.decode() - if isinstance(fullurl, str): - req = Request(fullurl, data) - else: - req = fullurl - if data is not None: - req.data = data - - req.timeout = timeout - protocol = req.type - - # pre-process request - meth_name = protocol+"_request" - for processor in self.process_request.get(protocol, []): - meth = getattr(processor, meth_name) - req = meth(req) - - response = self._open(req, data) - - # post-process response - meth_name = protocol+"_response" - for processor in self.process_response.get(protocol, []): - meth = getattr(processor, meth_name) - response = meth(req, response) - - return response - - def _open(self, req, data=None): - result = self._call_chain(self.handle_open, 'default', - 'default_open', req) - if result: - return result - - protocol = req.type - result = self._call_chain(self.handle_open, protocol, protocol + - '_open', req) - if result: - return result - - return self._call_chain(self.handle_open, 'unknown', - 'unknown_open', req) - - def error(self, proto, *args): - if proto in ('http', 'https'): - # XXX http[s] protocols are special-cased - dict = self.handle_error['http'] # https is not different than http - proto = args[2] # YUCK! - meth_name = 'http_error_%s' % proto - http_err = 1 - orig_args = args - else: - dict = self.handle_error - meth_name = proto + '_error' - http_err = 0 - args = (dict, proto, meth_name) + args - result = self._call_chain(*args) - if result: - return result - - if http_err: - args = (dict, 'default', 'http_error_default') + orig_args - return self._call_chain(*args) - -# XXX probably also want an abstract factory that knows when it makes -# sense to skip a superclass in favor of a subclass and when it might -# make sense to include both - -def build_opener(*handlers): - """Create an opener object from a list of handlers. - - The opener will use several default handlers, including support - for HTTP, FTP and when applicable HTTPS. 
- - If any of the handlers passed as arguments are subclasses of the - default handlers, the default handlers will not be used. - """ - def isclass(obj): - return isinstance(obj, type) or hasattr(obj, "__bases__") - - opener = OpenerDirector() - default_classes = [ProxyHandler, UnknownHandler, HTTPHandler, - HTTPDefaultErrorHandler, HTTPRedirectHandler, - FTPHandler, FileHandler, HTTPErrorProcessor] - if hasattr(http_client, "HTTPSConnection"): - default_classes.append(HTTPSHandler) - skip = set() - for klass in default_classes: - for check in handlers: - if isclass(check): - if issubclass(check, klass): - skip.add(klass) - elif isinstance(check, klass): - skip.add(klass) - for klass in skip: - default_classes.remove(klass) - - for klass in default_classes: - opener.add_handler(klass()) - - for h in handlers: - if isclass(h): - h = h() - opener.add_handler(h) - return opener - -class BaseHandler(object): - handler_order = 500 - - def add_parent(self, parent): - self.parent = parent - - def close(self): - # Only exists for backwards compatibility - pass - - def __lt__(self, other): - if not hasattr(other, "handler_order"): - # Try to preserve the old behavior of having custom classes - # inserted after default ones (works only for custom user - # classes which are not aware of handler_order). - return True - return self.handler_order < other.handler_order - - -class HTTPErrorProcessor(BaseHandler): - """Process HTTP error responses.""" - handler_order = 1000 # after all other processing - - def http_response(self, request, response): - code, msg, hdrs = response.code, response.msg, response.info() - - # According to RFC 2616, "2xx" code indicates that the client's - # request was successfully received, understood, and accepted. 
- if not (200 <= code < 300): - response = self.parent.error( - 'http', request, response, code, msg, hdrs) - - return response - - https_response = http_response - -class HTTPDefaultErrorHandler(BaseHandler): - def http_error_default(self, req, fp, code, msg, hdrs): - raise HTTPError(req.full_url, code, msg, hdrs, fp) - -class HTTPRedirectHandler(BaseHandler): - # maximum number of redirections to any single URL - # this is needed because of the state that cookies introduce - max_repeats = 4 - # maximum total number of redirections (regardless of URL) before - # assuming we're in a loop - max_redirections = 10 - - def redirect_request(self, req, fp, code, msg, headers, newurl): - """Return a Request or None in response to a redirect. - - This is called by the http_error_30x methods when a - redirection response is received. If a redirection should - take place, return a new Request to allow http_error_30x to - perform the redirect. Otherwise, raise HTTPError if no-one - else should try to handle this url. Return None if you can't - but another Handler might. - """ - m = req.get_method() - if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD") - or code in (301, 302, 303) and m == "POST")): - raise HTTPError(req.full_url, code, msg, headers, fp) - - # Strictly (according to RFC 2616), 301 or 302 in response to - # a POST MUST NOT cause a redirection without confirmation - # from the user (of urllib.request, in this case). In practice, - # essentially all clients do redirect in this case, so we do - # the same. 
- # be conciliant with URIs containing a space - newurl = newurl.replace(' ', '%20') - CONTENT_HEADERS = ("content-length", "content-type") - newheaders = dict((k, v) for k, v in req.headers.items() - if k.lower() not in CONTENT_HEADERS) - return Request(newurl, - headers=newheaders, - origin_req_host=req.origin_req_host, - unverifiable=True) - - # Implementation note: To avoid the server sending us into an - # infinite loop, the request object needs to track what URLs we - # have already seen. Do this by adding a handler-specific - # attribute to the Request object. - def http_error_302(self, req, fp, code, msg, headers): - # Some servers (incorrectly) return multiple Location headers - # (so probably same goes for URI). Use first header. - if "location" in headers: - newurl = headers["location"] - elif "uri" in headers: - newurl = headers["uri"] - else: - return - - # fix a possible malformed URL - urlparts = urlparse(newurl) - - # For security reasons we don't allow redirection to anything other - # than http, https or ftp. - - if urlparts.scheme not in ('http', 'https', 'ftp', ''): - raise HTTPError( - newurl, code, - "%s - Redirection to url '%s' is not allowed" % (msg, newurl), - headers, fp) - - if not urlparts.path: - urlparts = list(urlparts) - urlparts[2] = "/" - newurl = urlunparse(urlparts) - - newurl = urljoin(req.full_url, newurl) - - # XXX Probably want to forget about the state of the current - # request, although that might interact poorly with other - # handlers that also use handler-specific request attributes - new = self.redirect_request(req, fp, code, msg, headers, newurl) - if new is None: - return - - # loop detection - # .redirect_dict has a key url if url was previously visited. 
- if hasattr(req, 'redirect_dict'): - visited = new.redirect_dict = req.redirect_dict - if (visited.get(newurl, 0) >= self.max_repeats or - len(visited) >= self.max_redirections): - raise HTTPError(req.full_url, code, - self.inf_msg + msg, headers, fp) - else: - visited = new.redirect_dict = req.redirect_dict = {} - visited[newurl] = visited.get(newurl, 0) + 1 - - # Don't close the fp until we are sure that we won't use it - # with HTTPError. - fp.read() - fp.close() - - return self.parent.open(new, timeout=req.timeout) - - http_error_301 = http_error_303 = http_error_307 = http_error_302 - - inf_msg = "The HTTP server returned a redirect error that would " \ - "lead to an infinite loop.\n" \ - "The last 30x error message was:\n" - - -def _parse_proxy(proxy): - """Return (scheme, user, password, host/port) given a URL or an authority. - - If a URL is supplied, it must have an authority (host:port) component. - According to RFC 3986, having an authority component means the URL must - have two slashes after the scheme: - - >>> _parse_proxy('file:/ftp.example.com/') - Traceback (most recent call last): - ValueError: proxy URL with no authority: 'file:/ftp.example.com/' - - The first three items of the returned tuple may be None. 
- - Examples of authority parsing: - - >>> _parse_proxy('proxy.example.com') - (None, None, None, 'proxy.example.com') - >>> _parse_proxy('proxy.example.com:3128') - (None, None, None, 'proxy.example.com:3128') - - The authority component may optionally include userinfo (assumed to be - username:password): - - >>> _parse_proxy('joe:[email protected]') - (None, 'joe', 'password', 'proxy.example.com') - >>> _parse_proxy('joe:[email protected]:3128') - (None, 'joe', 'password', 'proxy.example.com:3128') - - Same examples, but with URLs instead: - - >>> _parse_proxy('http://proxy.example.com/') - ('http', None, None, 'proxy.example.com') - >>> _parse_proxy('http://proxy.example.com:3128/') - ('http', None, None, 'proxy.example.com:3128') - >>> _parse_proxy('http://joe:[email protected]/') - ('http', 'joe', 'password', 'proxy.example.com') - >>> _parse_proxy('http://joe:[email protected]:3128') - ('http', 'joe', 'password', 'proxy.example.com:3128') - - Everything after the authority is ignored: - - >>> _parse_proxy('ftp://joe:[email protected]/rubbish:3128') - ('ftp', 'joe', 'password', 'proxy.example.com') - - Test for no trailing '/' case: - - >>> _parse_proxy('http://joe:[email protected]') - ('http', 'joe', 'password', 'proxy.example.com') - - """ - scheme, r_scheme = splittype(proxy) - if not r_scheme.startswith("/"): - # authority - scheme = None - authority = proxy - else: - # URL - if not r_scheme.startswith("//"): - raise ValueError("proxy URL with no authority: %r" % proxy) - # We have an authority, so for RFC 3986-compliant URLs (by ss 3. 
- # and 3.3.), path is empty or starts with '/' - end = r_scheme.find("/", 2) - if end == -1: - end = None - authority = r_scheme[2:end] - userinfo, hostport = splituser(authority) - if userinfo is not None: - user, password = splitpasswd(userinfo) - else: - user = password = None - return scheme, user, password, hostport - -class ProxyHandler(BaseHandler): - # Proxies must be in front - handler_order = 100 - - def __init__(self, proxies=None): - if proxies is None: - proxies = getproxies() - assert hasattr(proxies, 'keys'), "proxies must be a mapping" - self.proxies = proxies - for type, url in proxies.items(): - setattr(self, '%s_open' % type, - lambda r, proxy=url, type=type, meth=self.proxy_open: - meth(r, proxy, type)) - - def proxy_open(self, req, proxy, type): - orig_type = req.type - proxy_type, user, password, hostport = _parse_proxy(proxy) - if proxy_type is None: - proxy_type = orig_type - - if req.host and proxy_bypass(req.host): - return None - - if user and password: - user_pass = '%s:%s' % (unquote(user), - unquote(password)) - creds = base64.b64encode(user_pass.encode()).decode("ascii") - req.add_header('Proxy-authorization', 'Basic ' + creds) - hostport = unquote(hostport) - req.set_proxy(hostport, proxy_type) - if orig_type == proxy_type or orig_type == 'https': - # let other handlers take care of it - return None - else: - # need to start over, because the other handlers don't - # grok the proxy's URL type - # e.g. 
if we have a constructor arg proxies like so: - # {'http': 'ftp://proxy.example.com'}, we may end up turning - # a request for http://acme.example.com/a into one for - # ftp://proxy.example.com/a - return self.parent.open(req, timeout=req.timeout) - -class HTTPPasswordMgr(object): - - def __init__(self): - self.passwd = {} - - def add_password(self, realm, uri, user, passwd): - # uri could be a single URI or a sequence - if isinstance(uri, str): - uri = [uri] - if realm not in self.passwd: - self.passwd[realm] = {} - for default_port in True, False: - reduced_uri = tuple( - [self.reduce_uri(u, default_port) for u in uri]) - self.passwd[realm][reduced_uri] = (user, passwd) - - def find_user_password(self, realm, authuri): - domains = self.passwd.get(realm, {}) - for default_port in True, False: - reduced_authuri = self.reduce_uri(authuri, default_port) - for uris, authinfo in domains.items(): - for uri in uris: - if self.is_suburi(uri, reduced_authuri): - return authinfo - return None, None - - def reduce_uri(self, uri, default_port=True): - """Accept authority or URI and extract only the authority and path.""" - # note HTTP URLs do not have a userinfo component - parts = urlsplit(uri) - if parts[1]: - # URI - scheme = parts[0] - authority = parts[1] - path = parts[2] or '/' - else: - # host or host:port - scheme = None - authority = uri - path = '/' - host, port = splitport(authority) - if default_port and port is None and scheme is not None: - dport = {"http": 80, - "https": 443, - }.get(scheme) - if dport is not None: - authority = "%s:%d" % (host, dport) - return authority, path - - def is_suburi(self, base, test): - """Check if test is below base in a URI tree - - Both args must be URIs in reduced form. 
- """ - if base == test: - return True - if base[0] != test[0]: - return False - common = posixpath.commonprefix((base[1], test[1])) - if len(common) == len(base[1]): - return True - return False - - -class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr): - - def find_user_password(self, realm, authuri): - user, password = HTTPPasswordMgr.find_user_password(self, realm, - authuri) - if user is not None: - return user, password - return HTTPPasswordMgr.find_user_password(self, None, authuri) - - -class AbstractBasicAuthHandler(object): - - # XXX this allows for multiple auth-schemes, but will stupidly pick - # the last one with a realm specified. - - # allow for double- and single-quoted realm values - # (single quotes are a violation of the RFC, but appear in the wild) - rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+' - 'realm=(["\']?)([^"\']*)\\2', re.I) - - # XXX could pre-emptively send auth info already accepted (RFC 2617, - # end of section 2, and section 1.2 immediately after "credentials" - # production). - - def __init__(self, password_mgr=None): - if password_mgr is None: - password_mgr = HTTPPasswordMgr() - self.passwd = password_mgr - self.add_password = self.passwd.add_password - self.retried = 0 - - def reset_retry_count(self): - self.retried = 0 - - def http_error_auth_reqed(self, authreq, host, req, headers): - # host may be an authority (without userinfo) or a URL with an - # authority - # XXX could be multiple headers - authreq = headers.get(authreq, None) - - if self.retried > 5: - # retry sending the username:password 5 times before failing. 
- raise HTTPError(req.get_full_url(), 401, "basic auth failed", - headers, None) - else: - self.retried += 1 - - if authreq: - scheme = authreq.split()[0] - if scheme.lower() != 'basic': - raise ValueError("AbstractBasicAuthHandler does not" - " support the following scheme: '%s'" % - scheme) - else: - mo = AbstractBasicAuthHandler.rx.search(authreq) - if mo: - scheme, quote, realm = mo.groups() - if quote not in ['"',"'"]: - warnings.warn("Basic Auth Realm was unquoted", - UserWarning, 2) - if scheme.lower() == 'basic': - response = self.retry_http_basic_auth(host, req, realm) - if response and response.code != 401: - self.retried = 0 - return response - - def retry_http_basic_auth(self, host, req, realm): - user, pw = self.passwd.find_user_password(realm, host) - if pw is not None: - raw = "%s:%s" % (user, pw) - auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii") - if req.headers.get(self.auth_header, None) == auth: - return None - req.add_unredirected_header(self.auth_header, auth) - return self.parent.open(req, timeout=req.timeout) - else: - return None - - -class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): - - auth_header = 'Authorization' - - def http_error_401(self, req, fp, code, msg, headers): - url = req.full_url - response = self.http_error_auth_reqed('www-authenticate', - url, req, headers) - self.reset_retry_count() - return response - - -class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): - - auth_header = 'Proxy-authorization' - - def http_error_407(self, req, fp, code, msg, headers): - # http_error_auth_reqed requires that there is no userinfo component in - # authority. Assume there isn't one, since urllib.request does not (and - # should not, RFC 3986 s. 3.2.1) support requests for URLs containing - # userinfo. - authority = req.host - response = self.http_error_auth_reqed('proxy-authenticate', - authority, req, headers) - self.reset_retry_count() - return response - - -# Return n random bytes. 
-_randombytes = os.urandom - - -class AbstractDigestAuthHandler(object): - # Digest authentication is specified in RFC 2617. - - # XXX The client does not inspect the Authentication-Info header - # in a successful response. - - # XXX It should be possible to test this implementation against - # a mock server that just generates a static set of challenges. - - # XXX qop="auth-int" supports is shaky - - def __init__(self, passwd=None): - if passwd is None: - passwd = HTTPPasswordMgr() - self.passwd = passwd - self.add_password = self.passwd.add_password - self.retried = 0 - self.nonce_count = 0 - self.last_nonce = None - - def reset_retry_count(self): - self.retried = 0 - - def http_error_auth_reqed(self, auth_header, host, req, headers): - authreq = headers.get(auth_header, None) - if self.retried > 5: - # Don't fail endlessly - if we failed once, we'll probably - # fail a second time. Hm. Unless the Password Manager is - # prompting for the information. Crap. This isn't great - # but it's better than the current 'repeat until recursion - # depth exceeded' approach <wink> - raise HTTPError(req.full_url, 401, "digest auth failed", - headers, None) - else: - self.retried += 1 - if authreq: - scheme = authreq.split()[0] - if scheme.lower() == 'digest': - return self.retry_http_digest_auth(req, authreq) - elif scheme.lower() != 'basic': - raise ValueError("AbstractDigestAuthHandler does not support" - " the following scheme: '%s'" % scheme) - - def retry_http_digest_auth(self, req, auth): - token, challenge = auth.split(' ', 1) - chal = parse_keqv_list(filter(None, parse_http_list(challenge))) - auth = self.get_authorization(req, chal) - if auth: - auth_val = 'Digest %s' % auth - if req.headers.get(self.auth_header, None) == auth_val: - return None - req.add_unredirected_header(self.auth_header, auth_val) - resp = self.parent.open(req, timeout=req.timeout) - return resp - - def get_cnonce(self, nonce): - # The cnonce-value is an opaque - # quoted string value provided 
by the client and used by both client - # and server to avoid chosen plaintext attacks, to provide mutual - # authentication, and to provide some message integrity protection. - # This isn't a fabulous effort, but it's probably Good Enough. - s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime()) - b = s.encode("ascii") + _randombytes(8) - dig = hashlib.sha1(b).hexdigest() - return dig[:16] - - def get_authorization(self, req, chal): - try: - realm = chal['realm'] - nonce = chal['nonce'] - qop = chal.get('qop') - algorithm = chal.get('algorithm', 'MD5') - # mod_digest doesn't send an opaque, even though it isn't - # supposed to be optional - opaque = chal.get('opaque', None) - except KeyError: - return None - - H, KD = self.get_algorithm_impls(algorithm) - if H is None: - return None - - user, pw = self.passwd.find_user_password(realm, req.full_url) - if user is None: - return None - - # XXX not implemented yet - if req.data is not None: - entdig = self.get_entity_digest(req.data, chal) - else: - entdig = None - - A1 = "%s:%s:%s" % (user, realm, pw) - A2 = "%s:%s" % (req.get_method(), - # XXX selector: what about proxies and full urls - req.selector) - if qop == 'auth': - if nonce == self.last_nonce: - self.nonce_count += 1 - else: - self.nonce_count = 1 - self.last_nonce = nonce - ncvalue = '%08x' % self.nonce_count - cnonce = self.get_cnonce(nonce) - noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2)) - respdig = KD(H(A1), noncebit) - elif qop is None: - respdig = KD(H(A1), "%s:%s" % (nonce, H(A2))) - else: - # XXX handle auth-int. - raise URLError("qop '%s' is not supported." % qop) - - # XXX should the partial digests be encoded too? 
- - base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \ - 'response="%s"' % (user, realm, nonce, req.selector, - respdig) - if opaque: - base += ', opaque="%s"' % opaque - if entdig: - base += ', digest="%s"' % entdig - base += ', algorithm="%s"' % algorithm - if qop: - base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce) - return base - - def get_algorithm_impls(self, algorithm): - # lambdas assume digest modules are imported at the top level - if algorithm == 'MD5': - H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest() - elif algorithm == 'SHA': - H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest() - # XXX MD5-sess - KD = lambda s, d: H("%s:%s" % (s, d)) - return H, KD - - def get_entity_digest(self, data, chal): - # XXX not implemented yet - return None - - -class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler): - """An authentication protocol defined by RFC 2069 - - Digest authentication improves on basic authentication because it - does not transmit passwords in the clear. 
- """ - - auth_header = 'Authorization' - handler_order = 490 # before Basic auth - - def http_error_401(self, req, fp, code, msg, headers): - host = urlparse(req.full_url)[1] - retry = self.http_error_auth_reqed('www-authenticate', - host, req, headers) - self.reset_retry_count() - return retry - - -class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler): - - auth_header = 'Proxy-Authorization' - handler_order = 490 # before Basic auth - - def http_error_407(self, req, fp, code, msg, headers): - host = req.host - retry = self.http_error_auth_reqed('proxy-authenticate', - host, req, headers) - self.reset_retry_count() - return retry - -class AbstractHTTPHandler(BaseHandler): - - def __init__(self, debuglevel=0): - self._debuglevel = debuglevel - - def set_http_debuglevel(self, level): - self._debuglevel = level - - def do_request_(self, request): - host = request.host - if not host: - raise URLError('no host given') - - if request.data is not None: # POST - data = request.data - if isinstance(data, str): - msg = "POST data should be bytes or an iterable of bytes. " \ - "It cannot be of type str." - raise TypeError(msg) - if not request.has_header('Content-type'): - request.add_unredirected_header( - 'Content-type', - 'application/x-www-form-urlencoded') - if not request.has_header('Content-length'): - size = None - try: - ### For Python-Future: - if PY2 and isinstance(data, array.array): - # memoryviews of arrays aren't supported - # in Py2.7. (e.g. memoryview(array.array('I', - # [1, 2, 3, 4])) raises a TypeError.) 
- # So we calculate the size manually instead: - size = len(data) * data.itemsize - ### - else: - mv = memoryview(data) - size = len(mv) * mv.itemsize - except TypeError: - if isinstance(data, Iterable): - raise ValueError("Content-Length should be specified " - "for iterable data of type %r %r" % (type(data), - data)) - else: - request.add_unredirected_header( - 'Content-length', '%d' % size) - - sel_host = host - if request.has_proxy(): - scheme, sel = splittype(request.selector) - sel_host, sel_path = splithost(sel) - if not request.has_header('Host'): - request.add_unredirected_header('Host', sel_host) - for name, value in self.parent.addheaders: - name = name.capitalize() - if not request.has_header(name): - request.add_unredirected_header(name, value) - - return request - - def do_open(self, http_class, req, **http_conn_args): - """Return an HTTPResponse object for the request, using http_class. - - http_class must implement the HTTPConnection API from http.client. - """ - host = req.host - if not host: - raise URLError('no host given') - - # will parse host:port - h = http_class(host, timeout=req.timeout, **http_conn_args) - - headers = dict(req.unredirected_hdrs) - headers.update(dict((k, v) for k, v in req.headers.items() - if k not in headers)) - - # TODO(jhylton): Should this be redesigned to handle - # persistent connections? - - # We want to make an HTTP/1.1 request, but the addinfourl - # class isn't prepared to deal with a persistent connection. - # It will try to read all remaining data from the socket, - # which will block while the server waits for the next request. - # So make sure the connection gets closed after the (only) - # request. 
- headers["Connection"] = "close" - headers = dict((name.title(), val) for name, val in headers.items()) - - if req._tunnel_host: - tunnel_headers = {} - proxy_auth_hdr = "Proxy-Authorization" - if proxy_auth_hdr in headers: - tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr] - # Proxy-Authorization should not be sent to origin - # server. - del headers[proxy_auth_hdr] - h.set_tunnel(req._tunnel_host, headers=tunnel_headers) - - try: - h.request(req.get_method(), req.selector, req.data, headers) - except socket.error as err: # timeout error - h.close() - raise URLError(err) - else: - r = h.getresponse() - # If the server does not send us a 'Connection: close' header, - # HTTPConnection assumes the socket should be left open. Manually - # mark the socket to be closed when this response object goes away. - if h.sock: - h.sock.close() - h.sock = None - - - r.url = req.get_full_url() - # This line replaces the .msg attribute of the HTTPResponse - # with .headers, because urllib clients expect the response to - # have the reason in .msg. It would be good to mark this - # attribute is deprecated and get then to use info() or - # .headers. 
- r.msg = r.reason - return r - - -class HTTPHandler(AbstractHTTPHandler): - - def http_open(self, req): - return self.do_open(http_client.HTTPConnection, req) - - http_request = AbstractHTTPHandler.do_request_ - -if hasattr(http_client, 'HTTPSConnection'): - - class HTTPSHandler(AbstractHTTPHandler): - - def __init__(self, debuglevel=0, context=None, check_hostname=None): - AbstractHTTPHandler.__init__(self, debuglevel) - self._context = context - self._check_hostname = check_hostname - - def https_open(self, req): - return self.do_open(http_client.HTTPSConnection, req, - context=self._context, check_hostname=self._check_hostname) - - https_request = AbstractHTTPHandler.do_request_ - - __all__.append('HTTPSHandler') - -class HTTPCookieProcessor(BaseHandler): - def __init__(self, cookiejar=None): - import future.backports.http.cookiejar as http_cookiejar - if cookiejar is None: - cookiejar = http_cookiejar.CookieJar() - self.cookiejar = cookiejar - - def http_request(self, request): - self.cookiejar.add_cookie_header(request) - return request - - def http_response(self, request, response): - self.cookiejar.extract_cookies(response, request) - return response - - https_request = http_request - https_response = http_response - -class UnknownHandler(BaseHandler): - def unknown_open(self, req): - type = req.type - raise URLError('unknown url type: %s' % type) - -def parse_keqv_list(l): - """Parse list of key=value strings where keys are not duplicated.""" - parsed = {} - for elt in l: - k, v = elt.split('=', 1) - if v[0] == '"' and v[-1] == '"': - v = v[1:-1] - parsed[k] = v - return parsed - -def parse_http_list(s): - """Parse lists as described by RFC 2068 Section 2. - - In particular, parse comma-separated lists where the elements of - the list may include quoted-strings. A quoted-string could - contain a comma. A non-quoted string could have quotes in the - middle. Neither commas nor quotes count if they are escaped. - Only double-quotes count, not single-quotes. 
- """ - res = [] - part = '' - - escape = quote = False - for cur in s: - if escape: - part += cur - escape = False - continue - if quote: - if cur == '\\': - escape = True - continue - elif cur == '"': - quote = False - part += cur - continue - - if cur == ',': - res.append(part) - part = '' - continue - - if cur == '"': - quote = True - - part += cur - - # append last part - if part: - res.append(part) - - return [part.strip() for part in res] - -class FileHandler(BaseHandler): - # Use local file or FTP depending on form of URL - def file_open(self, req): - url = req.selector - if url[:2] == '//' and url[2:3] != '/' and (req.host and - req.host != 'localhost'): - if not req.host is self.get_names(): - raise URLError("file:// scheme is supported only on localhost") - else: - return self.open_local_file(req) - - # names for the localhost - names = None - def get_names(self): - if FileHandler.names is None: - try: - FileHandler.names = tuple( - socket.gethostbyname_ex('localhost')[2] + - socket.gethostbyname_ex(socket.gethostname())[2]) - except socket.gaierror: - FileHandler.names = (socket.gethostbyname('localhost'),) - return FileHandler.names - - # not entirely sure what the rules are here - def open_local_file(self, req): - import future.backports.email.utils as email_utils - import mimetypes - host = req.host - filename = req.selector - localfile = url2pathname(filename) - try: - stats = os.stat(localfile) - size = stats.st_size - modified = email_utils.formatdate(stats.st_mtime, usegmt=True) - mtype = mimetypes.guess_type(filename)[0] - headers = email.message_from_string( - 'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' % - (mtype or 'text/plain', size, modified)) - if host: - host, port = splitport(host) - if not host or \ - (not port and _safe_gethostbyname(host) in self.get_names()): - if host: - origurl = 'file://' + host + filename - else: - origurl = 'file://' + filename - return addinfourl(open(localfile, 'rb'), headers, origurl) - except 
OSError as exp: - # users shouldn't expect OSErrors coming from urlopen() - raise URLError(exp) - raise URLError('file not on local host') - -def _safe_gethostbyname(host): - try: - return socket.gethostbyname(host) - except socket.gaierror: - return None - -class FTPHandler(BaseHandler): - def ftp_open(self, req): - import ftplib - import mimetypes - host = req.host - if not host: - raise URLError('ftp error: no host given') - host, port = splitport(host) - if port is None: - port = ftplib.FTP_PORT - else: - port = int(port) - - # username/password handling - user, host = splituser(host) - if user: - user, passwd = splitpasswd(user) - else: - passwd = None - host = unquote(host) - user = user or '' - passwd = passwd or '' - - try: - host = socket.gethostbyname(host) - except socket.error as msg: - raise URLError(msg) - path, attrs = splitattr(req.selector) - dirs = path.split('/') - dirs = list(map(unquote, dirs)) - dirs, file = dirs[:-1], dirs[-1] - if dirs and not dirs[0]: - dirs = dirs[1:] - try: - fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout) - type = file and 'I' or 'D' - for attr in attrs: - attr, value = splitvalue(attr) - if attr.lower() == 'type' and \ - value in ('a', 'A', 'i', 'I', 'd', 'D'): - type = value.upper() - fp, retrlen = fw.retrfile(file, type) - headers = "" - mtype = mimetypes.guess_type(req.full_url)[0] - if mtype: - headers += "Content-type: %s\n" % mtype - if retrlen is not None and retrlen >= 0: - headers += "Content-length: %d\n" % retrlen - headers = email.message_from_string(headers) - return addinfourl(fp, headers, req.full_url) - except ftplib.all_errors as exp: - exc = URLError('ftp error: %r' % exp) - raise_with_traceback(exc) - - def connect_ftp(self, user, passwd, host, port, dirs, timeout): - return ftpwrapper(user, passwd, host, port, dirs, timeout, - persistent=False) - -class CacheFTPHandler(FTPHandler): - # XXX would be nice to have pluggable cache strategies - # XXX this stuff is definitely not thread 
safe - def __init__(self): - self.cache = {} - self.timeout = {} - self.soonest = 0 - self.delay = 60 - self.max_conns = 16 - - def setTimeout(self, t): - self.delay = t - - def setMaxConns(self, m): - self.max_conns = m - - def connect_ftp(self, user, passwd, host, port, dirs, timeout): - key = user, host, port, '/'.join(dirs), timeout - if key in self.cache: - self.timeout[key] = time.time() + self.delay - else: - self.cache[key] = ftpwrapper(user, passwd, host, port, - dirs, timeout) - self.timeout[key] = time.time() + self.delay - self.check_cache() - return self.cache[key] - - def check_cache(self): - # first check for old ones - t = time.time() - if self.soonest <= t: - for k, v in list(self.timeout.items()): - if v < t: - self.cache[k].close() - del self.cache[k] - del self.timeout[k] - self.soonest = min(list(self.timeout.values())) - - # then check the size - if len(self.cache) == self.max_conns: - for k, v in list(self.timeout.items()): - if v == self.soonest: - del self.cache[k] - del self.timeout[k] - break - self.soonest = min(list(self.timeout.values())) - - def clear_cache(self): - for conn in self.cache.values(): - conn.close() - self.cache.clear() - self.timeout.clear() - - -# Code move from the old urllib module - -MAXFTPCACHE = 10 # Trim the ftp cache beyond this size - -# Helper for non-unix systems -if os.name == 'nt': - from nturl2path import url2pathname, pathname2url -else: - def url2pathname(pathname): - """OS-specific conversion from a relative URL of the 'file' scheme - to a file system path; not recommended for general use.""" - return unquote(pathname) - - def pathname2url(pathname): - """OS-specific conversion from a file system path to a relative URL - of the 'file' scheme; not recommended for general use.""" - return quote(pathname) - -# This really consists of two pieces: -# (1) a class which handles opening of all sorts of URLs -# (plus assorted utilities etc.) 
-# (2) a set of functions for parsing URLs -# XXX Should these be separated out into different modules? - - -ftpcache = {} -class URLopener(object): - """Class to open URLs. - This is a class rather than just a subroutine because we may need - more than one set of global protocol-specific options. - Note -- this is a base class for those who don't want the - automatic handling of errors type 302 (relocated) and 401 - (authorization needed).""" - - __tempfiles = None - - version = "Python-urllib/%s" % __version__ - - # Constructor - def __init__(self, proxies=None, **x509): - msg = "%(class)s style of invoking requests is deprecated. " \ - "Use newer urlopen functions/methods" % {'class': self.__class__.__name__} - warnings.warn(msg, DeprecationWarning, stacklevel=3) - if proxies is None: - proxies = getproxies() - assert hasattr(proxies, 'keys'), "proxies must be a mapping" - self.proxies = proxies - self.key_file = x509.get('key_file') - self.cert_file = x509.get('cert_file') - self.addheaders = [('User-Agent', self.version)] - self.__tempfiles = [] - self.__unlink = os.unlink # See cleanup() - self.tempcache = None - # Undocumented feature: if you assign {} to tempcache, - # it is used to cache files retrieved with - # self.retrieve(). This is not enabled by default - # since it does not work for changing documents (and I - # haven't got the logic to check expiration headers - # yet). - self.ftpcache = ftpcache - # Undocumented feature: you can use a different - # ftp cache by assigning to the .ftpcache member; - # in case you want logically independent URL openers - # XXX This is not threadsafe. Bah. - - def __del__(self): - self.close() - - def close(self): - self.cleanup() - - def cleanup(self): - # This code sometimes runs when the rest of this module - # has already been deleted, so it can't use any globals - # or import anything. 
- if self.__tempfiles: - for file in self.__tempfiles: - try: - self.__unlink(file) - except OSError: - pass - del self.__tempfiles[:] - if self.tempcache: - self.tempcache.clear() - - def addheader(self, *args): - """Add a header to be used by the HTTP interface only - e.g. u.addheader('Accept', 'sound/basic')""" - self.addheaders.append(args) - - # External interface - def open(self, fullurl, data=None): - """Use URLopener().open(file) instead of open(file, 'r').""" - fullurl = unwrap(to_bytes(fullurl)) - fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|") - if self.tempcache and fullurl in self.tempcache: - filename, headers = self.tempcache[fullurl] - fp = open(filename, 'rb') - return addinfourl(fp, headers, fullurl) - urltype, url = splittype(fullurl) - if not urltype: - urltype = 'file' - if urltype in self.proxies: - proxy = self.proxies[urltype] - urltype, proxyhost = splittype(proxy) - host, selector = splithost(proxyhost) - url = (host, fullurl) # Signal special case to open_*() - else: - proxy = None - name = 'open_' + urltype - self.type = urltype - name = name.replace('-', '_') - if not hasattr(self, name): - if proxy: - return self.open_unknown_proxy(proxy, fullurl, data) - else: - return self.open_unknown(fullurl, data) - try: - if data is None: - return getattr(self, name)(url) - else: - return getattr(self, name)(url, data) - except HTTPError: - raise - except socket.error as msg: - raise_with_traceback(IOError('socket error', msg)) - - def open_unknown(self, fullurl, data=None): - """Overridable interface to open unknown URL type.""" - type, url = splittype(fullurl) - raise IOError('url error', 'unknown url type', type) - - def open_unknown_proxy(self, proxy, fullurl, data=None): - """Overridable interface to open unknown URL type.""" - type, url = splittype(fullurl) - raise IOError('url error', 'invalid proxy for %s' % type, proxy) - - # External interface - def retrieve(self, url, filename=None, reporthook=None, data=None): - 
"""retrieve(url) returns (filename, headers) for a local object - or (tempfilename, headers) for a remote object.""" - url = unwrap(to_bytes(url)) - if self.tempcache and url in self.tempcache: - return self.tempcache[url] - type, url1 = splittype(url) - if filename is None and (not type or type == 'file'): - try: - fp = self.open_local_file(url1) - hdrs = fp.info() - fp.close() - return url2pathname(splithost(url1)[1]), hdrs - except IOError as msg: - pass - fp = self.open(url, data) - try: - headers = fp.info() - if filename: - tfp = open(filename, 'wb') - else: - import tempfile - garbage, path = splittype(url) - garbage, path = splithost(path or "") - path, garbage = splitquery(path or "") - path, garbage = splitattr(path or "") - suffix = os.path.splitext(path)[1] - (fd, filename) = tempfile.mkstemp(suffix) - self.__tempfiles.append(filename) - tfp = os.fdopen(fd, 'wb') - try: - result = filename, headers - if self.tempcache is not None: - self.tempcache[url] = result - bs = 1024*8 - size = -1 - read = 0 - blocknum = 0 - if "content-length" in headers: - size = int(headers["Content-Length"]) - if reporthook: - reporthook(blocknum, bs, size) - while 1: - block = fp.read(bs) - if not block: - break - read += len(block) - tfp.write(block) - blocknum += 1 - if reporthook: - reporthook(blocknum, bs, size) - finally: - tfp.close() - finally: - fp.close() - - # raise exception if actual size does not match content-length header - if size >= 0 and read < size: - raise ContentTooShortError( - "retrieval incomplete: got only %i out of %i bytes" - % (read, size), result) - - return result - - # Each method named open_<type> knows how to open that type of URL - - def _open_generic_http(self, connection_factory, url, data): - """Make an HTTP connection using connection_class. - - This is an internal method that should be called from - open_http() or open_https(). - - Arguments: - - connection_factory should take a host name and return an - HTTPConnection instance. 
- - url is the url to retrieval or a host, relative-path pair. - - data is payload for a POST request or None. - """ - - user_passwd = None - proxy_passwd= None - if isinstance(url, str): - host, selector = splithost(url) - if host: - user_passwd, host = splituser(host) - host = unquote(host) - realhost = host - else: - host, selector = url - # check whether the proxy contains authorization information - proxy_passwd, host = splituser(host) - # now we proceed with the url we want to obtain - urltype, rest = splittype(selector) - url = rest - user_passwd = None - if urltype.lower() != 'http': - realhost = None - else: - realhost, rest = splithost(rest) - if realhost: - user_passwd, realhost = splituser(realhost) - if user_passwd: - selector = "%s://%s%s" % (urltype, realhost, rest) - if proxy_bypass(realhost): - host = realhost - - if not host: raise IOError('http error', 'no host given') - - if proxy_passwd: - proxy_passwd = unquote(proxy_passwd) - proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii') - else: - proxy_auth = None - - if user_passwd: - user_passwd = unquote(user_passwd) - auth = base64.b64encode(user_passwd.encode()).decode('ascii') - else: - auth = None - http_conn = connection_factory(host) - headers = {} - if proxy_auth: - headers["Proxy-Authorization"] = "Basic %s" % proxy_auth - if auth: - headers["Authorization"] = "Basic %s" % auth - if realhost: - headers["Host"] = realhost - - # Add Connection:close as we don't support persistent connections yet. 
- # This helps in closing the socket and avoiding ResourceWarning - - headers["Connection"] = "close" - - for header, value in self.addheaders: - headers[header] = value - - if data is not None: - headers["Content-Type"] = "application/x-www-form-urlencoded" - http_conn.request("POST", selector, data, headers) - else: - http_conn.request("GET", selector, headers=headers) - - try: - response = http_conn.getresponse() - except http_client.BadStatusLine: - # something went wrong with the HTTP status line - raise URLError("http protocol error: bad status line") - - # According to RFC 2616, "2xx" code indicates that the client's - # request was successfully received, understood, and accepted. - if 200 <= response.status < 300: - return addinfourl(response, response.msg, "http:" + url, - response.status) - else: - return self.http_error( - url, response.fp, - response.status, response.reason, response.msg, data) - - def open_http(self, url, data=None): - """Use HTTP protocol.""" - return self._open_generic_http(http_client.HTTPConnection, url, data) - - def http_error(self, url, fp, errcode, errmsg, headers, data=None): - """Handle http errors. 
- - Derived class can override this, or provide specific handlers - named http_error_DDD where DDD is the 3-digit error code.""" - # First check if there's a specific handler for this error - name = 'http_error_%d' % errcode - if hasattr(self, name): - method = getattr(self, name) - if data is None: - result = method(url, fp, errcode, errmsg, headers) - else: - result = method(url, fp, errcode, errmsg, headers, data) - if result: return result - return self.http_error_default(url, fp, errcode, errmsg, headers) - - def http_error_default(self, url, fp, errcode, errmsg, headers): - """Default error handler: close the connection and raise IOError.""" - fp.close() - raise HTTPError(url, errcode, errmsg, headers, None) - - if _have_ssl: - def _https_connection(self, host): - return http_client.HTTPSConnection(host, - key_file=self.key_file, - cert_file=self.cert_file) - - def open_https(self, url, data=None): - """Use HTTPS protocol.""" - return self._open_generic_http(self._https_connection, url, data) - - def open_file(self, url): - """Use local file or FTP depending on form of URL.""" - if not isinstance(url, str): - raise URLError('file error: proxy support for file protocol currently not implemented') - if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/': - raise ValueError("file:// scheme is supported only on localhost") - else: - return self.open_local_file(url) - - def open_local_file(self, url): - """Use local file.""" - import future.backports.email.utils as email_utils - import mimetypes - host, file = splithost(url) - localname = url2pathname(file) - try: - stats = os.stat(localname) - except OSError as e: - raise URLError(e.strerror, e.filename) - size = stats.st_size - modified = email_utils.formatdate(stats.st_mtime, usegmt=True) - mtype = mimetypes.guess_type(url)[0] - headers = email.message_from_string( - 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % - (mtype or 'text/plain', size, modified)) - if not host: - 
urlfile = file - if file[:1] == '/': - urlfile = 'file://' + file - return addinfourl(open(localname, 'rb'), headers, urlfile) - host, port = splitport(host) - if (not port - and socket.gethostbyname(host) in ((localhost(),) + thishost())): - urlfile = file - if file[:1] == '/': - urlfile = 'file://' + file - elif file[:2] == './': - raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url) - return addinfourl(open(localname, 'rb'), headers, urlfile) - raise URLError('local file error: not on local host') - - def open_ftp(self, url): - """Use FTP protocol.""" - if not isinstance(url, str): - raise URLError('ftp error: proxy support for ftp protocol currently not implemented') - import mimetypes - host, path = splithost(url) - if not host: raise URLError('ftp error: no host given') - host, port = splitport(host) - user, host = splituser(host) - if user: user, passwd = splitpasswd(user) - else: passwd = None - host = unquote(host) - user = unquote(user or '') - passwd = unquote(passwd or '') - host = socket.gethostbyname(host) - if not port: - import ftplib - port = ftplib.FTP_PORT - else: - port = int(port) - path, attrs = splitattr(path) - path = unquote(path) - dirs = path.split('/') - dirs, file = dirs[:-1], dirs[-1] - if dirs and not dirs[0]: dirs = dirs[1:] - if dirs and not dirs[0]: dirs[0] = '/' - key = user, host, port, '/'.join(dirs) - # XXX thread unsafe! 
- if len(self.ftpcache) > MAXFTPCACHE: - # Prune the cache, rather arbitrarily - for k in self.ftpcache.keys(): - if k != key: - v = self.ftpcache[k] - del self.ftpcache[k] - v.close() - try: - if key not in self.ftpcache: - self.ftpcache[key] = \ - ftpwrapper(user, passwd, host, port, dirs) - if not file: type = 'D' - else: type = 'I' - for attr in attrs: - attr, value = splitvalue(attr) - if attr.lower() == 'type' and \ - value in ('a', 'A', 'i', 'I', 'd', 'D'): - type = value.upper() - (fp, retrlen) = self.ftpcache[key].retrfile(file, type) - mtype = mimetypes.guess_type("ftp:" + url)[0] - headers = "" - if mtype: - headers += "Content-Type: %s\n" % mtype - if retrlen is not None and retrlen >= 0: - headers += "Content-Length: %d\n" % retrlen - headers = email.message_from_string(headers) - return addinfourl(fp, headers, "ftp:" + url) - except ftperrors() as exp: - raise_with_traceback(URLError('ftp error %r' % exp)) - - def open_data(self, url, data=None): - """Use "data" URL.""" - if not isinstance(url, str): - raise URLError('data error: proxy support for data protocol currently not implemented') - # ignore POSTed data - # - # syntax of data URLs: - # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data - # mediatype := [ type "/" subtype ] *( ";" parameter ) - # data := *urlchar - # parameter := attribute "=" value - try: - [type, data] = url.split(',', 1) - except ValueError: - raise IOError('data error', 'bad data URL') - if not type: - type = 'text/plain;charset=US-ASCII' - semi = type.rfind(';') - if semi >= 0 and '=' not in type[semi:]: - encoding = type[semi+1:] - type = type[:semi] - else: - encoding = '' - msg = [] - msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT', - time.gmtime(time.time()))) - msg.append('Content-type: %s' % type) - if encoding == 'base64': - # XXX is this encoding/decoding ok? 
- data = base64.decodebytes(data.encode('ascii')).decode('latin-1') - else: - data = unquote(data) - msg.append('Content-Length: %d' % len(data)) - msg.append('') - msg.append(data) - msg = '\n'.join(msg) - headers = email.message_from_string(msg) - f = io.StringIO(msg) - #f.fileno = None # needed for addinfourl - return addinfourl(f, headers, url) - - -class FancyURLopener(URLopener): - """Derived class with handlers for errors we can handle (perhaps).""" - - def __init__(self, *args, **kwargs): - URLopener.__init__(self, *args, **kwargs) - self.auth_cache = {} - self.tries = 0 - self.maxtries = 10 - - def http_error_default(self, url, fp, errcode, errmsg, headers): - """Default error handling -- don't raise an exception.""" - return addinfourl(fp, headers, "http:" + url, errcode) - - def http_error_302(self, url, fp, errcode, errmsg, headers, data=None): - """Error 302 -- relocated (temporarily).""" - self.tries += 1 - if self.maxtries and self.tries >= self.maxtries: - if hasattr(self, "http_error_500"): - meth = self.http_error_500 - else: - meth = self.http_error_default - self.tries = 0 - return meth(url, fp, 500, - "Internal Server Error: Redirect Recursion", headers) - result = self.redirect_internal(url, fp, errcode, errmsg, headers, - data) - self.tries = 0 - return result - - def redirect_internal(self, url, fp, errcode, errmsg, headers, data): - if 'location' in headers: - newurl = headers['location'] - elif 'uri' in headers: - newurl = headers['uri'] - else: - return - fp.close() - - # In case the server sent a relative URL, join with original: - newurl = urljoin(self.type + ":" + url, newurl) - - urlparts = urlparse(newurl) - - # For security reasons, we don't allow redirection to anything other - # than http, https and ftp. 
- - # We are using newer HTTPError with older redirect_internal method - # This older method will get deprecated in 3.3 - - if urlparts.scheme not in ('http', 'https', 'ftp', ''): - raise HTTPError(newurl, errcode, - errmsg + - " Redirection to url '%s' is not allowed." % newurl, - headers, fp) - - return self.open(newurl) - - def http_error_301(self, url, fp, errcode, errmsg, headers, data=None): - """Error 301 -- also relocated (permanently).""" - return self.http_error_302(url, fp, errcode, errmsg, headers, data) - - def http_error_303(self, url, fp, errcode, errmsg, headers, data=None): - """Error 303 -- also relocated (essentially identical to 302).""" - return self.http_error_302(url, fp, errcode, errmsg, headers, data) - - def http_error_307(self, url, fp, errcode, errmsg, headers, data=None): - """Error 307 -- relocated, but turn POST into error.""" - if data is None: - return self.http_error_302(url, fp, errcode, errmsg, headers, data) - else: - return self.http_error_default(url, fp, errcode, errmsg, headers) - - def http_error_401(self, url, fp, errcode, errmsg, headers, data=None, - retry=False): - """Error 401 -- authentication required. 
- This function supports Basic authentication only.""" - if 'www-authenticate' not in headers: - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - stuff = headers['www-authenticate'] - match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) - if not match: - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - scheme, realm = match.groups() - if scheme.lower() != 'basic': - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - if not retry: - URLopener.http_error_default(self, url, fp, errcode, errmsg, - headers) - name = 'retry_' + self.type + '_basic_auth' - if data is None: - return getattr(self,name)(url, realm) - else: - return getattr(self,name)(url, realm, data) - - def http_error_407(self, url, fp, errcode, errmsg, headers, data=None, - retry=False): - """Error 407 -- proxy authentication required. - This function supports Basic authentication only.""" - if 'proxy-authenticate' not in headers: - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - stuff = headers['proxy-authenticate'] - match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) - if not match: - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - scheme, realm = match.groups() - if scheme.lower() != 'basic': - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - if not retry: - URLopener.http_error_default(self, url, fp, errcode, errmsg, - headers) - name = 'retry_proxy_' + self.type + '_basic_auth' - if data is None: - return getattr(self,name)(url, realm) - else: - return getattr(self,name)(url, realm, data) - - def retry_proxy_http_basic_auth(self, url, realm, data=None): - host, selector = splithost(url) - newurl = 'http://' + host + selector - proxy = self.proxies['http'] - urltype, proxyhost = splittype(proxy) - proxyhost, proxyselector = splithost(proxyhost) - i = proxyhost.find('@') + 1 - proxyhost = proxyhost[i:] - user, passwd = 
self.get_user_passwd(proxyhost, realm, i) - if not (user or passwd): return None - proxyhost = "%s:%s@%s" % (quote(user, safe=''), - quote(passwd, safe=''), proxyhost) - self.proxies['http'] = 'http://' + proxyhost + proxyselector - if data is None: - return self.open(newurl) - else: - return self.open(newurl, data) - - def retry_proxy_https_basic_auth(self, url, realm, data=None): - host, selector = splithost(url) - newurl = 'https://' + host + selector - proxy = self.proxies['https'] - urltype, proxyhost = splittype(proxy) - proxyhost, proxyselector = splithost(proxyhost) - i = proxyhost.find('@') + 1 - proxyhost = proxyhost[i:] - user, passwd = self.get_user_passwd(proxyhost, realm, i) - if not (user or passwd): return None - proxyhost = "%s:%s@%s" % (quote(user, safe=''), - quote(passwd, safe=''), proxyhost) - self.proxies['https'] = 'https://' + proxyhost + proxyselector - if data is None: - return self.open(newurl) - else: - return self.open(newurl, data) - - def retry_http_basic_auth(self, url, realm, data=None): - host, selector = splithost(url) - i = host.find('@') + 1 - host = host[i:] - user, passwd = self.get_user_passwd(host, realm, i) - if not (user or passwd): return None - host = "%s:%s@%s" % (quote(user, safe=''), - quote(passwd, safe=''), host) - newurl = 'http://' + host + selector - if data is None: - return self.open(newurl) - else: - return self.open(newurl, data) - - def retry_https_basic_auth(self, url, realm, data=None): - host, selector = splithost(url) - i = host.find('@') + 1 - host = host[i:] - user, passwd = self.get_user_passwd(host, realm, i) - if not (user or passwd): return None - host = "%s:%s@%s" % (quote(user, safe=''), - quote(passwd, safe=''), host) - newurl = 'https://' + host + selector - if data is None: - return self.open(newurl) - else: - return self.open(newurl, data) - - def get_user_passwd(self, host, realm, clear_cache=0): - key = realm + '@' + host.lower() - if key in self.auth_cache: - if clear_cache: - del 
self.auth_cache[key] - else: - return self.auth_cache[key] - user, passwd = self.prompt_user_passwd(host, realm) - if user or passwd: self.auth_cache[key] = (user, passwd) - return user, passwd - - def prompt_user_passwd(self, host, realm): - """Override this in a GUI environment!""" - import getpass - try: - user = input("Enter username for %s at %s: " % (realm, host)) - passwd = getpass.getpass("Enter password for %s in %s at %s: " % - (user, realm, host)) - return user, passwd - except KeyboardInterrupt: - print() - return None, None - - -# Utility functions - -_localhost = None -def localhost(): - """Return the IP address of the magic hostname 'localhost'.""" - global _localhost - if _localhost is None: - _localhost = socket.gethostbyname('localhost') - return _localhost - -_thishost = None -def thishost(): - """Return the IP addresses of the current host.""" - global _thishost - if _thishost is None: - try: - _thishost = tuple(socket.gethostbyname_ex(socket.gethostname())[2]) - except socket.gaierror: - _thishost = tuple(socket.gethostbyname_ex('localhost')[2]) - return _thishost - -_ftperrors = None -def ftperrors(): - """Return the set of errors raised by the FTP class.""" - global _ftperrors - if _ftperrors is None: - import ftplib - _ftperrors = ftplib.all_errors - return _ftperrors - -_noheaders = None -def noheaders(): - """Return an empty email Message object.""" - global _noheaders - if _noheaders is None: - _noheaders = email.message_from_string("") - return _noheaders - - -# Utility classes - -class ftpwrapper(object): - """Class used by open_ftp() for cache of open FTP connections.""" - - def __init__(self, user, passwd, host, port, dirs, timeout=None, - persistent=True): - self.user = user - self.passwd = passwd - self.host = host - self.port = port - self.dirs = dirs - self.timeout = timeout - self.refcount = 0 - self.keepalive = persistent - self.init() - - def init(self): - import ftplib - self.busy = 0 - self.ftp = ftplib.FTP() - 
self.ftp.connect(self.host, self.port, self.timeout) - self.ftp.login(self.user, self.passwd) - _target = '/'.join(self.dirs) - self.ftp.cwd(_target) - - def retrfile(self, file, type): - import ftplib - self.endtransfer() - if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1 - else: cmd = 'TYPE ' + type; isdir = 0 - try: - self.ftp.voidcmd(cmd) - except ftplib.all_errors: - self.init() - self.ftp.voidcmd(cmd) - conn = None - if file and not isdir: - # Try to retrieve as a file - try: - cmd = 'RETR ' + file - conn, retrlen = self.ftp.ntransfercmd(cmd) - except ftplib.error_perm as reason: - if str(reason)[:3] != '550': - raise_with_traceback(URLError('ftp error: %r' % reason)) - if not conn: - # Set transfer mode to ASCII! - self.ftp.voidcmd('TYPE A') - # Try a directory listing. Verify that directory exists. - if file: - pwd = self.ftp.pwd() - try: - try: - self.ftp.cwd(file) - except ftplib.error_perm as reason: - ### Was: - # raise URLError('ftp error: %r' % reason) from reason - exc = URLError('ftp error: %r' % reason) - exc.__cause__ = reason - raise exc - finally: - self.ftp.cwd(pwd) - cmd = 'LIST ' + file - else: - cmd = 'LIST' - conn, retrlen = self.ftp.ntransfercmd(cmd) - self.busy = 1 - - ftpobj = addclosehook(conn.makefile('rb'), self.file_close) - self.refcount += 1 - conn.close() - # Pass back both a suitably decorated object and a retrieval length - return (ftpobj, retrlen) - - def endtransfer(self): - self.busy = 0 - - def close(self): - self.keepalive = False - if self.refcount <= 0: - self.real_close() - - def file_close(self): - self.endtransfer() - self.refcount -= 1 - if self.refcount <= 0 and not self.keepalive: - self.real_close() - - def real_close(self): - self.endtransfer() - try: - self.ftp.close() - except ftperrors(): - pass - -# Proxy handling -def getproxies_environment(): - """Return a dictionary of scheme -> proxy server URL mappings. 
- - Scan the environment for variables named <scheme>_proxy; - this seems to be the standard convention. If you need a - different way, you can pass a proxies dictionary to the - [Fancy]URLopener constructor. - - """ - proxies = {} - for name, value in os.environ.items(): - name = name.lower() - if value and name[-6:] == '_proxy': - proxies[name[:-6]] = value - return proxies - -def proxy_bypass_environment(host): - """Test if proxies should not be used for a particular host. - - Checks the environment for a variable named no_proxy, which should - be a list of DNS suffixes separated by commas, or '*' for all hosts. - """ - no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '') - # '*' is special case for always bypass - if no_proxy == '*': - return 1 - # strip port off host - hostonly, port = splitport(host) - # check if the host ends with any of the DNS suffixes - no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')] - for name in no_proxy_list: - if name and (hostonly.endswith(name) or host.endswith(name)): - return 1 - # otherwise, don't bypass - return 0 - - -# This code tests an OSX specific data structure but is testable on all -# platforms -def _proxy_bypass_macosx_sysconf(host, proxy_settings): - """ - Return True iff this host shouldn't be accessed using a proxy - - This function uses the MacOSX framework SystemConfiguration - to fetch the proxy information. - - proxy_settings come from _scproxy._get_proxy_settings or get mocked ie: - { 'exclude_simple': bool, - 'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16'] - } - """ - from fnmatch import fnmatch - - hostonly, port = splitport(host) - - def ip2num(ipAddr): - parts = ipAddr.split('.') - parts = list(map(int, parts)) - if len(parts) != 4: - parts = (parts + [0, 0, 0, 0])[:4] - return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3] - - # Check for simple host names: - if '.' 
not in host: - if proxy_settings['exclude_simple']: - return True - - hostIP = None - - for value in proxy_settings.get('exceptions', ()): - # Items in the list are strings like these: *.local, 169.254/16 - if not value: continue - - m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value) - if m is not None: - if hostIP is None: - try: - hostIP = socket.gethostbyname(hostonly) - hostIP = ip2num(hostIP) - except socket.error: - continue - - base = ip2num(m.group(1)) - mask = m.group(2) - if mask is None: - mask = 8 * (m.group(1).count('.') + 1) - else: - mask = int(mask[1:]) - mask = 32 - mask - - if (hostIP >> mask) == (base >> mask): - return True - - elif fnmatch(host, value): - return True - - return False - - -if sys.platform == 'darwin': - from _scproxy import _get_proxy_settings, _get_proxies - - def proxy_bypass_macosx_sysconf(host): - proxy_settings = _get_proxy_settings() - return _proxy_bypass_macosx_sysconf(host, proxy_settings) - - def getproxies_macosx_sysconf(): - """Return a dictionary of scheme -> proxy server URL mappings. - - This function uses the MacOSX framework SystemConfiguration - to fetch the proxy information. - """ - return _get_proxies() - - - - def proxy_bypass(host): - if getproxies_environment(): - return proxy_bypass_environment(host) - else: - return proxy_bypass_macosx_sysconf(host) - - def getproxies(): - return getproxies_environment() or getproxies_macosx_sysconf() - - -elif os.name == 'nt': - def getproxies_registry(): - """Return a dictionary of scheme -> proxy server URL mappings. - - Win32 uses the registry to store proxies. - - """ - proxies = {} - try: - import winreg - except ImportError: - # Std module, so should be around - but you never know! 
- return proxies - try: - internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER, - r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') - proxyEnable = winreg.QueryValueEx(internetSettings, - 'ProxyEnable')[0] - if proxyEnable: - # Returned as Unicode but problems if not converted to ASCII - proxyServer = str(winreg.QueryValueEx(internetSettings, - 'ProxyServer')[0]) - if '=' in proxyServer: - # Per-protocol settings - for p in proxyServer.split(';'): - protocol, address = p.split('=', 1) - # See if address has a type:// prefix - if not re.match('^([^/:]+)://', address): - address = '%s://%s' % (protocol, address) - proxies[protocol] = address - else: - # Use one setting for all protocols - if proxyServer[:5] == 'http:': - proxies['http'] = proxyServer - else: - proxies['http'] = 'http://%s' % proxyServer - proxies['https'] = 'https://%s' % proxyServer - proxies['ftp'] = 'ftp://%s' % proxyServer - internetSettings.Close() - except (WindowsError, ValueError, TypeError): - # Either registry key not found etc, or the value in an - # unexpected format. - # proxies already set up to be empty so nothing to do - pass - return proxies - - def getproxies(): - """Return a dictionary of scheme -> proxy server URL mappings. - - Returns settings gathered from the environment, if specified, - or the registry. - - """ - return getproxies_environment() or getproxies_registry() - - def proxy_bypass_registry(host): - try: - import winreg - except ImportError: - # Std modules, so should be around - but you never know! 
- return 0 - try: - internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER, - r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') - proxyEnable = winreg.QueryValueEx(internetSettings, - 'ProxyEnable')[0] - proxyOverride = str(winreg.QueryValueEx(internetSettings, - 'ProxyOverride')[0]) - # ^^^^ Returned as Unicode but problems if not converted to ASCII - except WindowsError: - return 0 - if not proxyEnable or not proxyOverride: - return 0 - # try to make a host list from name and IP address. - rawHost, port = splitport(host) - host = [rawHost] - try: - addr = socket.gethostbyname(rawHost) - if addr != rawHost: - host.append(addr) - except socket.error: - pass - try: - fqdn = socket.getfqdn(rawHost) - if fqdn != rawHost: - host.append(fqdn) - except socket.error: - pass - # make a check value list from the registry entry: replace the - # '<local>' string by the localhost entry and the corresponding - # canonical entry. - proxyOverride = proxyOverride.split(';') - # now check if we match one of the registry values. - for test in proxyOverride: - if test == '<local>': - if '.' not in rawHost: - return 1 - test = test.replace(".", r"\.") # mask dots - test = test.replace("*", r".*") # change glob sequence - test = test.replace("?", r".") # change glob char - for val in host: - if re.match(test, val, re.I): - return 1 - return 0 - - def proxy_bypass(host): - """Return a dictionary of scheme -> proxy server URL mappings. - - Returns settings gathered from the environment, if specified, - or the registry. 
- - """ - if getproxies_environment(): - return proxy_bypass_environment(host) - else: - return proxy_bypass_registry(host) - -else: - # By default use environment variables - getproxies = getproxies_environment - proxy_bypass = proxy_bypass_environment diff --git a/contrib/python/future/future/backports/urllib/response.py b/contrib/python/future/future/backports/urllib/response.py deleted file mode 100644 index adbf6e5ae36..00000000000 --- a/contrib/python/future/future/backports/urllib/response.py +++ /dev/null @@ -1,103 +0,0 @@ -"""Response classes used by urllib. - -The base class, addbase, defines a minimal file-like interface, -including read() and readline(). The typical response object is an -addinfourl instance, which defines an info() method that returns -headers and a geturl() method that returns the url. -""" -from __future__ import absolute_import, division, unicode_literals -from future.builtins import object - -class addbase(object): - """Base class for addinfo and addclosehook.""" - - # XXX Add a method to expose the timeout on the underlying socket? - - def __init__(self, fp): - # TODO(jhylton): Is there a better way to delegate using io? - self.fp = fp - self.read = self.fp.read - self.readline = self.fp.readline - # TODO(jhylton): Make sure an object with readlines() is also iterable - if hasattr(self.fp, "readlines"): - self.readlines = self.fp.readlines - if hasattr(self.fp, "fileno"): - self.fileno = self.fp.fileno - else: - self.fileno = lambda: None - - def __iter__(self): - # Assigning `__iter__` to the instance doesn't work as intended - # because the iter builtin does something like `cls.__iter__(obj)` - # and thus fails to find the _bound_ method `obj.__iter__`. - # Returning just `self.fp` works for built-in file objects but - # might not work for general file-like objects. 
- return iter(self.fp) - - def __repr__(self): - return '<%s at %r whose fp = %r>' % (self.__class__.__name__, - id(self), self.fp) - - def close(self): - if self.fp: - self.fp.close() - self.fp = None - self.read = None - self.readline = None - self.readlines = None - self.fileno = None - self.__iter__ = None - self.__next__ = None - - def __enter__(self): - if self.fp is None: - raise ValueError("I/O operation on closed file") - return self - - def __exit__(self, type, value, traceback): - self.close() - -class addclosehook(addbase): - """Class to add a close hook to an open file.""" - - def __init__(self, fp, closehook, *hookargs): - addbase.__init__(self, fp) - self.closehook = closehook - self.hookargs = hookargs - - def close(self): - if self.closehook: - self.closehook(*self.hookargs) - self.closehook = None - self.hookargs = None - addbase.close(self) - -class addinfo(addbase): - """class to add an info() method to an open file.""" - - def __init__(self, fp, headers): - addbase.__init__(self, fp) - self.headers = headers - - def info(self): - return self.headers - -class addinfourl(addbase): - """class to add info() and geturl() methods to an open file.""" - - def __init__(self, fp, headers, url, code=None): - addbase.__init__(self, fp) - self.headers = headers - self.url = url - self.code = code - - def info(self): - return self.headers - - def getcode(self): - return self.code - - def geturl(self): - return self.url - -del absolute_import, division, unicode_literals, object diff --git a/contrib/python/future/future/backports/urllib/robotparser.py b/contrib/python/future/future/backports/urllib/robotparser.py deleted file mode 100644 index a0f36511b4b..00000000000 --- a/contrib/python/future/future/backports/urllib/robotparser.py +++ /dev/null @@ -1,211 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals -from future.builtins import str -""" robotparser.py - - Copyright (C) 2000 Bastian Kleineidam - - You can choose between two 
licenses when using this package: - 1) GNU GPLv2 - 2) PSF license for Python 2.2 - - The robots.txt Exclusion Protocol is implemented as specified in - http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html -""" - -# Was: import urllib.parse, urllib.request -from future.backports import urllib -from future.backports.urllib import parse as _parse, request as _request -urllib.parse = _parse -urllib.request = _request - - -__all__ = ["RobotFileParser"] - -class RobotFileParser(object): - """ This class provides a set of methods to read, parse and answer - questions about a single robots.txt file. - - """ - - def __init__(self, url=''): - self.entries = [] - self.default_entry = None - self.disallow_all = False - self.allow_all = False - self.set_url(url) - self.last_checked = 0 - - def mtime(self): - """Returns the time the robots.txt file was last fetched. - - This is useful for long-running web spiders that need to - check for new robots.txt files periodically. - - """ - return self.last_checked - - def modified(self): - """Sets the time the robots.txt file was last fetched to the - current time. - - """ - import time - self.last_checked = time.time() - - def set_url(self, url): - """Sets the URL referring to a robots.txt file.""" - self.url = url - self.host, self.path = urllib.parse.urlparse(url)[1:3] - - def read(self): - """Reads the robots.txt URL and feeds it to the parser.""" - try: - f = urllib.request.urlopen(self.url) - except urllib.error.HTTPError as err: - if err.code in (401, 403): - self.disallow_all = True - elif err.code >= 400: - self.allow_all = True - else: - raw = f.read() - self.parse(raw.decode("utf-8").splitlines()) - - def _add_entry(self, entry): - if "*" in entry.useragents: - # the default entry is considered last - if self.default_entry is None: - # the first default entry wins - self.default_entry = entry - else: - self.entries.append(entry) - - def parse(self, lines): - """Parse the input lines from a robots.txt file. 
- - We allow that a user-agent: line is not preceded by - one or more blank lines. - """ - # states: - # 0: start state - # 1: saw user-agent line - # 2: saw an allow or disallow line - state = 0 - entry = Entry() - - for line in lines: - if not line: - if state == 1: - entry = Entry() - state = 0 - elif state == 2: - self._add_entry(entry) - entry = Entry() - state = 0 - # remove optional comment and strip line - i = line.find('#') - if i >= 0: - line = line[:i] - line = line.strip() - if not line: - continue - line = line.split(':', 1) - if len(line) == 2: - line[0] = line[0].strip().lower() - line[1] = urllib.parse.unquote(line[1].strip()) - if line[0] == "user-agent": - if state == 2: - self._add_entry(entry) - entry = Entry() - entry.useragents.append(line[1]) - state = 1 - elif line[0] == "disallow": - if state != 0: - entry.rulelines.append(RuleLine(line[1], False)) - state = 2 - elif line[0] == "allow": - if state != 0: - entry.rulelines.append(RuleLine(line[1], True)) - state = 2 - if state == 2: - self._add_entry(entry) - - - def can_fetch(self, useragent, url): - """using the parsed robots.txt decide if useragent can fetch url""" - if self.disallow_all: - return False - if self.allow_all: - return True - # search for given user agent matches - # the first match counts - parsed_url = urllib.parse.urlparse(urllib.parse.unquote(url)) - url = urllib.parse.urlunparse(('','',parsed_url.path, - parsed_url.params,parsed_url.query, parsed_url.fragment)) - url = urllib.parse.quote(url) - if not url: - url = "/" - for entry in self.entries: - if entry.applies_to(useragent): - return entry.allowance(url) - # try the default entry last - if self.default_entry: - return self.default_entry.allowance(url) - # agent not found ==> access granted - return True - - def __str__(self): - return ''.join([str(entry) + "\n" for entry in self.entries]) - - -class RuleLine(object): - """A rule line is a single "Allow:" (allowance==True) or "Disallow:" - (allowance==False) followed 
by a path.""" - def __init__(self, path, allowance): - if path == '' and not allowance: - # an empty value means allow all - allowance = True - self.path = urllib.parse.quote(path) - self.allowance = allowance - - def applies_to(self, filename): - return self.path == "*" or filename.startswith(self.path) - - def __str__(self): - return (self.allowance and "Allow" or "Disallow") + ": " + self.path - - -class Entry(object): - """An entry has one or more user-agents and zero or more rulelines""" - def __init__(self): - self.useragents = [] - self.rulelines = [] - - def __str__(self): - ret = [] - for agent in self.useragents: - ret.extend(["User-agent: ", agent, "\n"]) - for line in self.rulelines: - ret.extend([str(line), "\n"]) - return ''.join(ret) - - def applies_to(self, useragent): - """check if this entry applies to the specified agent""" - # split the name token and make it lower case - useragent = useragent.split("/")[0].lower() - for agent in self.useragents: - if agent == '*': - # we have the catch-all agent - return True - agent = agent.lower() - if agent in useragent: - return True - return False - - def allowance(self, filename): - """Preconditions: - - our agent applies to this entry - - filename is URL decoded""" - for line in self.rulelines: - if line.applies_to(filename): - return line.allowance - return True diff --git a/contrib/python/future/future/backports/xmlrpc/__init__.py b/contrib/python/future/future/backports/xmlrpc/__init__.py deleted file mode 100644 index 196d3788575..00000000000 --- a/contrib/python/future/future/backports/xmlrpc/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# This directory is a Python package. 
diff --git a/contrib/python/future/future/backports/xmlrpc/client.py b/contrib/python/future/future/backports/xmlrpc/client.py deleted file mode 100644 index 3f0cae9b00f..00000000000 --- a/contrib/python/future/future/backports/xmlrpc/client.py +++ /dev/null @@ -1,1500 +0,0 @@ -# -# XML-RPC CLIENT LIBRARY -# $Id$ -# -# an XML-RPC client interface for Python. -# -# the marshalling and response parser code can also be used to -# implement XML-RPC servers. -# -# Notes: -# this version is designed to work with Python 2.1 or newer. -# -# History: -# 1999-01-14 fl Created -# 1999-01-15 fl Changed dateTime to use localtime -# 1999-01-16 fl Added Binary/base64 element, default to RPC2 service -# 1999-01-19 fl Fixed array data element (from Skip Montanaro) -# 1999-01-21 fl Fixed dateTime constructor, etc. -# 1999-02-02 fl Added fault handling, handle empty sequences, etc. -# 1999-02-10 fl Fixed problem with empty responses (from Skip Montanaro) -# 1999-06-20 fl Speed improvements, pluggable parsers/transports (0.9.8) -# 2000-11-28 fl Changed boolean to check the truth value of its argument -# 2001-02-24 fl Added encoding/Unicode/SafeTransport patches -# 2001-02-26 fl Added compare support to wrappers (0.9.9/1.0b1) -# 2001-03-28 fl Make sure response tuple is a singleton -# 2001-03-29 fl Don't require empty params element (from Nicholas Riley) -# 2001-06-10 fl Folded in _xmlrpclib accelerator support (1.0b2) -# 2001-08-20 fl Base xmlrpclib.Error on built-in Exception (from Paul Prescod) -# 2001-09-03 fl Allow Transport subclass to override getparser -# 2001-09-10 fl Lazy import of urllib, cgi, xmllib (20x import speedup) -# 2001-10-01 fl Remove containers from memo cache when done with them -# 2001-10-01 fl Use faster escape method (80% dumps speedup) -# 2001-10-02 fl More dumps microtuning -# 2001-10-04 fl Make sure import expat gets a parser (from Guido van Rossum) -# 2001-10-10 sm Allow long ints to be passed as ints if they don't overflow -# 2001-10-17 sm Test for int 
and long overflow (allows use on 64-bit systems) -# 2001-11-12 fl Use repr() to marshal doubles (from Paul Felix) -# 2002-03-17 fl Avoid buffered read when possible (from James Rucker) -# 2002-04-07 fl Added pythondoc comments -# 2002-04-16 fl Added __str__ methods to datetime/binary wrappers -# 2002-05-15 fl Added error constants (from Andrew Kuchling) -# 2002-06-27 fl Merged with Python CVS version -# 2002-10-22 fl Added basic authentication (based on code from Phillip Eby) -# 2003-01-22 sm Add support for the bool type -# 2003-02-27 gvr Remove apply calls -# 2003-04-24 sm Use cStringIO if available -# 2003-04-25 ak Add support for nil -# 2003-06-15 gn Add support for time.struct_time -# 2003-07-12 gp Correct marshalling of Faults -# 2003-10-31 mvl Add multicall support -# 2004-08-20 mvl Bump minimum supported Python version to 2.1 -# -# Copyright (c) 1999-2002 by Secret Labs AB. -# Copyright (c) 1999-2002 by Fredrik Lundh. -# -# http://www.pythonware.com -# -# -------------------------------------------------------------------- -# The XML-RPC client interface is -# -# Copyright (c) 1999-2002 by Secret Labs AB -# Copyright (c) 1999-2002 by Fredrik Lundh -# -# By obtaining, using, and/or copying this software and/or its -# associated documentation, you agree that you have read, understood, -# and will comply with the following terms and conditions: -# -# Permission to use, copy, modify, and distribute this software and -# its associated documentation for any purpose and without fee is -# hereby granted, provided that the above copyright notice appears in -# all copies, and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of -# Secret Labs AB or the author not be used in advertising or publicity -# pertaining to distribution of the software without specific, written -# prior permission. 
-# -# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD -# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- -# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR -# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY -# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS -# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE -# OF THIS SOFTWARE. -# -------------------------------------------------------------------- - -""" -Ported using Python-Future from the Python 3.3 standard library. - -An XML-RPC client interface for Python. - -The marshalling and response parser code can also be used to -implement XML-RPC servers. - -Exported exceptions: - - Error Base class for client errors - ProtocolError Indicates an HTTP protocol error - ResponseError Indicates a broken response package - Fault Indicates an XML-RPC fault package - -Exported classes: - - ServerProxy Represents a logical connection to an XML-RPC server - - MultiCall Executor of boxcared xmlrpc requests - DateTime dateTime wrapper for an ISO 8601 string or time tuple or - localtime integer value to generate a "dateTime.iso8601" - XML-RPC value - Binary binary data wrapper - - Marshaller Generate an XML-RPC params chunk from a Python data structure - Unmarshaller Unmarshal an XML-RPC response from incoming XML event message - Transport Handles an HTTP transaction to an XML-RPC server - SafeTransport Handles an HTTPS transaction to an XML-RPC server - -Exported constants: - - (none) - -Exported functions: - - getparser Create instance of the fastest available parser & attach - to an unmarshalling object - dumps Convert an argument tuple or a Fault instance to an XML-RPC - request (or response, if the methodresponse option is used). - loads Convert an XML-RPC packet to unmarshalled data plus a method - name (None if not present). 
-""" - -from __future__ import (absolute_import, division, print_function, - unicode_literals) -from future.builtins import bytes, dict, int, range, str - -import sys -import base64 -if sys.version_info[0] < 3: - # Py2.7 compatibility hack - base64.encodebytes = base64.encodestring - base64.decodebytes = base64.decodestring -import time -from datetime import datetime -from future.backports.http import client as http_client -from future.backports.urllib import parse as urllib_parse -from future.utils import ensure_new_type -from xml.parsers import expat -import socket -import errno -from io import BytesIO -try: - import gzip -except ImportError: - gzip = None #python can be built without zlib/gzip support - -# -------------------------------------------------------------------- -# Internal stuff - -def escape(s): - s = s.replace("&", "&") - s = s.replace("<", "<") - return s.replace(">", ">",) - -# used in User-Agent header sent -__version__ = sys.version[:3] - -# xmlrpc integer limits -MAXINT = 2**31-1 -MININT = -2**31 - -# -------------------------------------------------------------------- -# Error constants (from Dan Libby's specification at -# http://xmlrpc-epi.sourceforge.net/specs/rfc.fault_codes.php) - -# Ranges of errors -PARSE_ERROR = -32700 -SERVER_ERROR = -32600 -APPLICATION_ERROR = -32500 -SYSTEM_ERROR = -32400 -TRANSPORT_ERROR = -32300 - -# Specific errors -NOT_WELLFORMED_ERROR = -32700 -UNSUPPORTED_ENCODING = -32701 -INVALID_ENCODING_CHAR = -32702 -INVALID_XMLRPC = -32600 -METHOD_NOT_FOUND = -32601 -INVALID_METHOD_PARAMS = -32602 -INTERNAL_ERROR = -32603 - -# -------------------------------------------------------------------- -# Exceptions - -## -# Base class for all kinds of client-side errors. - -class Error(Exception): - """Base class for client errors.""" - def __str__(self): - return repr(self) - -## -# Indicates an HTTP-level protocol error. 
This is raised by the HTTP -# transport layer, if the server returns an error code other than 200 -# (OK). -# -# @param url The target URL. -# @param errcode The HTTP error code. -# @param errmsg The HTTP error message. -# @param headers The HTTP header dictionary. - -class ProtocolError(Error): - """Indicates an HTTP protocol error.""" - def __init__(self, url, errcode, errmsg, headers): - Error.__init__(self) - self.url = url - self.errcode = errcode - self.errmsg = errmsg - self.headers = headers - def __repr__(self): - return ( - "<ProtocolError for %s: %s %s>" % - (self.url, self.errcode, self.errmsg) - ) - -## -# Indicates a broken XML-RPC response package. This exception is -# raised by the unmarshalling layer, if the XML-RPC response is -# malformed. - -class ResponseError(Error): - """Indicates a broken response package.""" - pass - -## -# Indicates an XML-RPC fault response package. This exception is -# raised by the unmarshalling layer, if the XML-RPC response contains -# a fault string. This exception can also be used as a class, to -# generate a fault XML-RPC message. -# -# @param faultCode The XML-RPC fault code. -# @param faultString The XML-RPC fault string. - -class Fault(Error): - """Indicates an XML-RPC fault package.""" - def __init__(self, faultCode, faultString, **extra): - Error.__init__(self) - self.faultCode = faultCode - self.faultString = faultString - def __repr__(self): - return "<Fault %s: %r>" % (ensure_new_type(self.faultCode), - ensure_new_type(self.faultString)) - -# -------------------------------------------------------------------- -# Special values - -## -# Backwards compatibility - -boolean = Boolean = bool - -## -# Wrapper for XML-RPC DateTime values. This converts a time value to -# the format used by XML-RPC. -# <p> -# The value can be given as a datetime object, as a string in the -# format "yyyymmddThh:mm:ss", as a 9-item time tuple (as returned by -# time.localtime()), or an integer value (as returned by time.time()). 
-# The wrapper uses time.localtime() to convert an integer to a time -# tuple. -# -# @param value The time, given as a datetime object, an ISO 8601 string, -# a time tuple, or an integer time value. - - -### For Python-Future: -def _iso8601_format(value): - return "%04d%02d%02dT%02d:%02d:%02d" % ( - value.year, value.month, value.day, - value.hour, value.minute, value.second) -### -# Issue #13305: different format codes across platforms -# _day0 = datetime(1, 1, 1) -# if _day0.strftime('%Y') == '0001': # Mac OS X -# def _iso8601_format(value): -# return value.strftime("%Y%m%dT%H:%M:%S") -# elif _day0.strftime('%4Y') == '0001': # Linux -# def _iso8601_format(value): -# return value.strftime("%4Y%m%dT%H:%M:%S") -# else: -# def _iso8601_format(value): -# return value.strftime("%Y%m%dT%H:%M:%S").zfill(17) -# del _day0 - - -def _strftime(value): - if isinstance(value, datetime): - return _iso8601_format(value) - - if not isinstance(value, (tuple, time.struct_time)): - if value == 0: - value = time.time() - value = time.localtime(value) - - return "%04d%02d%02dT%02d:%02d:%02d" % value[:6] - -class DateTime(object): - """DateTime wrapper for an ISO 8601 string or time tuple or - localtime integer value to generate 'dateTime.iso8601' XML-RPC - value. 
- """ - - def __init__(self, value=0): - if isinstance(value, str): - self.value = value - else: - self.value = _strftime(value) - - def make_comparable(self, other): - if isinstance(other, DateTime): - s = self.value - o = other.value - elif isinstance(other, datetime): - s = self.value - o = _iso8601_format(other) - elif isinstance(other, str): - s = self.value - o = other - elif hasattr(other, "timetuple"): - s = self.timetuple() - o = other.timetuple() - else: - otype = (hasattr(other, "__class__") - and other.__class__.__name__ - or type(other)) - raise TypeError("Can't compare %s and %s" % - (self.__class__.__name__, otype)) - return s, o - - def __lt__(self, other): - s, o = self.make_comparable(other) - return s < o - - def __le__(self, other): - s, o = self.make_comparable(other) - return s <= o - - def __gt__(self, other): - s, o = self.make_comparable(other) - return s > o - - def __ge__(self, other): - s, o = self.make_comparable(other) - return s >= o - - def __eq__(self, other): - s, o = self.make_comparable(other) - return s == o - - def __ne__(self, other): - s, o = self.make_comparable(other) - return s != o - - def timetuple(self): - return time.strptime(self.value, "%Y%m%dT%H:%M:%S") - - ## - # Get date/time value. - # - # @return Date/time value, as an ISO 8601 string. - - def __str__(self): - return self.value - - def __repr__(self): - return "<DateTime %r at %x>" % (ensure_new_type(self.value), id(self)) - - def decode(self, data): - self.value = str(data).strip() - - def encode(self, out): - out.write("<value><dateTime.iso8601>") - out.write(self.value) - out.write("</dateTime.iso8601></value>\n") - -def _datetime(data): - # decode xml element contents into a DateTime structure. - value = DateTime() - value.decode(data) - return value - -def _datetime_type(data): - return datetime.strptime(data, "%Y%m%dT%H:%M:%S") - -## -# Wrapper for binary data. This can be used to transport any kind -# of binary data over XML-RPC, using BASE64 encoding. 
-# -# @param data An 8-bit string containing arbitrary data. - -class Binary(object): - """Wrapper for binary data.""" - - def __init__(self, data=None): - if data is None: - data = b"" - else: - if not isinstance(data, (bytes, bytearray)): - raise TypeError("expected bytes or bytearray, not %s" % - data.__class__.__name__) - data = bytes(data) # Make a copy of the bytes! - self.data = data - - ## - # Get buffer contents. - # - # @return Buffer contents, as an 8-bit string. - - def __str__(self): - return str(self.data, "latin-1") # XXX encoding?! - - def __eq__(self, other): - if isinstance(other, Binary): - other = other.data - return self.data == other - - def __ne__(self, other): - if isinstance(other, Binary): - other = other.data - return self.data != other - - def decode(self, data): - self.data = base64.decodebytes(data) - - def encode(self, out): - out.write("<value><base64>\n") - encoded = base64.encodebytes(self.data) - out.write(encoded.decode('ascii')) - out.write("</base64></value>\n") - -def _binary(data): - # decode xml element contents into a Binary structure - value = Binary() - value.decode(data) - return value - -WRAPPERS = (DateTime, Binary) - -# -------------------------------------------------------------------- -# XML parsers - -class ExpatParser(object): - # fast expat parser for Python 2.0 and later. - def __init__(self, target): - self._parser = parser = expat.ParserCreate(None, None) - self._target = target - parser.StartElementHandler = target.start - parser.EndElementHandler = target.end - parser.CharacterDataHandler = target.data - encoding = None - target.xml(encoding, None) - - def feed(self, data): - self._parser.Parse(data, 0) - - def close(self): - self._parser.Parse("", 1) # end of data - del self._target, self._parser # get rid of circular references - -# -------------------------------------------------------------------- -# XML-RPC marshalling and unmarshalling code - -## -# XML-RPC marshaller. 
-# -# @param encoding Default encoding for 8-bit strings. The default -# value is None (interpreted as UTF-8). -# @see dumps - -class Marshaller(object): - """Generate an XML-RPC params chunk from a Python data structure. - - Create a Marshaller instance for each set of parameters, and use - the "dumps" method to convert your data (represented as a tuple) - to an XML-RPC params chunk. To write a fault response, pass a - Fault instance instead. You may prefer to use the "dumps" module - function for this purpose. - """ - - # by the way, if you don't understand what's going on in here, - # that's perfectly ok. - - def __init__(self, encoding=None, allow_none=False): - self.memo = {} - self.data = None - self.encoding = encoding - self.allow_none = allow_none - - dispatch = {} - - def dumps(self, values): - out = [] - write = out.append - dump = self.__dump - if isinstance(values, Fault): - # fault instance - write("<fault>\n") - dump({'faultCode': values.faultCode, - 'faultString': values.faultString}, - write) - write("</fault>\n") - else: - # parameter block - # FIXME: the xml-rpc specification allows us to leave out - # the entire <params> block if there are no parameters. - # however, changing this may break older code (including - # old versions of xmlrpclib.py), so this is better left as - # is for now. See @XMLRPC3 for more information. /F - write("<params>\n") - for v in values: - write("<param>\n") - dump(v, write) - write("</param>\n") - write("</params>\n") - result = "".join(out) - return str(result) - - def __dump(self, value, write): - try: - f = self.dispatch[type(ensure_new_type(value))] - except KeyError: - # check if this object can be marshalled as a structure - if not hasattr(value, '__dict__'): - raise TypeError("cannot marshal %s objects" % type(value)) - # check if this class is a sub-class of a basic type, - # because we don't know how to marshal these types - # (e.g. 
a string sub-class) - for type_ in type(value).__mro__: - if type_ in self.dispatch.keys(): - raise TypeError("cannot marshal %s objects" % type(value)) - # XXX(twouters): using "_arbitrary_instance" as key as a quick-fix - # for the p3yk merge, this should probably be fixed more neatly. - f = self.dispatch["_arbitrary_instance"] - f(self, value, write) - - def dump_nil (self, value, write): - if not self.allow_none: - raise TypeError("cannot marshal None unless allow_none is enabled") - write("<value><nil/></value>") - dispatch[type(None)] = dump_nil - - def dump_bool(self, value, write): - write("<value><boolean>") - write(value and "1" or "0") - write("</boolean></value>\n") - dispatch[bool] = dump_bool - - def dump_long(self, value, write): - if value > MAXINT or value < MININT: - raise OverflowError("long int exceeds XML-RPC limits") - write("<value><int>") - write(str(int(value))) - write("</int></value>\n") - dispatch[int] = dump_long - - # backward compatible - dump_int = dump_long - - def dump_double(self, value, write): - write("<value><double>") - write(repr(ensure_new_type(value))) - write("</double></value>\n") - dispatch[float] = dump_double - - def dump_unicode(self, value, write, escape=escape): - write("<value><string>") - write(escape(value)) - write("</string></value>\n") - dispatch[str] = dump_unicode - - def dump_bytes(self, value, write): - write("<value><base64>\n") - encoded = base64.encodebytes(value) - write(encoded.decode('ascii')) - write("</base64></value>\n") - dispatch[bytes] = dump_bytes - dispatch[bytearray] = dump_bytes - - def dump_array(self, value, write): - i = id(value) - if i in self.memo: - raise TypeError("cannot marshal recursive sequences") - self.memo[i] = None - dump = self.__dump - write("<value><array><data>\n") - for v in value: - dump(v, write) - write("</data></array></value>\n") - del self.memo[i] - dispatch[tuple] = dump_array - dispatch[list] = dump_array - - def dump_struct(self, value, write, escape=escape): - 
i = id(value) - if i in self.memo: - raise TypeError("cannot marshal recursive dictionaries") - self.memo[i] = None - dump = self.__dump - write("<value><struct>\n") - for k, v in value.items(): - write("<member>\n") - if not isinstance(k, str): - raise TypeError("dictionary key must be string") - write("<name>%s</name>\n" % escape(k)) - dump(v, write) - write("</member>\n") - write("</struct></value>\n") - del self.memo[i] - dispatch[dict] = dump_struct - - def dump_datetime(self, value, write): - write("<value><dateTime.iso8601>") - write(_strftime(value)) - write("</dateTime.iso8601></value>\n") - dispatch[datetime] = dump_datetime - - def dump_instance(self, value, write): - # check for special wrappers - if value.__class__ in WRAPPERS: - self.write = write - value.encode(self) - del self.write - else: - # store instance attributes as a struct (really?) - self.dump_struct(value.__dict__, write) - dispatch[DateTime] = dump_instance - dispatch[Binary] = dump_instance - # XXX(twouters): using "_arbitrary_instance" as key as a quick-fix - # for the p3yk merge, this should probably be fixed more neatly. - dispatch["_arbitrary_instance"] = dump_instance - -## -# XML-RPC unmarshaller. -# -# @see loads - -class Unmarshaller(object): - """Unmarshal an XML-RPC response, based on incoming XML event - messages (start, data, end). Call close() to get the resulting - data structure. - - Note that this reader is fairly tolerant, and gladly accepts bogus - XML-RPC data without complaining (but not bogus XML). - """ - - # and again, if you don't understand what's going on in here, - # that's perfectly ok. 
- - def __init__(self, use_datetime=False, use_builtin_types=False): - self._type = None - self._stack = [] - self._marks = [] - self._data = [] - self._methodname = None - self._encoding = "utf-8" - self.append = self._stack.append - self._use_datetime = use_builtin_types or use_datetime - self._use_bytes = use_builtin_types - - def close(self): - # return response tuple and target method - if self._type is None or self._marks: - raise ResponseError() - if self._type == "fault": - raise Fault(**self._stack[0]) - return tuple(self._stack) - - def getmethodname(self): - return self._methodname - - # - # event handlers - - def xml(self, encoding, standalone): - self._encoding = encoding - # FIXME: assert standalone == 1 ??? - - def start(self, tag, attrs): - # prepare to handle this element - if tag == "array" or tag == "struct": - self._marks.append(len(self._stack)) - self._data = [] - self._value = (tag == "value") - - def data(self, text): - self._data.append(text) - - def end(self, tag): - # call the appropriate end tag handler - try: - f = self.dispatch[tag] - except KeyError: - pass # unknown tag ? - else: - return f(self, "".join(self._data)) - - # - # accelerator support - - def end_dispatch(self, tag, data): - # dispatch data - try: - f = self.dispatch[tag] - except KeyError: - pass # unknown tag ? 
- else: - return f(self, data) - - # - # element decoders - - dispatch = {} - - def end_nil (self, data): - self.append(None) - self._value = 0 - dispatch["nil"] = end_nil - - def end_boolean(self, data): - if data == "0": - self.append(False) - elif data == "1": - self.append(True) - else: - raise TypeError("bad boolean value") - self._value = 0 - dispatch["boolean"] = end_boolean - - def end_int(self, data): - self.append(int(data)) - self._value = 0 - dispatch["i4"] = end_int - dispatch["i8"] = end_int - dispatch["int"] = end_int - - def end_double(self, data): - self.append(float(data)) - self._value = 0 - dispatch["double"] = end_double - - def end_string(self, data): - if self._encoding: - data = data.decode(self._encoding) - self.append(data) - self._value = 0 - dispatch["string"] = end_string - dispatch["name"] = end_string # struct keys are always strings - - def end_array(self, data): - mark = self._marks.pop() - # map arrays to Python lists - self._stack[mark:] = [self._stack[mark:]] - self._value = 0 - dispatch["array"] = end_array - - def end_struct(self, data): - mark = self._marks.pop() - # map structs to Python dictionaries - dict = {} - items = self._stack[mark:] - for i in range(0, len(items), 2): - dict[items[i]] = items[i+1] - self._stack[mark:] = [dict] - self._value = 0 - dispatch["struct"] = end_struct - - def end_base64(self, data): - value = Binary() - value.decode(data.encode("ascii")) - if self._use_bytes: - value = value.data - self.append(value) - self._value = 0 - dispatch["base64"] = end_base64 - - def end_dateTime(self, data): - value = DateTime() - value.decode(data) - if self._use_datetime: - value = _datetime_type(data) - self.append(value) - dispatch["dateTime.iso8601"] = end_dateTime - - def end_value(self, data): - # if we stumble upon a value element with no internal - # elements, treat it as a string element - if self._value: - self.end_string(data) - dispatch["value"] = end_value - - def end_params(self, data): - self._type 
= "params" - dispatch["params"] = end_params - - def end_fault(self, data): - self._type = "fault" - dispatch["fault"] = end_fault - - def end_methodName(self, data): - if self._encoding: - data = data.decode(self._encoding) - self._methodname = data - self._type = "methodName" # no params - dispatch["methodName"] = end_methodName - -## Multicall support -# - -class _MultiCallMethod(object): - # some lesser magic to store calls made to a MultiCall object - # for batch execution - def __init__(self, call_list, name): - self.__call_list = call_list - self.__name = name - def __getattr__(self, name): - return _MultiCallMethod(self.__call_list, "%s.%s" % (self.__name, name)) - def __call__(self, *args): - self.__call_list.append((self.__name, args)) - -class MultiCallIterator(object): - """Iterates over the results of a multicall. Exceptions are - raised in response to xmlrpc faults.""" - - def __init__(self, results): - self.results = results - - def __getitem__(self, i): - item = self.results[i] - if isinstance(type(item), dict): - raise Fault(item['faultCode'], item['faultString']) - elif type(item) == type([]): - return item[0] - else: - raise ValueError("unexpected type in multicall result") - -class MultiCall(object): - """server -> a object used to boxcar method calls - - server should be a ServerProxy object. 
- - Methods can be added to the MultiCall using normal - method call syntax e.g.: - - multicall = MultiCall(server_proxy) - multicall.add(2,3) - multicall.get_address("Guido") - - To execute the multicall, call the MultiCall object e.g.: - - add_result, address = multicall() - """ - - def __init__(self, server): - self.__server = server - self.__call_list = [] - - def __repr__(self): - return "<MultiCall at %x>" % id(self) - - __str__ = __repr__ - - def __getattr__(self, name): - return _MultiCallMethod(self.__call_list, name) - - def __call__(self): - marshalled_list = [] - for name, args in self.__call_list: - marshalled_list.append({'methodName' : name, 'params' : args}) - - return MultiCallIterator(self.__server.system.multicall(marshalled_list)) - -# -------------------------------------------------------------------- -# convenience functions - -FastMarshaller = FastParser = FastUnmarshaller = None - -## -# Create a parser object, and connect it to an unmarshalling instance. -# This function picks the fastest available XML parser. -# -# return A (parser, unmarshaller) tuple. - -def getparser(use_datetime=False, use_builtin_types=False): - """getparser() -> parser, unmarshaller - - Create an instance of the fastest available parser, and attach it - to an unmarshalling object. Return both objects. - """ - if FastParser and FastUnmarshaller: - if use_builtin_types: - mkdatetime = _datetime_type - mkbytes = base64.decodebytes - elif use_datetime: - mkdatetime = _datetime_type - mkbytes = _binary - else: - mkdatetime = _datetime - mkbytes = _binary - target = FastUnmarshaller(True, False, mkbytes, mkdatetime, Fault) - parser = FastParser(target) - else: - target = Unmarshaller(use_datetime=use_datetime, use_builtin_types=use_builtin_types) - if FastParser: - parser = FastParser(target) - else: - parser = ExpatParser(target) - return parser, target - -## -# Convert a Python tuple or a Fault instance to an XML-RPC packet. 
-# -# @def dumps(params, **options) -# @param params A tuple or Fault instance. -# @keyparam methodname If given, create a methodCall request for -# this method name. -# @keyparam methodresponse If given, create a methodResponse packet. -# If used with a tuple, the tuple must be a singleton (that is, -# it must contain exactly one element). -# @keyparam encoding The packet encoding. -# @return A string containing marshalled data. - -def dumps(params, methodname=None, methodresponse=None, encoding=None, - allow_none=False): - """data [,options] -> marshalled data - - Convert an argument tuple or a Fault instance to an XML-RPC - request (or response, if the methodresponse option is used). - - In addition to the data object, the following options can be given - as keyword arguments: - - methodname: the method name for a methodCall packet - - methodresponse: true to create a methodResponse packet. - If this option is used with a tuple, the tuple must be - a singleton (i.e. it can contain only one element). - - encoding: the packet encoding (default is UTF-8) - - All byte strings in the data structure are assumed to use the - packet encoding. Unicode strings are automatically converted, - where necessary. 
- """ - - assert isinstance(params, (tuple, Fault)), "argument must be tuple or Fault instance" - if isinstance(params, Fault): - methodresponse = 1 - elif methodresponse and isinstance(params, tuple): - assert len(params) == 1, "response tuple must be a singleton" - - if not encoding: - encoding = "utf-8" - - if FastMarshaller: - m = FastMarshaller(encoding) - else: - m = Marshaller(encoding, allow_none) - - data = m.dumps(params) - - if encoding != "utf-8": - xmlheader = "<?xml version='1.0' encoding='%s'?>\n" % str(encoding) - else: - xmlheader = "<?xml version='1.0'?>\n" # utf-8 is default - - # standard XML-RPC wrappings - if methodname: - # a method call - if not isinstance(methodname, str): - methodname = methodname.encode(encoding) - data = ( - xmlheader, - "<methodCall>\n" - "<methodName>", methodname, "</methodName>\n", - data, - "</methodCall>\n" - ) - elif methodresponse: - # a method response, or a fault structure - data = ( - xmlheader, - "<methodResponse>\n", - data, - "</methodResponse>\n" - ) - else: - return data # return as is - return str("").join(data) - -## -# Convert an XML-RPC packet to a Python object. If the XML-RPC packet -# represents a fault condition, this function raises a Fault exception. -# -# @param data An XML-RPC packet, given as an 8-bit string. -# @return A tuple containing the unpacked data, and the method name -# (None if not present). -# @see Fault - -def loads(data, use_datetime=False, use_builtin_types=False): - """data -> unmarshalled data, method name - - Convert an XML-RPC packet to unmarshalled data plus a method - name (None if not present). - - If the XML-RPC packet represents a fault condition, this function - raises a Fault exception. 
- """ - p, u = getparser(use_datetime=use_datetime, use_builtin_types=use_builtin_types) - p.feed(data) - p.close() - return u.close(), u.getmethodname() - -## -# Encode a string using the gzip content encoding such as specified by the -# Content-Encoding: gzip -# in the HTTP header, as described in RFC 1952 -# -# @param data the unencoded data -# @return the encoded data - -def gzip_encode(data): - """data -> gzip encoded data - - Encode data using the gzip content encoding as described in RFC 1952 - """ - if not gzip: - raise NotImplementedError - f = BytesIO() - gzf = gzip.GzipFile(mode="wb", fileobj=f, compresslevel=1) - gzf.write(data) - gzf.close() - encoded = f.getvalue() - f.close() - return encoded - -## -# Decode a string using the gzip content encoding such as specified by the -# Content-Encoding: gzip -# in the HTTP header, as described in RFC 1952 -# -# @param data The encoded data -# @return the unencoded data -# @raises ValueError if data is not correctly coded. - -def gzip_decode(data): - """gzip encoded data -> unencoded data - - Decode data using the gzip content encoding as described in RFC 1952 - """ - if not gzip: - raise NotImplementedError - f = BytesIO(data) - gzf = gzip.GzipFile(mode="rb", fileobj=f) - try: - decoded = gzf.read() - except IOError: - raise ValueError("invalid data") - f.close() - gzf.close() - return decoded - -## -# Return a decoded file-like object for the gzip encoding -# as described in RFC 1952. -# -# @param response A stream supporting a read() method -# @return a file-like object that the decoded data can be read() from - -class GzipDecodedResponse(gzip.GzipFile if gzip else object): - """a file-like object to decode a response encoded with the gzip - method, as described in RFC 1952. 
- """ - def __init__(self, response): - #response doesn't support tell() and read(), required by - #GzipFile - if not gzip: - raise NotImplementedError - self.io = BytesIO(response.read()) - gzip.GzipFile.__init__(self, mode="rb", fileobj=self.io) - - def close(self): - gzip.GzipFile.close(self) - self.io.close() - - -# -------------------------------------------------------------------- -# request dispatcher - -class _Method(object): - # some magic to bind an XML-RPC method to an RPC server. - # supports "nested" methods (e.g. examples.getStateName) - def __init__(self, send, name): - self.__send = send - self.__name = name - def __repr__(self): - return "<_Method for %s>" % self.__name - __str__ = __repr__ - def __getattr__(self, name): - return _Method(self.__send, "%s.%s" % (self.__name, name)) - def __call__(self, *args): - return self.__send(self.__name, args) - -## -# Standard transport class for XML-RPC over HTTP. -# <p> -# You can create custom transports by subclassing this method, and -# overriding selected methods. - -class Transport(object): - """Handles an HTTP transaction to an XML-RPC server.""" - - # client identifier (may be overridden) - user_agent = "Python-xmlrpc/%s" % __version__ - - #if true, we'll request gzip encoding - accept_gzip_encoding = True - - # if positive, encode request using gzip if it exceeds this threshold - # note that many server will get confused, so only use it if you know - # that they can decode such a request - encode_threshold = None #None = don't encode - - def __init__(self, use_datetime=False, use_builtin_types=False): - self._use_datetime = use_datetime - self._use_builtin_types = use_builtin_types - self._connection = (None, None) - self._extra_headers = [] - - ## - # Send a complete request, and parse the response. - # Retry request if a cached connection has disconnected. - # - # @param host Target host. - # @param handler Target PRC handler. - # @param request_body XML-RPC request body. 
- # @param verbose Debugging flag. - # @return Parsed response. - - def request(self, host, handler, request_body, verbose=False): - #retry request once if cached connection has gone cold - for i in (0, 1): - try: - return self.single_request(host, handler, request_body, verbose) - except socket.error as e: - if i or e.errno not in (errno.ECONNRESET, errno.ECONNABORTED, errno.EPIPE): - raise - except http_client.BadStatusLine: #close after we sent request - if i: - raise - - def single_request(self, host, handler, request_body, verbose=False): - # issue XML-RPC request - try: - http_conn = self.send_request(host, handler, request_body, verbose) - resp = http_conn.getresponse() - if resp.status == 200: - self.verbose = verbose - return self.parse_response(resp) - - except Fault: - raise - except Exception: - #All unexpected errors leave connection in - # a strange state, so we clear it. - self.close() - raise - - #We got an error response. - #Discard any response data and raise exception - if resp.getheader("content-length", ""): - resp.read() - raise ProtocolError( - host + handler, - resp.status, resp.reason, - dict(resp.getheaders()) - ) - - - ## - # Create parser. - # - # @return A 2-tuple containing a parser and a unmarshaller. - - def getparser(self): - # get parser and unmarshaller - return getparser(use_datetime=self._use_datetime, - use_builtin_types=self._use_builtin_types) - - ## - # Get authorization info from host parameter - # Host may be a string, or a (host, x509-dict) tuple; if a string, - # it is checked for a "user:pw@host" format, and a "Basic - # Authentication" header is added if appropriate. - # - # @param host Host descriptor (URL or (URL, x509 info) tuple). - # @return A 3-tuple containing (actual host, extra headers, - # x509 info). The header and x509 fields may be None. 
- - def get_host_info(self, host): - - x509 = {} - if isinstance(host, tuple): - host, x509 = host - - auth, host = urllib_parse.splituser(host) - - if auth: - auth = urllib_parse.unquote_to_bytes(auth) - auth = base64.encodebytes(auth).decode("utf-8") - auth = "".join(auth.split()) # get rid of whitespace - extra_headers = [ - ("Authorization", "Basic " + auth) - ] - else: - extra_headers = [] - - return host, extra_headers, x509 - - ## - # Connect to server. - # - # @param host Target host. - # @return An HTTPConnection object - - def make_connection(self, host): - #return an existing connection if possible. This allows - #HTTP/1.1 keep-alive. - if self._connection and host == self._connection[0]: - return self._connection[1] - # create a HTTP connection object from a host descriptor - chost, self._extra_headers, x509 = self.get_host_info(host) - self._connection = host, http_client.HTTPConnection(chost) - return self._connection[1] - - ## - # Clear any cached connection object. - # Used in the event of socket errors. - # - def close(self): - if self._connection[1]: - self._connection[1].close() - self._connection = (None, None) - - ## - # Send HTTP request. - # - # @param host Host descriptor (URL or (URL, x509 info) tuple). - # @param handler Targer RPC handler (a path relative to host) - # @param request_body The XML-RPC request body - # @param debug Enable debugging if debug is true. - # @return An HTTPConnection. 
- - def send_request(self, host, handler, request_body, debug): - connection = self.make_connection(host) - headers = self._extra_headers[:] - if debug: - connection.set_debuglevel(1) - if self.accept_gzip_encoding and gzip: - connection.putrequest("POST", handler, skip_accept_encoding=True) - headers.append(("Accept-Encoding", "gzip")) - else: - connection.putrequest("POST", handler) - headers.append(("Content-Type", "text/xml")) - headers.append(("User-Agent", self.user_agent)) - self.send_headers(connection, headers) - self.send_content(connection, request_body) - return connection - - ## - # Send request headers. - # This function provides a useful hook for subclassing - # - # @param connection httpConnection. - # @param headers list of key,value pairs for HTTP headers - - def send_headers(self, connection, headers): - for key, val in headers: - connection.putheader(key, val) - - ## - # Send request body. - # This function provides a useful hook for subclassing - # - # @param connection httpConnection. - # @param request_body XML-RPC request body. - - def send_content(self, connection, request_body): - #optionally encode the request - if (self.encode_threshold is not None and - self.encode_threshold < len(request_body) and - gzip): - connection.putheader("Content-Encoding", "gzip") - request_body = gzip_encode(request_body) - - connection.putheader("Content-Length", str(len(request_body))) - connection.endheaders(request_body) - - ## - # Parse response. - # - # @param file Stream. - # @return Response tuple and target method. - - def parse_response(self, response): - # read response data from httpresponse, and parse it - # Check for new http response object, otherwise it is a file object. 
- if hasattr(response, 'getheader'): - if response.getheader("Content-Encoding", "") == "gzip": - stream = GzipDecodedResponse(response) - else: - stream = response - else: - stream = response - - p, u = self.getparser() - - while 1: - data = stream.read(1024) - if not data: - break - if self.verbose: - print("body:", repr(data)) - p.feed(data) - - if stream is not response: - stream.close() - p.close() - - return u.close() - -## -# Standard transport class for XML-RPC over HTTPS. - -class SafeTransport(Transport): - """Handles an HTTPS transaction to an XML-RPC server.""" - - # FIXME: mostly untested - - def make_connection(self, host): - if self._connection and host == self._connection[0]: - return self._connection[1] - - if not hasattr(http_client, "HTTPSConnection"): - raise NotImplementedError( - "your version of http.client doesn't support HTTPS") - # create a HTTPS connection object from a host descriptor - # host may be a string, or a (host, x509-dict) tuple - chost, self._extra_headers, x509 = self.get_host_info(host) - self._connection = host, http_client.HTTPSConnection(chost, - None, **(x509 or {})) - return self._connection[1] - -## -# Standard server proxy. This class establishes a virtual connection -# to an XML-RPC server. -# <p> -# This class is available as ServerProxy and Server. New code should -# use ServerProxy, to avoid confusion. -# -# @def ServerProxy(uri, **options) -# @param uri The connection point on the server. -# @keyparam transport A transport factory, compatible with the -# standard transport class. -# @keyparam encoding The default encoding used for 8-bit strings -# (default is UTF-8). -# @keyparam verbose Use a true value to enable debugging output. -# (printed to standard output). -# @see Transport - -class ServerProxy(object): - """uri [,options] -> a logical connection to an XML-RPC server - - uri is the connection point on the server, given as - scheme://host/target. 
- - The standard implementation always supports the "http" scheme. If - SSL socket support is available (Python 2.0), it also supports - "https". - - If the target part and the slash preceding it are both omitted, - "/RPC2" is assumed. - - The following options can be given as keyword arguments: - - transport: a transport factory - encoding: the request encoding (default is UTF-8) - - All 8-bit strings passed to the server proxy are assumed to use - the given encoding. - """ - - def __init__(self, uri, transport=None, encoding=None, verbose=False, - allow_none=False, use_datetime=False, use_builtin_types=False): - # establish a "logical" server connection - - # get the url - type, uri = urllib_parse.splittype(uri) - if type not in ("http", "https"): - raise IOError("unsupported XML-RPC protocol") - self.__host, self.__handler = urllib_parse.splithost(uri) - if not self.__handler: - self.__handler = "/RPC2" - - if transport is None: - if type == "https": - handler = SafeTransport - else: - handler = Transport - transport = handler(use_datetime=use_datetime, - use_builtin_types=use_builtin_types) - self.__transport = transport - - self.__encoding = encoding or 'utf-8' - self.__verbose = verbose - self.__allow_none = allow_none - - def __close(self): - self.__transport.close() - - def __request(self, methodname, params): - # call a method on the remote server - - request = dumps(params, methodname, encoding=self.__encoding, - allow_none=self.__allow_none).encode(self.__encoding) - - response = self.__transport.request( - self.__host, - self.__handler, - request, - verbose=self.__verbose - ) - - if len(response) == 1: - response = response[0] - - return response - - def __repr__(self): - return ( - "<ServerProxy for %s%s>" % - (self.__host, self.__handler) - ) - - __str__ = __repr__ - - def __getattr__(self, name): - # magic method dispatcher - return _Method(self.__request, name) - - # note: to call a remote object with an non-standard name, use - # result 
getattr(server, "strange-python-name")(args) - - def __call__(self, attr): - """A workaround to get special attributes on the ServerProxy - without interfering with the magic __getattr__ - """ - if attr == "close": - return self.__close - elif attr == "transport": - return self.__transport - raise AttributeError("Attribute %r not found" % (attr,)) - -# compatibility - -Server = ServerProxy - -# -------------------------------------------------------------------- -# test code - -if __name__ == "__main__": - - # simple test program (from the XML-RPC specification) - - # local server, available from Lib/xmlrpc/server.py - server = ServerProxy("http://localhost:8000") - - try: - print(server.currentTime.getCurrentTime()) - except Error as v: - print("ERROR", v) - - multi = MultiCall(server) - multi.getData() - multi.pow(2,9) - multi.add(1,2) - try: - for response in multi(): - print(response) - except Error as v: - print("ERROR", v) diff --git a/contrib/python/future/future/backports/xmlrpc/server.py b/contrib/python/future/future/backports/xmlrpc/server.py deleted file mode 100644 index 28072bfecda..00000000000 --- a/contrib/python/future/future/backports/xmlrpc/server.py +++ /dev/null @@ -1,999 +0,0 @@ -r""" -Ported using Python-Future from the Python 3.3 standard library. - -XML-RPC Servers. - -This module can be used to create simple XML-RPC servers -by creating a server and either installing functions, a -class instance, or by extending the SimpleXMLRPCServer -class. - -It can also be used to handle XML-RPC requests in a CGI -environment using CGIXMLRPCRequestHandler. - -The Doc* classes can be used to create XML-RPC servers that -serve pydoc-style documentation in response to HTTP -GET requests. This documentation is dynamically generated -based on the functions and methods registered with the -server. - -A list of possible usage patterns follows: - -1. 
Install functions: - -server = SimpleXMLRPCServer(("localhost", 8000)) -server.register_function(pow) -server.register_function(lambda x,y: x+y, 'add') -server.serve_forever() - -2. Install an instance: - -class MyFuncs: - def __init__(self): - # make all of the sys functions available through sys.func_name - import sys - self.sys = sys - def _listMethods(self): - # implement this method so that system.listMethods - # knows to advertise the sys methods - return list_public_methods(self) + \ - ['sys.' + method for method in list_public_methods(self.sys)] - def pow(self, x, y): return pow(x, y) - def add(self, x, y) : return x + y - -server = SimpleXMLRPCServer(("localhost", 8000)) -server.register_introspection_functions() -server.register_instance(MyFuncs()) -server.serve_forever() - -3. Install an instance with custom dispatch method: - -class Math: - def _listMethods(self): - # this method must be present for system.listMethods - # to work - return ['add', 'pow'] - def _methodHelp(self, method): - # this method must be present for system.methodHelp - # to work - if method == 'add': - return "add(2,3) => 5" - elif method == 'pow': - return "pow(x, y[, z]) => number" - else: - # By convention, return empty - # string if no help is available - return "" - def _dispatch(self, method, params): - if method == 'pow': - return pow(*params) - elif method == 'add': - return params[0] + params[1] - else: - raise ValueError('bad method') - -server = SimpleXMLRPCServer(("localhost", 8000)) -server.register_introspection_functions() -server.register_instance(Math()) -server.serve_forever() - -4. 
Subclass SimpleXMLRPCServer: - -class MathServer(SimpleXMLRPCServer): - def _dispatch(self, method, params): - try: - # We are forcing the 'export_' prefix on methods that are - # callable through XML-RPC to prevent potential security - # problems - func = getattr(self, 'export_' + method) - except AttributeError: - raise Exception('method "%s" is not supported' % method) - else: - return func(*params) - - def export_add(self, x, y): - return x + y - -server = MathServer(("localhost", 8000)) -server.serve_forever() - -5. CGI script: - -server = CGIXMLRPCRequestHandler() -server.register_function(pow) -server.handle_request() -""" - -from __future__ import absolute_import, division, print_function, unicode_literals -from future.builtins import int, str - -# Written by Brian Quinlan ([email protected]). -# Based on code written by Fredrik Lundh. - -from future.backports.xmlrpc.client import Fault, dumps, loads, gzip_encode, gzip_decode -from future.backports.http.server import BaseHTTPRequestHandler -import future.backports.http.server as http_server -from future.backports import socketserver -import sys -import os -import re -import pydoc -import inspect -import traceback -try: - import fcntl -except ImportError: - fcntl = None - -def resolve_dotted_attribute(obj, attr, allow_dotted_names=True): - """resolve_dotted_attribute(a, 'b.c.d') => a.b.c.d - - Resolves a dotted attribute name to an object. Raises - an AttributeError if any attribute in the chain starts with a '_'. - - If the optional allow_dotted_names argument is false, dots are not - supported and this function operates similar to getattr(obj, attr). 
- """ - - if allow_dotted_names: - attrs = attr.split('.') - else: - attrs = [attr] - - for i in attrs: - if i.startswith('_'): - raise AttributeError( - 'attempt to access private attribute "%s"' % i - ) - else: - obj = getattr(obj,i) - return obj - -def list_public_methods(obj): - """Returns a list of attribute strings, found in the specified - object, which represent callable attributes""" - - return [member for member in dir(obj) - if not member.startswith('_') and - callable(getattr(obj, member))] - -class SimpleXMLRPCDispatcher(object): - """Mix-in class that dispatches XML-RPC requests. - - This class is used to register XML-RPC method handlers - and then to dispatch them. This class doesn't need to be - instanced directly when used by SimpleXMLRPCServer but it - can be instanced when used by the MultiPathXMLRPCServer - """ - - def __init__(self, allow_none=False, encoding=None, - use_builtin_types=False): - self.funcs = {} - self.instance = None - self.allow_none = allow_none - self.encoding = encoding or 'utf-8' - self.use_builtin_types = use_builtin_types - - def register_instance(self, instance, allow_dotted_names=False): - """Registers an instance to respond to XML-RPC requests. - - Only one instance can be installed at a time. - - If the registered instance has a _dispatch method then that - method will be called with the name of the XML-RPC method and - its parameters as a tuple - e.g. instance._dispatch('add',(2,3)) - - If the registered instance does not have a _dispatch method - then the instance will be searched to find a matching method - and, if found, will be called. Methods beginning with an '_' - are considered private and will not be called by - SimpleXMLRPCServer. - - If a registered function matches a XML-RPC request, then it - will be called instead of the registered instance. 
- - If the optional allow_dotted_names argument is true and the - instance does not have a _dispatch method, method names - containing dots are supported and resolved, as long as none of - the name segments start with an '_'. - - *** SECURITY WARNING: *** - - Enabling the allow_dotted_names options allows intruders - to access your module's global variables and may allow - intruders to execute arbitrary code on your machine. Only - use this option on a secure, closed network. - - """ - - self.instance = instance - self.allow_dotted_names = allow_dotted_names - - def register_function(self, function, name=None): - """Registers a function to respond to XML-RPC requests. - - The optional name argument can be used to set a Unicode name - for the function. - """ - - if name is None: - name = function.__name__ - self.funcs[name] = function - - def register_introspection_functions(self): - """Registers the XML-RPC introspection methods in the system - namespace. - - see http://xmlrpc.usefulinc.com/doc/reserved.html - """ - - self.funcs.update({'system.listMethods' : self.system_listMethods, - 'system.methodSignature' : self.system_methodSignature, - 'system.methodHelp' : self.system_methodHelp}) - - def register_multicall_functions(self): - """Registers the XML-RPC multicall method in the system - namespace. - - see http://www.xmlrpc.com/discuss/msgReader$1208""" - - self.funcs.update({'system.multicall' : self.system_multicall}) - - def _marshaled_dispatch(self, data, dispatch_method = None, path = None): - """Dispatches an XML-RPC method from marshalled (XML) data. - - XML-RPC methods are dispatched from the marshalled (XML) data - using the _dispatch method and the result is returned as - marshalled data. For backwards compatibility, a dispatch - function can be provided as an argument (see comment in - SimpleXMLRPCRequestHandler.do_POST) but overriding the - existing method through subclassing is the preferred means - of changing method dispatch behavior. 
- """ - - try: - params, method = loads(data, use_builtin_types=self.use_builtin_types) - - # generate response - if dispatch_method is not None: - response = dispatch_method(method, params) - else: - response = self._dispatch(method, params) - # wrap response in a singleton tuple - response = (response,) - response = dumps(response, methodresponse=1, - allow_none=self.allow_none, encoding=self.encoding) - except Fault as fault: - response = dumps(fault, allow_none=self.allow_none, - encoding=self.encoding) - except: - # report exception back to server - exc_type, exc_value, exc_tb = sys.exc_info() - response = dumps( - Fault(1, "%s:%s" % (exc_type, exc_value)), - encoding=self.encoding, allow_none=self.allow_none, - ) - - return response.encode(self.encoding) - - def system_listMethods(self): - """system.listMethods() => ['add', 'subtract', 'multiple'] - - Returns a list of the methods supported by the server.""" - - methods = set(self.funcs.keys()) - if self.instance is not None: - # Instance can implement _listMethod to return a list of - # methods - if hasattr(self.instance, '_listMethods'): - methods |= set(self.instance._listMethods()) - # if the instance has a _dispatch method then we - # don't have enough information to provide a list - # of methods - elif not hasattr(self.instance, '_dispatch'): - methods |= set(list_public_methods(self.instance)) - return sorted(methods) - - def system_methodSignature(self, method_name): - """system.methodSignature('add') => [double, int, int] - - Returns a list describing the signature of the method. In the - above example, the add method takes two integers as arguments - and returns a double result. 
- - This server does NOT support system.methodSignature.""" - - # See http://xmlrpc.usefulinc.com/doc/sysmethodsig.html - - return 'signatures not supported' - - def system_methodHelp(self, method_name): - """system.methodHelp('add') => "Adds two integers together" - - Returns a string containing documentation for the specified method.""" - - method = None - if method_name in self.funcs: - method = self.funcs[method_name] - elif self.instance is not None: - # Instance can implement _methodHelp to return help for a method - if hasattr(self.instance, '_methodHelp'): - return self.instance._methodHelp(method_name) - # if the instance has a _dispatch method then we - # don't have enough information to provide help - elif not hasattr(self.instance, '_dispatch'): - try: - method = resolve_dotted_attribute( - self.instance, - method_name, - self.allow_dotted_names - ) - except AttributeError: - pass - - # Note that we aren't checking that the method actually - # be a callable object of some kind - if method is None: - return "" - else: - return pydoc.getdoc(method) - - def system_multicall(self, call_list): - """system.multicall([{'methodName': 'add', 'params': [2, 2]}, ...]) => \ -[[4], ...] - - Allows the caller to package multiple XML-RPC calls into a single - request. - - See http://www.xmlrpc.com/discuss/msgReader$1208 - """ - - results = [] - for call in call_list: - method_name = call['methodName'] - params = call['params'] - - try: - # XXX A marshalling error in any response will fail the entire - # multicall. If someone cares they should fix this. - results.append([self._dispatch(method_name, params)]) - except Fault as fault: - results.append( - {'faultCode' : fault.faultCode, - 'faultString' : fault.faultString} - ) - except: - exc_type, exc_value, exc_tb = sys.exc_info() - results.append( - {'faultCode' : 1, - 'faultString' : "%s:%s" % (exc_type, exc_value)} - ) - return results - - def _dispatch(self, method, params): - """Dispatches the XML-RPC method. 
- - XML-RPC calls are forwarded to a registered function that - matches the called XML-RPC method name. If no such function - exists then the call is forwarded to the registered instance, - if available. - - If the registered instance has a _dispatch method then that - method will be called with the name of the XML-RPC method and - its parameters as a tuple - e.g. instance._dispatch('add',(2,3)) - - If the registered instance does not have a _dispatch method - then the instance will be searched to find a matching method - and, if found, will be called. - - Methods beginning with an '_' are considered private and will - not be called. - """ - - func = None - try: - # check to see if a matching function has been registered - func = self.funcs[method] - except KeyError: - if self.instance is not None: - # check for a _dispatch method - if hasattr(self.instance, '_dispatch'): - return self.instance._dispatch(method, params) - else: - # call instance method directly - try: - func = resolve_dotted_attribute( - self.instance, - method, - self.allow_dotted_names - ) - except AttributeError: - pass - - if func is not None: - return func(*params) - else: - raise Exception('method "%s" is not supported' % method) - -class SimpleXMLRPCRequestHandler(BaseHTTPRequestHandler): - """Simple XML-RPC request handler class. - - Handles all HTTP POST requests and attempts to decode them as - XML-RPC requests. - """ - - # Class attribute listing the accessible path components; - # paths not on this list will result in a 404 error. - rpc_paths = ('/', '/RPC2') - - #if not None, encode responses larger than this, if possible - encode_threshold = 1400 #a common MTU - - #Override form StreamRequestHandler: full buffering of output - #and no Nagle. - wbufsize = -1 - disable_nagle_algorithm = True - - # a re to match a gzip Accept-Encoding - aepattern = re.compile(r""" - \s* ([^\s;]+) \s* #content-coding - (;\s* q \s*=\s* ([0-9\.]+))? 
#q - """, re.VERBOSE | re.IGNORECASE) - - def accept_encodings(self): - r = {} - ae = self.headers.get("Accept-Encoding", "") - for e in ae.split(","): - match = self.aepattern.match(e) - if match: - v = match.group(3) - v = float(v) if v else 1.0 - r[match.group(1)] = v - return r - - def is_rpc_path_valid(self): - if self.rpc_paths: - return self.path in self.rpc_paths - else: - # If .rpc_paths is empty, just assume all paths are legal - return True - - def do_POST(self): - """Handles the HTTP POST request. - - Attempts to interpret all HTTP POST requests as XML-RPC calls, - which are forwarded to the server's _dispatch method for handling. - """ - - # Check that the path is legal - if not self.is_rpc_path_valid(): - self.report_404() - return - - try: - # Get arguments by reading body of request. - # We read this in chunks to avoid straining - # socket.read(); around the 10 or 15Mb mark, some platforms - # begin to have problems (bug #792570). - max_chunk_size = 10*1024*1024 - size_remaining = int(self.headers["content-length"]) - L = [] - while size_remaining: - chunk_size = min(size_remaining, max_chunk_size) - chunk = self.rfile.read(chunk_size) - if not chunk: - break - L.append(chunk) - size_remaining -= len(L[-1]) - data = b''.join(L) - - data = self.decode_request_content(data) - if data is None: - return #response has been sent - - # In previous versions of SimpleXMLRPCServer, _dispatch - # could be overridden in this class, instead of in - # SimpleXMLRPCDispatcher. To maintain backwards compatibility, - # check to see if a subclass implements _dispatch and dispatch - # using that method if present. 
- response = self.server._marshaled_dispatch( - data, getattr(self, '_dispatch', None), self.path - ) - except Exception as e: # This should only happen if the module is buggy - # internal error, report as HTTP server error - self.send_response(500) - - # Send information about the exception if requested - if hasattr(self.server, '_send_traceback_header') and \ - self.server._send_traceback_header: - self.send_header("X-exception", str(e)) - trace = traceback.format_exc() - trace = str(trace.encode('ASCII', 'backslashreplace'), 'ASCII') - self.send_header("X-traceback", trace) - - self.send_header("Content-length", "0") - self.end_headers() - else: - self.send_response(200) - self.send_header("Content-type", "text/xml") - if self.encode_threshold is not None: - if len(response) > self.encode_threshold: - q = self.accept_encodings().get("gzip", 0) - if q: - try: - response = gzip_encode(response) - self.send_header("Content-Encoding", "gzip") - except NotImplementedError: - pass - self.send_header("Content-length", str(len(response))) - self.end_headers() - self.wfile.write(response) - - def decode_request_content(self, data): - #support gzip encoding of request - encoding = self.headers.get("content-encoding", "identity").lower() - if encoding == "identity": - return data - if encoding == "gzip": - try: - return gzip_decode(data) - except NotImplementedError: - self.send_response(501, "encoding %r not supported" % encoding) - except ValueError: - self.send_response(400, "error decoding gzip content") - else: - self.send_response(501, "encoding %r not supported" % encoding) - self.send_header("Content-length", "0") - self.end_headers() - - def report_404 (self): - # Report a 404 error - self.send_response(404) - response = b'No such page' - self.send_header("Content-type", "text/plain") - self.send_header("Content-length", str(len(response))) - self.end_headers() - self.wfile.write(response) - - def log_request(self, code='-', size='-'): - """Selectively log an 
accepted request.""" - - if self.server.logRequests: - BaseHTTPRequestHandler.log_request(self, code, size) - -class SimpleXMLRPCServer(socketserver.TCPServer, - SimpleXMLRPCDispatcher): - """Simple XML-RPC server. - - Simple XML-RPC server that allows functions and a single instance - to be installed to handle requests. The default implementation - attempts to dispatch XML-RPC calls to the functions or instance - installed in the server. Override the _dispatch method inherited - from SimpleXMLRPCDispatcher to change this behavior. - """ - - allow_reuse_address = True - - # Warning: this is for debugging purposes only! Never set this to True in - # production code, as will be sending out sensitive information (exception - # and stack trace details) when exceptions are raised inside - # SimpleXMLRPCRequestHandler.do_POST - _send_traceback_header = False - - def __init__(self, addr, requestHandler=SimpleXMLRPCRequestHandler, - logRequests=True, allow_none=False, encoding=None, - bind_and_activate=True, use_builtin_types=False): - self.logRequests = logRequests - - SimpleXMLRPCDispatcher.__init__(self, allow_none, encoding, use_builtin_types) - socketserver.TCPServer.__init__(self, addr, requestHandler, bind_and_activate) - - # [Bug #1222790] If possible, set close-on-exec flag; if a - # method spawns a subprocess, the subprocess shouldn't have - # the listening socket open. - if fcntl is not None and hasattr(fcntl, 'FD_CLOEXEC'): - flags = fcntl.fcntl(self.fileno(), fcntl.F_GETFD) - flags |= fcntl.FD_CLOEXEC - fcntl.fcntl(self.fileno(), fcntl.F_SETFD, flags) - -class MultiPathXMLRPCServer(SimpleXMLRPCServer): - """Multipath XML-RPC Server - This specialization of SimpleXMLRPCServer allows the user to create - multiple Dispatcher instances and assign them to different - HTTP request paths. This makes it possible to run two or more - 'virtual XML-RPC servers' at the same port. - Make sure that the requestHandler accepts the paths in question. 
- """ - def __init__(self, addr, requestHandler=SimpleXMLRPCRequestHandler, - logRequests=True, allow_none=False, encoding=None, - bind_and_activate=True, use_builtin_types=False): - - SimpleXMLRPCServer.__init__(self, addr, requestHandler, logRequests, allow_none, - encoding, bind_and_activate, use_builtin_types) - self.dispatchers = {} - self.allow_none = allow_none - self.encoding = encoding or 'utf-8' - - def add_dispatcher(self, path, dispatcher): - self.dispatchers[path] = dispatcher - return dispatcher - - def get_dispatcher(self, path): - return self.dispatchers[path] - - def _marshaled_dispatch(self, data, dispatch_method = None, path = None): - try: - response = self.dispatchers[path]._marshaled_dispatch( - data, dispatch_method, path) - except: - # report low level exception back to server - # (each dispatcher should have handled their own - # exceptions) - exc_type, exc_value = sys.exc_info()[:2] - response = dumps( - Fault(1, "%s:%s" % (exc_type, exc_value)), - encoding=self.encoding, allow_none=self.allow_none) - response = response.encode(self.encoding) - return response - -class CGIXMLRPCRequestHandler(SimpleXMLRPCDispatcher): - """Simple handler for XML-RPC data passed through CGI.""" - - def __init__(self, allow_none=False, encoding=None, use_builtin_types=False): - SimpleXMLRPCDispatcher.__init__(self, allow_none, encoding, use_builtin_types) - - def handle_xmlrpc(self, request_text): - """Handle a single XML-RPC request""" - - response = self._marshaled_dispatch(request_text) - - print('Content-Type: text/xml') - print('Content-Length: %d' % len(response)) - print() - sys.stdout.flush() - sys.stdout.buffer.write(response) - sys.stdout.buffer.flush() - - def handle_get(self): - """Handle a single HTTP GET request. - - Default implementation indicates an error because - XML-RPC uses the POST method. 
- """ - - code = 400 - message, explain = BaseHTTPRequestHandler.responses[code] - - response = http_server.DEFAULT_ERROR_MESSAGE % \ - { - 'code' : code, - 'message' : message, - 'explain' : explain - } - response = response.encode('utf-8') - print('Status: %d %s' % (code, message)) - print('Content-Type: %s' % http_server.DEFAULT_ERROR_CONTENT_TYPE) - print('Content-Length: %d' % len(response)) - print() - sys.stdout.flush() - sys.stdout.buffer.write(response) - sys.stdout.buffer.flush() - - def handle_request(self, request_text=None): - """Handle a single XML-RPC request passed through a CGI post method. - - If no XML data is given then it is read from stdin. The resulting - XML-RPC response is printed to stdout along with the correct HTTP - headers. - """ - - if request_text is None and \ - os.environ.get('REQUEST_METHOD', None) == 'GET': - self.handle_get() - else: - # POST data is normally available through stdin - try: - length = int(os.environ.get('CONTENT_LENGTH', None)) - except (ValueError, TypeError): - length = -1 - if request_text is None: - request_text = sys.stdin.read(length) - - self.handle_xmlrpc(request_text) - - -# ----------------------------------------------------------------------------- -# Self documenting XML-RPC Server. - -class ServerHTMLDoc(pydoc.HTMLDoc): - """Class used to generate pydoc HTML document for a server""" - - def markup(self, text, escape=None, funcs={}, classes={}, methods={}): - """Mark up some plain text, given a context of symbols to look for. - Each context dictionary maps object names to anchor names.""" - escape = escape or self.escape - results = [] - here = 0 - - # XXX Note that this regular expression does not allow for the - # hyperlinking of arbitrary strings being used as method - # names. Only methods with names consisting of word characters - # and '.'s are hyperlinked. 
- pattern = re.compile(r'\b((http|ftp)://\S+[\w/]|' - r'RFC[- ]?(\d+)|' - r'PEP[- ]?(\d+)|' - r'(self\.)?((?:\w|\.)+))\b') - while 1: - match = pattern.search(text, here) - if not match: break - start, end = match.span() - results.append(escape(text[here:start])) - - all, scheme, rfc, pep, selfdot, name = match.groups() - if scheme: - url = escape(all).replace('"', '"') - results.append('<a href="%s">%s</a>' % (url, url)) - elif rfc: - url = 'http://www.rfc-editor.org/rfc/rfc%d.txt' % int(rfc) - results.append('<a href="%s">%s</a>' % (url, escape(all))) - elif pep: - url = 'http://www.python.org/dev/peps/pep-%04d/' % int(pep) - results.append('<a href="%s">%s</a>' % (url, escape(all))) - elif text[end:end+1] == '(': - results.append(self.namelink(name, methods, funcs, classes)) - elif selfdot: - results.append('self.<strong>%s</strong>' % name) - else: - results.append(self.namelink(name, classes)) - here = end - results.append(escape(text[here:])) - return ''.join(results) - - def docroutine(self, object, name, mod=None, - funcs={}, classes={}, methods={}, cl=None): - """Produce HTML documentation for a function or method object.""" - - anchor = (cl and cl.__name__ or '') + '-' + name - note = '' - - title = '<a name="%s"><strong>%s</strong></a>' % ( - self.escape(anchor), self.escape(name)) - - if inspect.ismethod(object): - args = inspect.getfullargspec(object) - # exclude the argument bound to the instance, it will be - # confusing to the non-Python user - argspec = inspect.formatargspec ( - args.args[1:], - args.varargs, - args.varkw, - args.defaults, - annotations=args.annotations, - formatvalue=self.formatvalue - ) - elif inspect.isfunction(object): - args = inspect.getfullargspec(object) - argspec = inspect.formatargspec( - args.args, args.varargs, args.varkw, args.defaults, - annotations=args.annotations, - formatvalue=self.formatvalue) - else: - argspec = '(...)' - - if isinstance(object, tuple): - argspec = object[0] or argspec - docstring = object[1] or 
"" - else: - docstring = pydoc.getdoc(object) - - decl = title + argspec + (note and self.grey( - '<font face="helvetica, arial">%s</font>' % note)) - - doc = self.markup( - docstring, self.preformat, funcs, classes, methods) - doc = doc and '<dd><tt>%s</tt></dd>' % doc - return '<dl><dt>%s</dt>%s</dl>\n' % (decl, doc) - - def docserver(self, server_name, package_documentation, methods): - """Produce HTML documentation for an XML-RPC server.""" - - fdict = {} - for key, value in methods.items(): - fdict[key] = '#-' + key - fdict[value] = fdict[key] - - server_name = self.escape(server_name) - head = '<big><big><strong>%s</strong></big></big>' % server_name - result = self.heading(head, '#ffffff', '#7799ee') - - doc = self.markup(package_documentation, self.preformat, fdict) - doc = doc and '<tt>%s</tt>' % doc - result = result + '<p>%s</p>\n' % doc - - contents = [] - method_items = sorted(methods.items()) - for key, value in method_items: - contents.append(self.docroutine(value, key, funcs=fdict)) - result = result + self.bigsection( - 'Methods', '#ffffff', '#eeaa77', ''.join(contents)) - - return result - -class XMLRPCDocGenerator(object): - """Generates documentation for an XML-RPC server. - - This class is designed as mix-in and should not - be constructed directly. - """ - - def __init__(self): - # setup variables used for HTML documentation - self.server_name = 'XML-RPC Server Documentation' - self.server_documentation = \ - "This server exports the following methods through the XML-RPC "\ - "protocol." 
- self.server_title = 'XML-RPC Server Documentation' - - def set_server_title(self, server_title): - """Set the HTML title of the generated server documentation""" - - self.server_title = server_title - - def set_server_name(self, server_name): - """Set the name of the generated HTML server documentation""" - - self.server_name = server_name - - def set_server_documentation(self, server_documentation): - """Set the documentation string for the entire server.""" - - self.server_documentation = server_documentation - - def generate_html_documentation(self): - """generate_html_documentation() => html documentation for the server - - Generates HTML documentation for the server using introspection for - installed functions and instances that do not implement the - _dispatch method. Alternatively, instances can choose to implement - the _get_method_argstring(method_name) method to provide the - argument string used in the documentation and the - _methodHelp(method_name) method to provide the help text used - in the documentation.""" - - methods = {} - - for method_name in self.system_listMethods(): - if method_name in self.funcs: - method = self.funcs[method_name] - elif self.instance is not None: - method_info = [None, None] # argspec, documentation - if hasattr(self.instance, '_get_method_argstring'): - method_info[0] = self.instance._get_method_argstring(method_name) - if hasattr(self.instance, '_methodHelp'): - method_info[1] = self.instance._methodHelp(method_name) - - method_info = tuple(method_info) - if method_info != (None, None): - method = method_info - elif not hasattr(self.instance, '_dispatch'): - try: - method = resolve_dotted_attribute( - self.instance, - method_name - ) - except AttributeError: - method = method_info - else: - method = method_info - else: - assert 0, "Could not find method in self.functions and no "\ - "instance installed" - - methods[method_name] = method - - documenter = ServerHTMLDoc() - documentation = documenter.docserver( - 
self.server_name, - self.server_documentation, - methods - ) - - return documenter.page(self.server_title, documentation) - -class DocXMLRPCRequestHandler(SimpleXMLRPCRequestHandler): - """XML-RPC and documentation request handler class. - - Handles all HTTP POST requests and attempts to decode them as - XML-RPC requests. - - Handles all HTTP GET requests and interprets them as requests - for documentation. - """ - - def do_GET(self): - """Handles the HTTP GET request. - - Interpret all HTTP GET requests as requests for server - documentation. - """ - # Check that the path is legal - if not self.is_rpc_path_valid(): - self.report_404() - return - - response = self.server.generate_html_documentation().encode('utf-8') - self.send_response(200) - self.send_header("Content-type", "text/html") - self.send_header("Content-length", str(len(response))) - self.end_headers() - self.wfile.write(response) - -class DocXMLRPCServer( SimpleXMLRPCServer, - XMLRPCDocGenerator): - """XML-RPC and HTML documentation server. - - Adds the ability to serve server documentation to the capabilities - of SimpleXMLRPCServer. - """ - - def __init__(self, addr, requestHandler=DocXMLRPCRequestHandler, - logRequests=True, allow_none=False, encoding=None, - bind_and_activate=True, use_builtin_types=False): - SimpleXMLRPCServer.__init__(self, addr, requestHandler, logRequests, - allow_none, encoding, bind_and_activate, - use_builtin_types) - XMLRPCDocGenerator.__init__(self) - -class DocCGIXMLRPCRequestHandler( CGIXMLRPCRequestHandler, - XMLRPCDocGenerator): - """Handler for XML-RPC data and documentation requests passed through - CGI""" - - def handle_get(self): - """Handles the HTTP GET request. - - Interpret all HTTP GET requests as requests for server - documentation. 
- """ - - response = self.generate_html_documentation().encode('utf-8') - - print('Content-Type: text/html') - print('Content-Length: %d' % len(response)) - print() - sys.stdout.flush() - sys.stdout.buffer.write(response) - sys.stdout.buffer.flush() - - def __init__(self): - CGIXMLRPCRequestHandler.__init__(self) - XMLRPCDocGenerator.__init__(self) - - -if __name__ == '__main__': - import datetime - - class ExampleService: - def getData(self): - return '42' - - class currentTime: - @staticmethod - def getCurrentTime(): - return datetime.datetime.now() - - server = SimpleXMLRPCServer(("localhost", 8000)) - server.register_function(pow) - server.register_function(lambda x,y: x+y, 'add') - server.register_instance(ExampleService(), allow_dotted_names=True) - server.register_multicall_functions() - print('Serving XML-RPC on localhost port 8000') - print('It is advisable to run this example server within a secure, closed network.') - try: - server.serve_forever() - except KeyboardInterrupt: - print("\nKeyboard interrupt received, exiting.") - server.server_close() - sys.exit(0) diff --git a/contrib/python/future/future/builtins/__init__.py b/contrib/python/future/future/builtins/__init__.py deleted file mode 100644 index 8bc1649d2fd..00000000000 --- a/contrib/python/future/future/builtins/__init__.py +++ /dev/null @@ -1,51 +0,0 @@ -""" -A module that brings in equivalents of the new and modified Python 3 -builtins into Py2. Has no effect on Py3. - -See the docs `here <http://python-future.org/what-else.html>`_ -(``docs/what-else.rst``) for more information. - -""" - -from future.builtins.iterators import (filter, map, zip) -# The isinstance import is no longer needed. We provide it only for -# backward-compatibility with future v0.8.2. It will be removed in future v1.0. 
-from future.builtins.misc import (ascii, chr, hex, input, isinstance, next, - oct, open, pow, round, super, max, min) -from future.utils import PY3 - -if PY3: - import builtins - bytes = builtins.bytes - dict = builtins.dict - int = builtins.int - list = builtins.list - object = builtins.object - range = builtins.range - str = builtins.str - __all__ = [] -else: - from future.types import (newbytes as bytes, - newdict as dict, - newint as int, - newlist as list, - newobject as object, - newrange as range, - newstr as str) -from future import utils - - -if not utils.PY3: - # We only import names that shadow the builtins on Py2. No other namespace - # pollution on Py2. - - # Only shadow builtins on Py2; no new names - __all__ = ['filter', 'map', 'zip', - 'ascii', 'chr', 'hex', 'input', 'next', 'oct', 'open', 'pow', - 'round', 'super', - 'bytes', 'dict', 'int', 'list', 'object', 'range', 'str', 'max', 'min' - ] - -else: - # No namespace pollution on Py3 - __all__ = [] diff --git a/contrib/python/future/future/builtins/disabled.py b/contrib/python/future/future/builtins/disabled.py deleted file mode 100644 index f6d6ea9b80c..00000000000 --- a/contrib/python/future/future/builtins/disabled.py +++ /dev/null @@ -1,66 +0,0 @@ -""" -This disables builtin functions (and one exception class) which are -removed from Python 3.3. - -This module is designed to be used like this:: - - from future.builtins.disabled import * - -This disables the following obsolete Py2 builtin functions:: - - apply, cmp, coerce, execfile, file, input, long, - raw_input, reduce, reload, unicode, xrange - -We don't hack __builtin__, which is very fragile because it contaminates -imported modules too. Instead, we just create new functions with -the same names as the obsolete builtins from Python 2 which raise -NameError exceptions when called. - -Note that both ``input()`` and ``raw_input()`` are among the disabled -functions (in this module). 
Although ``input()`` exists as a builtin in -Python 3, the Python 2 ``input()`` builtin is unsafe to use because it -can lead to shell injection. Therefore we shadow it by default upon ``from -future.builtins.disabled import *``, in case someone forgets to import our -replacement ``input()`` somehow and expects Python 3 semantics. - -See the ``future.builtins.misc`` module for a working version of -``input`` with Python 3 semantics. - -(Note that callable() is not among the functions disabled; this was -reintroduced into Python 3.2.) - -This exception class is also disabled: - - StandardError - -""" - -from __future__ import division, absolute_import, print_function - -from future import utils - - -OBSOLETE_BUILTINS = ['apply', 'chr', 'cmp', 'coerce', 'execfile', 'file', - 'input', 'long', 'raw_input', 'reduce', 'reload', - 'unicode', 'xrange', 'StandardError'] - - -def disabled_function(name): - ''' - Returns a function that cannot be called - ''' - def disabled(*args, **kwargs): - ''' - A function disabled by the ``future`` module. This function is - no longer a builtin in Python 3. - ''' - raise NameError('obsolete Python 2 builtin {0} is disabled'.format(name)) - return disabled - - -if not utils.PY3: - for fname in OBSOLETE_BUILTINS: - locals()[fname] = disabled_function(fname) - __all__ = OBSOLETE_BUILTINS -else: - __all__ = [] diff --git a/contrib/python/future/future/builtins/iterators.py b/contrib/python/future/future/builtins/iterators.py deleted file mode 100644 index dff651e0f46..00000000000 --- a/contrib/python/future/future/builtins/iterators.py +++ /dev/null @@ -1,52 +0,0 @@ -""" -This module is designed to be used as follows:: - - from future.builtins.iterators import * - -And then, for example:: - - for i in range(10**15): - pass - - for (a, b) in zip(range(10**15), range(-10**15, 0)): - pass - -Note that this is standard Python 3 code, plus some imports that do -nothing on Python 3. 
- -The iterators this brings in are:: - -- ``range`` -- ``filter`` -- ``map`` -- ``zip`` - -On Python 2, ``range`` is a pure-Python backport of Python 3's ``range`` -iterator with slicing support. The other iterators (``filter``, ``map``, -``zip``) are from the ``itertools`` module on Python 2. On Python 3 these -are available in the module namespace but not exported for * imports via -__all__ (zero no namespace pollution). - -Note that these are also available in the standard library -``future_builtins`` module on Python 2 -- but not Python 3, so using -the standard library version is not portable, nor anywhere near complete. -""" - -from __future__ import division, absolute_import, print_function - -import itertools -from future import utils - -if not utils.PY3: - filter = itertools.ifilter - map = itertools.imap - from future.types import newrange as range - zip = itertools.izip - __all__ = ['filter', 'map', 'range', 'zip'] -else: - import builtins - filter = builtins.filter - map = builtins.map - range = builtins.range - zip = builtins.zip - __all__ = [] diff --git a/contrib/python/future/future/builtins/misc.py b/contrib/python/future/future/builtins/misc.py deleted file mode 100644 index f86ce5f3423..00000000000 --- a/contrib/python/future/future/builtins/misc.py +++ /dev/null @@ -1,135 +0,0 @@ -""" -A module that brings in equivalents of various modified Python 3 builtins -into Py2. Has no effect on Py3. 
- -The builtin functions are: - -- ``ascii`` (from Py2's future_builtins module) -- ``hex`` (from Py2's future_builtins module) -- ``oct`` (from Py2's future_builtins module) -- ``chr`` (equivalent to ``unichr`` on Py2) -- ``input`` (equivalent to ``raw_input`` on Py2) -- ``next`` (calls ``__next__`` if it exists, else ``next`` method) -- ``open`` (equivalent to io.open on Py2) -- ``super`` (backport of Py3's magic zero-argument super() function -- ``round`` (new "Banker's Rounding" behaviour from Py3) -- ``max`` (new default option from Py3.4) -- ``min`` (new default option from Py3.4) - -``isinstance`` is also currently exported for backwards compatibility -with v0.8.2, although this has been deprecated since v0.9. - - -input() -------- -Like the new ``input()`` function from Python 3 (without eval()), except -that it returns bytes. Equivalent to Python 2's ``raw_input()``. - -Warning: By default, importing this module *removes* the old Python 2 -input() function entirely from ``__builtin__`` for safety. This is -because forgetting to import the new ``input`` from ``future`` might -otherwise lead to a security vulnerability (shell injection) on Python 2. - -To restore it, you can retrieve it yourself from -``__builtin__._old_input``. - -Fortunately, ``input()`` seems to be seldom used in the wild in Python -2... - -""" - -from future import utils - - -if utils.PY2: - from io import open - from future_builtins import ascii, oct, hex - from __builtin__ import unichr as chr, pow as _builtin_pow - import __builtin__ - - # Only for backward compatibility with future v0.8.2: - isinstance = __builtin__.isinstance - - # Warning: Python 2's input() is unsafe and MUST not be able to be used - # accidentally by someone who expects Python 3 semantics but forgets - # to import it on Python 2. Versions of ``future`` prior to 0.11 - # deleted it from __builtin__. Now we keep in __builtin__ but shadow - # the name like all others. Just be sure to import ``input``. 
- - input = raw_input - - from future.builtins.newnext import newnext as next - from future.builtins.newround import newround as round - from future.builtins.newsuper import newsuper as super - from future.builtins.new_min_max import newmax as max - from future.builtins.new_min_max import newmin as min - from future.types.newint import newint - - _SENTINEL = object() - - def pow(x, y, z=_SENTINEL): - """ - pow(x, y[, z]) -> number - - With two arguments, equivalent to x**y. With three arguments, - equivalent to (x**y) % z, but may be more efficient (e.g. for ints). - """ - # Handle newints - if isinstance(x, newint): - x = long(x) - if isinstance(y, newint): - y = long(y) - if isinstance(z, newint): - z = long(z) - - try: - if z == _SENTINEL: - return _builtin_pow(x, y) - else: - return _builtin_pow(x, y, z) - except ValueError: - if z == _SENTINEL: - return _builtin_pow(x+0j, y) - else: - return _builtin_pow(x+0j, y, z) - - - # ``future`` doesn't support Py3.0/3.1. If we ever did, we'd add this: - # callable = __builtin__.callable - - __all__ = ['ascii', 'chr', 'hex', 'input', 'isinstance', 'next', 'oct', - 'open', 'pow', 'round', 'super', 'max', 'min'] - -else: - import builtins - ascii = builtins.ascii - chr = builtins.chr - hex = builtins.hex - input = builtins.input - next = builtins.next - # Only for backward compatibility with future v0.8.2: - isinstance = builtins.isinstance - oct = builtins.oct - open = builtins.open - pow = builtins.pow - round = builtins.round - super = builtins.super - if utils.PY34_PLUS: - max = builtins.max - min = builtins.min - __all__ = [] - else: - from future.builtins.new_min_max import newmax as max - from future.builtins.new_min_max import newmin as min - __all__ = ['min', 'max'] - - # The callable() function was removed from Py3.0 and 3.1 and - # reintroduced into Py3.2+. ``future`` doesn't support Py3.0/3.1. 
If we ever - # did, we'd add this: - # try: - # callable = builtins.callable - # except AttributeError: - # # Definition from Pandas - # def callable(obj): - # return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) - # __all__.append('callable') diff --git a/contrib/python/future/future/builtins/new_min_max.py b/contrib/python/future/future/builtins/new_min_max.py deleted file mode 100644 index 6f0c2a86fee..00000000000 --- a/contrib/python/future/future/builtins/new_min_max.py +++ /dev/null @@ -1,59 +0,0 @@ -import itertools - -from future import utils -if utils.PY2: - from __builtin__ import max as _builtin_max, min as _builtin_min -else: - from builtins import max as _builtin_max, min as _builtin_min - -_SENTINEL = object() - - -def newmin(*args, **kwargs): - return new_min_max(_builtin_min, *args, **kwargs) - - -def newmax(*args, **kwargs): - return new_min_max(_builtin_max, *args, **kwargs) - - -def new_min_max(_builtin_func, *args, **kwargs): - """ - To support the argument "default" introduced in python 3.4 for min and max - :param _builtin_func: builtin min or builtin max - :param args: - :param kwargs: - :return: returns the min or max based on the arguments passed - """ - - for key, _ in kwargs.items(): - if key not in set(['key', 'default']): - raise TypeError('Illegal argument %s', key) - - if len(args) == 0: - raise TypeError - - if len(args) != 1 and kwargs.get('default', _SENTINEL) is not _SENTINEL: - raise TypeError - - if len(args) == 1: - iterator = iter(args[0]) - try: - first = next(iterator) - except StopIteration: - if kwargs.get('default', _SENTINEL) is not _SENTINEL: - return kwargs.get('default') - else: - raise ValueError('{}() arg is an empty sequence'.format(_builtin_func.__name__)) - else: - iterator = itertools.chain([first], iterator) - if kwargs.get('key') is not None: - return _builtin_func(iterator, key=kwargs.get('key')) - else: - return _builtin_func(iterator) - - if len(args) > 1: - if kwargs.get('key') is not 
None: - return _builtin_func(args, key=kwargs.get('key')) - else: - return _builtin_func(args) diff --git a/contrib/python/future/future/builtins/newnext.py b/contrib/python/future/future/builtins/newnext.py deleted file mode 100644 index 097638ac112..00000000000 --- a/contrib/python/future/future/builtins/newnext.py +++ /dev/null @@ -1,70 +0,0 @@ -''' -This module provides a newnext() function in Python 2 that mimics the -behaviour of ``next()`` in Python 3, falling back to Python 2's behaviour for -compatibility if this fails. - -``newnext(iterator)`` calls the iterator's ``__next__()`` method if it exists. If this -doesn't exist, it falls back to calling a ``next()`` method. - -For example: - - >>> class Odds(object): - ... def __init__(self, start=1): - ... self.value = start - 2 - ... def __next__(self): # note the Py3 interface - ... self.value += 2 - ... return self.value - ... def __iter__(self): - ... return self - ... - >>> iterator = Odds() - >>> next(iterator) - 1 - >>> next(iterator) - 3 - -If you are defining your own custom iterator class as above, it is preferable -to explicitly decorate the class with the @implements_iterator decorator from -``future.utils`` as follows: - - >>> @implements_iterator - ... class Odds(object): - ... # etc - ... pass - -This next() function is primarily for consuming iterators defined in Python 3 -code elsewhere that we would like to run on Python 2 or 3. -''' - -_builtin_next = next - -_SENTINEL = object() - -def newnext(iterator, default=_SENTINEL): - """ - next(iterator[, default]) - - Return the next item from the iterator. If default is given and the iterator - is exhausted, it is returned instead of raising StopIteration. 
- """ - - # args = [] - # if default is not _SENTINEL: - # args.append(default) - try: - try: - return iterator.__next__() - except AttributeError: - try: - return iterator.next() - except AttributeError: - raise TypeError("'{0}' object is not an iterator".format( - iterator.__class__.__name__)) - except StopIteration as e: - if default is _SENTINEL: - raise e - else: - return default - - -__all__ = ['newnext'] diff --git a/contrib/python/future/future/builtins/newround.py b/contrib/python/future/future/builtins/newround.py deleted file mode 100644 index 394a2c63c4b..00000000000 --- a/contrib/python/future/future/builtins/newround.py +++ /dev/null @@ -1,102 +0,0 @@ -""" -``python-future``: pure Python implementation of Python 3 round(). -""" - -from future.utils import PYPY, PY26, bind_method - -# Use the decimal module for simplicity of implementation (and -# hopefully correctness). -from decimal import Decimal, ROUND_HALF_EVEN - - -def newround(number, ndigits=None): - """ - See Python 3 documentation: uses Banker's Rounding. - - Delegates to the __round__ method if for some reason this exists. - - If not, rounds a number to a given precision in decimal digits (default - 0 digits). This returns an int when called with one argument, - otherwise the same type as the number. ndigits may be negative. - - See the test_round method in future/tests/test_builtins.py for - examples. 
- """ - return_int = False - if ndigits is None: - return_int = True - ndigits = 0 - if hasattr(number, '__round__'): - return number.__round__(ndigits) - - if ndigits < 0: - raise NotImplementedError('negative ndigits not supported yet') - exponent = Decimal('10') ** (-ndigits) - - if PYPY: - # Work around issue #24: round() breaks on PyPy with NumPy's types - if 'numpy' in repr(type(number)): - number = float(number) - - if isinstance(number, Decimal): - d = number - else: - if not PY26: - d = Decimal.from_float(number).quantize(exponent, - rounding=ROUND_HALF_EVEN) - else: - d = from_float_26(number).quantize(exponent, rounding=ROUND_HALF_EVEN) - - if return_int: - return int(d) - else: - return float(d) - - -### From Python 2.7's decimal.py. Only needed to support Py2.6: - -def from_float_26(f): - """Converts a float to a decimal number, exactly. - - Note that Decimal.from_float(0.1) is not the same as Decimal('0.1'). - Since 0.1 is not exactly representable in binary floating point, the - value is stored as the nearest representable value which is - 0x1.999999999999ap-4. The exact equivalent of the value in decimal - is 0.1000000000000000055511151231257827021181583404541015625. 
- - >>> Decimal.from_float(0.1) - Decimal('0.1000000000000000055511151231257827021181583404541015625') - >>> Decimal.from_float(float('nan')) - Decimal('NaN') - >>> Decimal.from_float(float('inf')) - Decimal('Infinity') - >>> Decimal.from_float(-float('inf')) - Decimal('-Infinity') - >>> Decimal.from_float(-0.0) - Decimal('-0') - - """ - import math as _math - from decimal import _dec_from_triple # only available on Py2.6 and Py2.7 (not 3.3) - - if isinstance(f, (int, long)): # handle integer inputs - return Decimal(f) - if _math.isinf(f) or _math.isnan(f): # raises TypeError if not a float - return Decimal(repr(f)) - if _math.copysign(1.0, f) == 1.0: - sign = 0 - else: - sign = 1 - n, d = abs(f).as_integer_ratio() - # int.bit_length() method doesn't exist on Py2.6: - def bit_length(d): - if d != 0: - return len(bin(abs(d))) - 2 - else: - return 0 - k = bit_length(d) - 1 - result = _dec_from_triple(sign, str(n*5**k), -k) - return result - - -__all__ = ['newround'] diff --git a/contrib/python/future/future/builtins/newsuper.py b/contrib/python/future/future/builtins/newsuper.py deleted file mode 100644 index 5d3402bd2f9..00000000000 --- a/contrib/python/future/future/builtins/newsuper.py +++ /dev/null @@ -1,114 +0,0 @@ -''' -This module provides a newsuper() function in Python 2 that mimics the -behaviour of super() in Python 3. It is designed to be used as follows: - - from __future__ import division, absolute_import, print_function - from future.builtins import super - -And then, for example: - - class VerboseList(list): - def append(self, item): - print('Adding an item') - super().append(item) # new simpler super() function - -Importing this module on Python 3 has no effect. - -This is based on (i.e. almost identical to) Ryan Kelly's magicsuper -module here: - - https://github.com/rfk/magicsuper.git - -Excerpts from Ryan's docstring: - - "Of course, you can still explicitly pass in the arguments if you want - to do something strange. 
Sometimes you really do want that, e.g. to - skip over some classes in the method resolution order. - - "How does it work? By inspecting the calling frame to determine the - function object being executed and the object on which it's being - called, and then walking the object's __mro__ chain to find out where - that function was defined. Yuck, but it seems to work..." -''' - -from __future__ import absolute_import -import sys -from types import FunctionType - -from future.utils import PY3, PY26 - - -_builtin_super = super - -_SENTINEL = object() - -def newsuper(typ=_SENTINEL, type_or_obj=_SENTINEL, framedepth=1): - '''Like builtin super(), but capable of magic. - - This acts just like the builtin super() function, but if called - without any arguments it attempts to infer them at runtime. - ''' - # Infer the correct call if used without arguments. - if typ is _SENTINEL: - # We'll need to do some frame hacking. - f = sys._getframe(framedepth) - - try: - # Get the function's first positional argument. - type_or_obj = f.f_locals[f.f_code.co_varnames[0]] - except (IndexError, KeyError,): - raise RuntimeError('super() used in a function with no args') - - try: - # Get the MRO so we can crawl it. - mro = type_or_obj.__mro__ - except (AttributeError, RuntimeError): # see issue #160 - try: - mro = type_or_obj.__class__.__mro__ - except AttributeError: - raise RuntimeError('super() used with a non-newstyle class') - - # A ``for...else`` block? Yes! It's odd, but useful. - # If unfamiliar with for...else, see: - # - # http://psung.blogspot.com/2007/12/for-else-in-python.html - for typ in mro: - # Find the class that owns the currently-executing method. - for meth in typ.__dict__.values(): - # Drill down through any wrappers to the underlying func. - # This handles e.g. classmethod() and staticmethod(). 
- try: - while not isinstance(meth,FunctionType): - if isinstance(meth, property): - # Calling __get__ on the property will invoke - # user code which might throw exceptions or have - # side effects - meth = meth.fget - else: - try: - meth = meth.__func__ - except AttributeError: - meth = meth.__get__(type_or_obj, typ) - except (AttributeError, TypeError): - continue - if meth.func_code is f.f_code: - break # Aha! Found you. - else: - continue # Not found! Move onto the next class in MRO. - break # Found! Break out of the search loop. - else: - raise RuntimeError('super() called outside a method') - - # Dispatch to builtin super(). - if type_or_obj is not _SENTINEL: - return _builtin_super(typ, type_or_obj) - return _builtin_super(typ) - - -def superm(*args, **kwds): - f = sys._getframe(1) - nm = f.f_code.co_name - return getattr(newsuper(framedepth=2),nm)(*args, **kwds) - - -__all__ = ['newsuper'] diff --git a/contrib/python/future/future/moves/__init__.py b/contrib/python/future/future/moves/__init__.py deleted file mode 100644 index 0cd60d3d5c3..00000000000 --- a/contrib/python/future/future/moves/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# future.moves package -from __future__ import absolute_import -import sys -__future_module__ = True -from future.standard_library import import_top_level_modules - -if sys.version_info[0] >= 3: - import_top_level_modules() diff --git a/contrib/python/future/future/moves/_dummy_thread.py b/contrib/python/future/future/moves/_dummy_thread.py deleted file mode 100644 index e5dca348fbd..00000000000 --- a/contrib/python/future/future/moves/_dummy_thread.py +++ /dev/null @@ -1,11 +0,0 @@ -from __future__ import absolute_import -from future.utils import PY3 - -if PY3: - try: - from _dummy_thread import * - except ImportError: - from _thread import * -else: - __future_module__ = True - from dummy_thread import * diff --git a/contrib/python/future/future/moves/_markupbase.py b/contrib/python/future/future/moves/_markupbase.py deleted 
file mode 100644 index f9fb4bbf283..00000000000 --- a/contrib/python/future/future/moves/_markupbase.py +++ /dev/null @@ -1,8 +0,0 @@ -from __future__ import absolute_import -from future.utils import PY3 - -if PY3: - from _markupbase import * -else: - __future_module__ = True - from markupbase import * diff --git a/contrib/python/future/future/moves/_thread.py b/contrib/python/future/future/moves/_thread.py deleted file mode 100644 index c68018bb11f..00000000000 --- a/contrib/python/future/future/moves/_thread.py +++ /dev/null @@ -1,8 +0,0 @@ -from __future__ import absolute_import -from future.utils import PY3 - -if PY3: - from _thread import * -else: - __future_module__ = True - from thread import * diff --git a/contrib/python/future/future/moves/builtins.py b/contrib/python/future/future/moves/builtins.py deleted file mode 100644 index e4b6221d592..00000000000 --- a/contrib/python/future/future/moves/builtins.py +++ /dev/null @@ -1,10 +0,0 @@ -from __future__ import absolute_import -from future.utils import PY3 - -if PY3: - from builtins import * -else: - __future_module__ = True - from __builtin__ import * - # Overwrite any old definitions with the equivalent future.builtins ones: - from future.builtins import * diff --git a/contrib/python/future/future/moves/collections.py b/contrib/python/future/future/moves/collections.py deleted file mode 100644 index 664ee6a3d00..00000000000 --- a/contrib/python/future/future/moves/collections.py +++ /dev/null @@ -1,18 +0,0 @@ -from __future__ import absolute_import -import sys - -from future.utils import PY2, PY26 -__future_module__ = True - -from collections import * - -if PY2: - from UserDict import UserDict - from UserList import UserList - from UserString import UserString - -if PY26: - from future.backports.misc import OrderedDict, Counter - -if sys.version_info < (3, 3): - from future.backports.misc import ChainMap, _count_elements diff --git a/contrib/python/future/future/moves/configparser.py 
b/contrib/python/future/future/moves/configparser.py deleted file mode 100644 index 33d9cf9533d..00000000000 --- a/contrib/python/future/future/moves/configparser.py +++ /dev/null @@ -1,8 +0,0 @@ -from __future__ import absolute_import - -from future.utils import PY2 - -if PY2: - from ConfigParser import * -else: - from configparser import * diff --git a/contrib/python/future/future/moves/copyreg.py b/contrib/python/future/future/moves/copyreg.py deleted file mode 100644 index 9d08cdc5ed6..00000000000 --- a/contrib/python/future/future/moves/copyreg.py +++ /dev/null @@ -1,12 +0,0 @@ -from __future__ import absolute_import -from future.utils import PY3 - -if PY3: - import copyreg, sys - # A "*" import uses Python 3's copyreg.__all__ which does not include - # all public names in the API surface for copyreg, this avoids that - # problem by just making our module _be_ a reference to the actual module. - sys.modules['future.moves.copyreg'] = copyreg -else: - __future_module__ = True - from copy_reg import * diff --git a/contrib/python/future/future/moves/dbm/__init__.py b/contrib/python/future/future/moves/dbm/__init__.py deleted file mode 100644 index 626b406f7fe..00000000000 --- a/contrib/python/future/future/moves/dbm/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from __future__ import absolute_import -from future.utils import PY3 - -if PY3: - from dbm import * -else: - __future_module__ = True - from whichdb import * - from anydbm import * - -# Py3.3's dbm/__init__.py imports ndbm but doesn't expose it via __all__. 
-# In case some (badly written) code depends on dbm.ndbm after import dbm, -# we simulate this: -if PY3: - from dbm import ndbm -else: - try: - from future.moves.dbm import ndbm - except ImportError: - ndbm = None diff --git a/contrib/python/future/future/moves/dbm/dumb.py b/contrib/python/future/future/moves/dbm/dumb.py deleted file mode 100644 index 528383f6d83..00000000000 --- a/contrib/python/future/future/moves/dbm/dumb.py +++ /dev/null @@ -1,9 +0,0 @@ -from __future__ import absolute_import - -from future.utils import PY3 - -if PY3: - from dbm.dumb import * -else: - __future_module__ = True - from dumbdbm import * diff --git a/contrib/python/future/future/moves/dbm/ndbm.py b/contrib/python/future/future/moves/dbm/ndbm.py deleted file mode 100644 index 8c6fff8ab7d..00000000000 --- a/contrib/python/future/future/moves/dbm/ndbm.py +++ /dev/null @@ -1,9 +0,0 @@ -from __future__ import absolute_import - -from future.utils import PY3 - -if PY3: - from dbm.ndbm import * -else: - __future_module__ = True - from dbm import * diff --git a/contrib/python/future/future/moves/html/__init__.py b/contrib/python/future/future/moves/html/__init__.py deleted file mode 100644 index 22ed6e7d2cb..00000000000 --- a/contrib/python/future/future/moves/html/__init__.py +++ /dev/null @@ -1,31 +0,0 @@ -from __future__ import absolute_import -from future.utils import PY3 -__future_module__ = True - -if PY3: - from html import * -else: - # cgi.escape isn't good enough for the single Py3.3 html test to pass. - # Define it inline here instead. From the Py3.4 stdlib. Note that the - # html.escape() function from the Py3.3 stdlib is not suitable for use on - # Py2.x. - """ - General functions for HTML manipulation. - """ - - def escape(s, quote=True): - """ - Replace special characters "&", "<" and ">" to HTML-safe sequences. - If the optional flag quote is true (the default), the quotation mark - characters, both double quote (") and single quote (') characters are also - translated. 
- """ - s = s.replace("&", "&") # Must be done first! - s = s.replace("<", "<") - s = s.replace(">", ">") - if quote: - s = s.replace('"', """) - s = s.replace('\'', "'") - return s - - __all__ = ['escape'] diff --git a/contrib/python/future/future/moves/html/entities.py b/contrib/python/future/future/moves/html/entities.py deleted file mode 100644 index 56a88609112..00000000000 --- a/contrib/python/future/future/moves/html/entities.py +++ /dev/null @@ -1,8 +0,0 @@ -from __future__ import absolute_import -from future.utils import PY3 - -if PY3: - from html.entities import * -else: - __future_module__ = True - from htmlentitydefs import * diff --git a/contrib/python/future/future/moves/html/parser.py b/contrib/python/future/future/moves/html/parser.py deleted file mode 100644 index a6115b59f08..00000000000 --- a/contrib/python/future/future/moves/html/parser.py +++ /dev/null @@ -1,8 +0,0 @@ -from __future__ import absolute_import -from future.utils import PY3 -__future_module__ = True - -if PY3: - from html.parser import * -else: - from HTMLParser import * diff --git a/contrib/python/future/future/moves/http/__init__.py b/contrib/python/future/future/moves/http/__init__.py deleted file mode 100644 index 917b3d71ac9..00000000000 --- a/contrib/python/future/future/moves/http/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from future.utils import PY3 - -if not PY3: - __future_module__ = True diff --git a/contrib/python/future/future/moves/http/client.py b/contrib/python/future/future/moves/http/client.py deleted file mode 100644 index 55f9c9c1ae1..00000000000 --- a/contrib/python/future/future/moves/http/client.py +++ /dev/null @@ -1,8 +0,0 @@ -from future.utils import PY3 - -if PY3: - from http.client import * -else: - from httplib import * - from httplib import HTTPMessage - __future_module__ = True diff --git a/contrib/python/future/future/moves/http/cookiejar.py b/contrib/python/future/future/moves/http/cookiejar.py deleted file mode 100644 index 
ea00df7720f..00000000000 --- a/contrib/python/future/future/moves/http/cookiejar.py +++ /dev/null @@ -1,8 +0,0 @@ -from __future__ import absolute_import -from future.utils import PY3 - -if PY3: - from http.cookiejar import * -else: - __future_module__ = True - from cookielib import * diff --git a/contrib/python/future/future/moves/http/cookies.py b/contrib/python/future/future/moves/http/cookies.py deleted file mode 100644 index 1b74fe2dd7e..00000000000 --- a/contrib/python/future/future/moves/http/cookies.py +++ /dev/null @@ -1,9 +0,0 @@ -from __future__ import absolute_import -from future.utils import PY3 - -if PY3: - from http.cookies import * -else: - __future_module__ = True - from Cookie import * - from Cookie import Morsel # left out of __all__ on Py2.7! diff --git a/contrib/python/future/future/moves/http/server.py b/contrib/python/future/future/moves/http/server.py deleted file mode 100644 index 4e75cc1dec0..00000000000 --- a/contrib/python/future/future/moves/http/server.py +++ /dev/null @@ -1,20 +0,0 @@ -from __future__ import absolute_import -from future.utils import PY3 - -if PY3: - from http.server import * -else: - __future_module__ = True - from BaseHTTPServer import * - from CGIHTTPServer import * - from SimpleHTTPServer import * - try: - from CGIHTTPServer import _url_collapse_path # needed for a test - except ImportError: - try: - # Python 2.7.0 to 2.7.3 - from CGIHTTPServer import ( - _url_collapse_path_split as _url_collapse_path) - except ImportError: - # Doesn't exist on Python 2.6.x. Ignore it. 
- pass diff --git a/contrib/python/future/future/moves/itertools.py b/contrib/python/future/future/moves/itertools.py deleted file mode 100644 index e5eb20d5d59..00000000000 --- a/contrib/python/future/future/moves/itertools.py +++ /dev/null @@ -1,8 +0,0 @@ -from __future__ import absolute_import - -from itertools import * -try: - zip_longest = izip_longest - filterfalse = ifilterfalse -except NameError: - pass diff --git a/contrib/python/future/future/moves/pickle.py b/contrib/python/future/future/moves/pickle.py deleted file mode 100644 index c53d693925d..00000000000 --- a/contrib/python/future/future/moves/pickle.py +++ /dev/null @@ -1,11 +0,0 @@ -from __future__ import absolute_import -from future.utils import PY3 - -if PY3: - from pickle import * -else: - __future_module__ = True - try: - from cPickle import * - except ImportError: - from pickle import * diff --git a/contrib/python/future/future/moves/queue.py b/contrib/python/future/future/moves/queue.py deleted file mode 100644 index 1cb1437d748..00000000000 --- a/contrib/python/future/future/moves/queue.py +++ /dev/null @@ -1,8 +0,0 @@ -from __future__ import absolute_import -from future.utils import PY3 - -if PY3: - from queue import * -else: - __future_module__ = True - from Queue import * diff --git a/contrib/python/future/future/moves/reprlib.py b/contrib/python/future/future/moves/reprlib.py deleted file mode 100644 index a313a13a49f..00000000000 --- a/contrib/python/future/future/moves/reprlib.py +++ /dev/null @@ -1,8 +0,0 @@ -from __future__ import absolute_import -from future.utils import PY3 - -if PY3: - from reprlib import * -else: - __future_module__ = True - from repr import * diff --git a/contrib/python/future/future/moves/socketserver.py b/contrib/python/future/future/moves/socketserver.py deleted file mode 100644 index 062e0848deb..00000000000 --- a/contrib/python/future/future/moves/socketserver.py +++ /dev/null @@ -1,8 +0,0 @@ -from __future__ import absolute_import -from future.utils 
import PY3 - -if PY3: - from socketserver import * -else: - __future_module__ = True - from SocketServer import * diff --git a/contrib/python/future/future/moves/subprocess.py b/contrib/python/future/future/moves/subprocess.py deleted file mode 100644 index 43ffd2ac235..00000000000 --- a/contrib/python/future/future/moves/subprocess.py +++ /dev/null @@ -1,11 +0,0 @@ -from __future__ import absolute_import -from future.utils import PY2, PY26 - -from subprocess import * - -if PY2: - __future_module__ = True - from commands import getoutput, getstatusoutput - -if PY26: - from future.backports.misc import check_output diff --git a/contrib/python/future/future/moves/sys.py b/contrib/python/future/future/moves/sys.py deleted file mode 100644 index 1293bcb070e..00000000000 --- a/contrib/python/future/future/moves/sys.py +++ /dev/null @@ -1,8 +0,0 @@ -from __future__ import absolute_import - -from future.utils import PY2 - -from sys import * - -if PY2: - from __builtin__ import intern diff --git a/contrib/python/future/future/moves/urllib/__init__.py b/contrib/python/future/future/moves/urllib/__init__.py deleted file mode 100644 index 5cf428b6ec4..00000000000 --- a/contrib/python/future/future/moves/urllib/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from __future__ import absolute_import -from future.utils import PY3 - -if not PY3: - __future_module__ = True diff --git a/contrib/python/future/future/moves/urllib/error.py b/contrib/python/future/future/moves/urllib/error.py deleted file mode 100644 index 7d8ada73f8f..00000000000 --- a/contrib/python/future/future/moves/urllib/error.py +++ /dev/null @@ -1,16 +0,0 @@ -from __future__ import absolute_import -from future.standard_library import suspend_hooks - -from future.utils import PY3 - -if PY3: - from urllib.error import * -else: - __future_module__ = True - - # We use this method to get at the original Py2 urllib before any renaming magic - # ContentTooShortError = sys.py2_modules['urllib'].ContentTooShortError - - with 
suspend_hooks(): - from urllib import ContentTooShortError - from urllib2 import URLError, HTTPError diff --git a/contrib/python/future/future/moves/urllib/parse.py b/contrib/python/future/future/moves/urllib/parse.py deleted file mode 100644 index 9074b8163f6..00000000000 --- a/contrib/python/future/future/moves/urllib/parse.py +++ /dev/null @@ -1,28 +0,0 @@ -from __future__ import absolute_import -from future.standard_library import suspend_hooks - -from future.utils import PY3 - -if PY3: - from urllib.parse import * -else: - __future_module__ = True - from urlparse import (ParseResult, SplitResult, parse_qs, parse_qsl, - urldefrag, urljoin, urlparse, urlsplit, - urlunparse, urlunsplit) - - # we use this method to get at the original py2 urllib before any renaming - # quote = sys.py2_modules['urllib'].quote - # quote_plus = sys.py2_modules['urllib'].quote_plus - # unquote = sys.py2_modules['urllib'].unquote - # unquote_plus = sys.py2_modules['urllib'].unquote_plus - # urlencode = sys.py2_modules['urllib'].urlencode - # splitquery = sys.py2_modules['urllib'].splitquery - - with suspend_hooks(): - from urllib import (quote, - quote_plus, - unquote, - unquote_plus, - urlencode, - splitquery) diff --git a/contrib/python/future/future/moves/urllib/request.py b/contrib/python/future/future/moves/urllib/request.py deleted file mode 100644 index 972aa4ab5da..00000000000 --- a/contrib/python/future/future/moves/urllib/request.py +++ /dev/null @@ -1,94 +0,0 @@ -from __future__ import absolute_import - -from future.standard_library import suspend_hooks -from future.utils import PY3 - -if PY3: - from urllib.request import * - # This aren't in __all__: - from urllib.request import (getproxies, - pathname2url, - proxy_bypass, - quote, - request_host, - thishost, - unquote, - url2pathname, - urlcleanup, - urljoin, - urlopen, - urlparse, - urlretrieve, - urlsplit, - urlunparse) - - from urllib.parse import (splitattr, - splithost, - splitpasswd, - splitport, - splitquery, - 
splittag, - splittype, - splituser, - splitvalue, - to_bytes, - unwrap) -else: - __future_module__ = True - with suspend_hooks(): - from urllib import * - from urllib2 import * - from urlparse import * - - # Rename: - from urllib import toBytes # missing from __all__ on Py2.6 - to_bytes = toBytes - - # from urllib import (pathname2url, - # url2pathname, - # getproxies, - # urlretrieve, - # urlcleanup, - # URLopener, - # FancyURLopener, - # proxy_bypass) - - # from urllib2 import ( - # AbstractBasicAuthHandler, - # AbstractDigestAuthHandler, - # BaseHandler, - # CacheFTPHandler, - # FileHandler, - # FTPHandler, - # HTTPBasicAuthHandler, - # HTTPCookieProcessor, - # HTTPDefaultErrorHandler, - # HTTPDigestAuthHandler, - # HTTPErrorProcessor, - # HTTPHandler, - # HTTPPasswordMgr, - # HTTPPasswordMgrWithDefaultRealm, - # HTTPRedirectHandler, - # HTTPSHandler, - # URLError, - # build_opener, - # install_opener, - # OpenerDirector, - # ProxyBasicAuthHandler, - # ProxyDigestAuthHandler, - # ProxyHandler, - # Request, - # UnknownHandler, - # urlopen, - # ) - - # from urlparse import ( - # urldefrag - # urljoin, - # urlparse, - # urlunparse, - # urlsplit, - # urlunsplit, - # parse_qs, - # parse_q" - # ) diff --git a/contrib/python/future/future/moves/urllib/response.py b/contrib/python/future/future/moves/urllib/response.py deleted file mode 100644 index a287ae2833f..00000000000 --- a/contrib/python/future/future/moves/urllib/response.py +++ /dev/null @@ -1,12 +0,0 @@ -from future import standard_library -from future.utils import PY3 - -if PY3: - from urllib.response import * -else: - __future_module__ = True - with standard_library.suspend_hooks(): - from urllib import (addbase, - addclosehook, - addinfo, - addinfourl) diff --git a/contrib/python/future/future/moves/urllib/robotparser.py b/contrib/python/future/future/moves/urllib/robotparser.py deleted file mode 100644 index 0dc8f5715cd..00000000000 --- a/contrib/python/future/future/moves/urllib/robotparser.py +++ 
/dev/null @@ -1,8 +0,0 @@ -from __future__ import absolute_import -from future.utils import PY3 - -if PY3: - from urllib.robotparser import * -else: - __future_module__ = True - from robotparser import * diff --git a/contrib/python/future/future/moves/winreg.py b/contrib/python/future/future/moves/winreg.py deleted file mode 100644 index c8b147568c4..00000000000 --- a/contrib/python/future/future/moves/winreg.py +++ /dev/null @@ -1,8 +0,0 @@ -from __future__ import absolute_import -from future.utils import PY3 - -if PY3: - from winreg import * -else: - __future_module__ = True - from _winreg import * diff --git a/contrib/python/future/future/moves/xmlrpc/__init__.py b/contrib/python/future/future/moves/xmlrpc/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 --- a/contrib/python/future/future/moves/xmlrpc/__init__.py +++ /dev/null diff --git a/contrib/python/future/future/moves/xmlrpc/client.py b/contrib/python/future/future/moves/xmlrpc/client.py deleted file mode 100644 index 4708cf89921..00000000000 --- a/contrib/python/future/future/moves/xmlrpc/client.py +++ /dev/null @@ -1,7 +0,0 @@ -from __future__ import absolute_import -from future.utils import PY3 - -if PY3: - from xmlrpc.client import * -else: - from xmlrpclib import * diff --git a/contrib/python/future/future/moves/xmlrpc/server.py b/contrib/python/future/future/moves/xmlrpc/server.py deleted file mode 100644 index 1a8af3454bd..00000000000 --- a/contrib/python/future/future/moves/xmlrpc/server.py +++ /dev/null @@ -1,7 +0,0 @@ -from __future__ import absolute_import -from future.utils import PY3 - -if PY3: - from xmlrpc.server import * -else: - from xmlrpclib import * diff --git a/contrib/python/future/future/standard_library/__init__.py b/contrib/python/future/future/standard_library/__init__.py deleted file mode 100644 index 41c4f36df25..00000000000 --- a/contrib/python/future/future/standard_library/__init__.py +++ /dev/null @@ -1,815 +0,0 @@ -""" -Python 3 reorganized the standard 
library (PEP 3108). This module exposes -several standard library modules to Python 2 under their new Python 3 -names. - -It is designed to be used as follows:: - - from future import standard_library - standard_library.install_aliases() - -And then these normal Py3 imports work on both Py3 and Py2:: - - import builtins - import copyreg - import queue - import reprlib - import socketserver - import winreg # on Windows only - import test.support - import html, html.parser, html.entites - import http, http.client, http.server - import http.cookies, http.cookiejar - import urllib.parse, urllib.request, urllib.response, urllib.error, urllib.robotparser - import xmlrpc.client, xmlrpc.server - - import _thread - import _dummy_thread - import _markupbase - - from itertools import filterfalse, zip_longest - from sys import intern - from collections import UserDict, UserList, UserString - from collections import OrderedDict, Counter, ChainMap # even on Py2.6 - from subprocess import getoutput, getstatusoutput - from subprocess import check_output # even on Py2.6 - -(The renamed modules and functions are still available under their old -names on Python 2.) - -This is a cleaner alternative to this idiom (see -http://docs.pythonsprints.com/python3_porting/py-porting.html):: - - try: - import queue - except ImportError: - import Queue as queue - - -Limitations ------------ -We don't currently support these modules, but would like to:: - - import dbm - import dbm.dumb - import dbm.gnu - import collections.abc # on Py33 - import pickle # should (optionally) bring in cPickle on Python 2 - -""" - -from __future__ import absolute_import, division, print_function - -import sys -import logging -import imp -import contextlib -import types -import copy -import os - -# Make a dedicated logger; leave the root logger to be configured -# by the application. 
-flog = logging.getLogger('future_stdlib') -_formatter = logging.Formatter(logging.BASIC_FORMAT) -_handler = logging.StreamHandler() -_handler.setFormatter(_formatter) -flog.addHandler(_handler) -flog.setLevel(logging.WARN) - -from future.utils import PY2, PY3 - -# The modules that are defined under the same names on Py3 but with -# different contents in a significant way (e.g. submodules) are: -# pickle (fast one) -# dbm -# urllib -# test -# email - -REPLACED_MODULES = set(['test', 'urllib', 'pickle', 'dbm']) # add email and dbm when we support it - -# The following module names are not present in Python 2.x, so they cause no -# potential clashes between the old and new names: -# http -# html -# tkinter -# xmlrpc -# Keys: Py2 / real module names -# Values: Py3 / simulated module names -RENAMES = { - # 'cStringIO': 'io', # there's a new io module in Python 2.6 - # that provides StringIO and BytesIO - # 'StringIO': 'io', # ditto - # 'cPickle': 'pickle', - '__builtin__': 'builtins', - 'copy_reg': 'copyreg', - 'Queue': 'queue', - 'future.moves.socketserver': 'socketserver', - 'ConfigParser': 'configparser', - 'repr': 'reprlib', - # 'FileDialog': 'tkinter.filedialog', - # 'tkFileDialog': 'tkinter.filedialog', - # 'SimpleDialog': 'tkinter.simpledialog', - # 'tkSimpleDialog': 'tkinter.simpledialog', - # 'tkColorChooser': 'tkinter.colorchooser', - # 'tkCommonDialog': 'tkinter.commondialog', - # 'Dialog': 'tkinter.dialog', - # 'Tkdnd': 'tkinter.dnd', - # 'tkFont': 'tkinter.font', - # 'tkMessageBox': 'tkinter.messagebox', - # 'ScrolledText': 'tkinter.scrolledtext', - # 'Tkconstants': 'tkinter.constants', - # 'Tix': 'tkinter.tix', - # 'ttk': 'tkinter.ttk', - # 'Tkinter': 'tkinter', - '_winreg': 'winreg', - 'thread': '_thread', - 'dummy_thread': '_dummy_thread' if sys.version_info < (3, 9) else '_thread', - # 'anydbm': 'dbm', # causes infinite import loop - # 'whichdb': 'dbm', # causes infinite import loop - # anydbm and whichdb are handled by fix_imports2 - # 'dbhash': 
'dbm.bsd', - # 'dumbdbm': 'dbm.dumb', - # 'dbm': 'dbm.ndbm', - # 'gdbm': 'dbm.gnu', - 'future.moves.xmlrpc': 'xmlrpc', - # 'future.backports.email': 'email', # for use by urllib - # 'DocXMLRPCServer': 'xmlrpc.server', - # 'SimpleXMLRPCServer': 'xmlrpc.server', - # 'httplib': 'http.client', - # 'htmlentitydefs' : 'html.entities', - # 'HTMLParser' : 'html.parser', - # 'Cookie': 'http.cookies', - # 'cookielib': 'http.cookiejar', - # 'BaseHTTPServer': 'http.server', - # 'SimpleHTTPServer': 'http.server', - # 'CGIHTTPServer': 'http.server', - # 'future.backports.test': 'test', # primarily for renaming test_support to support - # 'commands': 'subprocess', - # 'urlparse' : 'urllib.parse', - # 'robotparser' : 'urllib.robotparser', - # 'abc': 'collections.abc', # for Py33 - # 'future.utils.six.moves.html': 'html', - # 'future.utils.six.moves.http': 'http', - 'future.moves.html': 'html', - 'future.moves.http': 'http', - # 'future.backports.urllib': 'urllib', - # 'future.utils.six.moves.urllib': 'urllib', - 'future.moves._markupbase': '_markupbase', - } - - -# It is complicated and apparently brittle to mess around with the -# ``sys.modules`` cache in order to support "import urllib" meaning two -# different things (Py2.7 urllib and backported Py3.3-like urllib) in different -# contexts. So we require explicit imports for these modules. -assert len(set(RENAMES.values()) & set(REPLACED_MODULES)) == 0 - - -# Harmless renames that we can insert. 
-# These modules need names from elsewhere being added to them: -# subprocess: should provide getoutput and other fns from commands -# module but these fns are missing: getstatus, mk2arg, -# mkarg -# re: needs an ASCII constant that works compatibly with Py3 - -# etc: see lib2to3/fixes/fix_imports.py - -# (New module name, new object name, old module name, old object name) -MOVES = [('collections', 'UserList', 'UserList', 'UserList'), - ('collections', 'UserDict', 'UserDict', 'UserDict'), - ('collections', 'UserString','UserString', 'UserString'), - ('collections', 'ChainMap', 'future.backports.misc', 'ChainMap'), - ('itertools', 'filterfalse','itertools', 'ifilterfalse'), - ('itertools', 'zip_longest','itertools', 'izip_longest'), - ('sys', 'intern','__builtin__', 'intern'), - # The re module has no ASCII flag in Py2, but this is the default. - # Set re.ASCII to a zero constant. stat.ST_MODE just happens to be one - # (and it exists on Py2.6+). - ('re', 'ASCII','stat', 'ST_MODE'), - ('base64', 'encodebytes','base64', 'encodestring'), - ('base64', 'decodebytes','base64', 'decodestring'), - ('subprocess', 'getoutput', 'commands', 'getoutput'), - ('subprocess', 'getstatusoutput', 'commands', 'getstatusoutput'), - ('subprocess', 'check_output', 'future.backports.misc', 'check_output'), - ('math', 'ceil', 'future.backports.misc', 'ceil'), - ('collections', 'OrderedDict', 'future.backports.misc', 'OrderedDict'), - ('collections', 'Counter', 'future.backports.misc', 'Counter'), - ('collections', 'ChainMap', 'future.backports.misc', 'ChainMap'), - ('itertools', 'count', 'future.backports.misc', 'count'), - ('reprlib', 'recursive_repr', 'future.backports.misc', 'recursive_repr'), - ('functools', 'cmp_to_key', 'future.backports.misc', 'cmp_to_key'), - -# This is no use, since "import urllib.request" etc. 
still fails: -# ('urllib', 'error', 'future.moves.urllib', 'error'), -# ('urllib', 'parse', 'future.moves.urllib', 'parse'), -# ('urllib', 'request', 'future.moves.urllib', 'request'), -# ('urllib', 'response', 'future.moves.urllib', 'response'), -# ('urllib', 'robotparser', 'future.moves.urllib', 'robotparser'), - ] - - -# A minimal example of an import hook: -# class WarnOnImport(object): -# def __init__(self, *args): -# self.module_names = args -# -# def find_module(self, fullname, path=None): -# if fullname in self.module_names: -# self.path = path -# return self -# return None -# -# def load_module(self, name): -# if name in sys.modules: -# return sys.modules[name] -# module_info = imp.find_module(name, self.path) -# module = imp.load_module(name, *module_info) -# sys.modules[name] = module -# flog.warning("Imported deprecated module %s", name) -# return module - - -class RenameImport(object): - """ - A class for import hooks mapping Py3 module names etc. to the Py2 equivalents. - """ - # Different RenameImport classes are created when importing this module from - # different source files. This causes isinstance(hook, RenameImport) checks - # to produce inconsistent results. We add this RENAMER attribute here so - # remove_hooks() and install_hooks() can find instances of these classes - # easily: - RENAMER = True - - def __init__(self, old_to_new): - ''' - Pass in a dictionary-like object mapping from old names to new - names. E.g. 
{'ConfigParser': 'configparser', 'cPickle': 'pickle'} - ''' - self.old_to_new = old_to_new - both = set(old_to_new.keys()) & set(old_to_new.values()) - assert (len(both) == 0 and - len(set(old_to_new.values())) == len(old_to_new.values())), \ - 'Ambiguity in renaming (handler not implemented)' - self.new_to_old = dict((new, old) for (old, new) in old_to_new.items()) - - def find_module(self, fullname, path=None): - # Handles hierarchical importing: package.module.module2 - new_base_names = set([s.split('.')[0] for s in self.new_to_old]) - # Before v0.12: Was: if fullname in set(self.old_to_new) | new_base_names: - if fullname in new_base_names: - return self - return None - - def load_module(self, name): - path = None - if name in sys.modules: - return sys.modules[name] - elif name in self.new_to_old: - # New name. Look up the corresponding old (Py2) name: - oldname = self.new_to_old[name] - module = self._find_and_load_module(oldname) - # module.__future_module__ = True - else: - module = self._find_and_load_module(name) - # In any case, make it available under the requested (Py3) name - sys.modules[name] = module - return module - - def _find_and_load_module(self, name, path=None): - """ - Finds and loads it. But if there's a . in the name, handles it - properly. - """ - bits = name.split('.') - while len(bits) > 1: - # Treat the first bit as a package - packagename = bits.pop(0) - package = self._find_and_load_module(packagename, path) - try: - path = package.__path__ - except AttributeError: - # This could be e.g. moves. - flog.debug('Package {0} has no __path__.'.format(package)) - if name in sys.modules: - return sys.modules[name] - flog.debug('What to do here?') - - name = bits[0] - module_info = imp.find_module(name, path) - return imp.load_module(name, *module_info) - - -class hooks(object): - """ - Acts as a context manager. Saves the state of sys.modules and restores it - after the 'with' block. 
- - Use like this: - - >>> from future import standard_library - >>> with standard_library.hooks(): - ... import http.client - >>> import requests - - For this to work, http.client will be scrubbed from sys.modules after the - 'with' block. That way the modules imported in the 'with' block will - continue to be accessible in the current namespace but not from any - imported modules (like requests). - """ - def __enter__(self): - # flog.debug('Entering hooks context manager') - self.old_sys_modules = copy.copy(sys.modules) - self.hooks_were_installed = detect_hooks() - # self.scrubbed = scrub_py2_sys_modules() - install_hooks() - return self - - def __exit__(self, *args): - # flog.debug('Exiting hooks context manager') - # restore_sys_modules(self.scrubbed) - if not self.hooks_were_installed: - remove_hooks() - # scrub_future_sys_modules() - -# Sanity check for is_py2_stdlib_module(): We aren't replacing any -# builtin modules names: -if PY2: - assert len(set(RENAMES.values()) & set(sys.builtin_module_names)) == 0 - - -def is_py2_stdlib_module(m): - """ - Tries to infer whether the module m is from the Python 2 standard library. - This may not be reliable on all systems. - """ - if PY3: - return False - if not 'stdlib_path' in is_py2_stdlib_module.__dict__: - stdlib_files = [contextlib.__file__, os.__file__, copy.__file__] - stdlib_paths = [os.path.split(f)[0] for f in stdlib_files] - if not len(set(stdlib_paths)) == 1: - # This seems to happen on travis-ci.org. Very strange. We'll try to - # ignore it. 
- flog.warn('Multiple locations found for the Python standard ' - 'library: %s' % stdlib_paths) - # Choose the first one arbitrarily - is_py2_stdlib_module.stdlib_path = stdlib_paths[0] - - if m.__name__ in sys.builtin_module_names: - return True - - if hasattr(m, '__file__'): - modpath = os.path.split(m.__file__) - if (modpath[0].startswith(is_py2_stdlib_module.stdlib_path) and - 'site-packages' not in modpath[0]): - return True - - return False - - -def scrub_py2_sys_modules(): - """ - Removes any Python 2 standard library modules from ``sys.modules`` that - would interfere with Py3-style imports using import hooks. Examples are - modules with the same names (like urllib or email). - - (Note that currently import hooks are disabled for modules like these - with ambiguous names anyway ...) - """ - if PY3: - return {} - scrubbed = {} - for modulename in REPLACED_MODULES & set(RENAMES.keys()): - if not modulename in sys.modules: - continue - - module = sys.modules[modulename] - - if is_py2_stdlib_module(module): - flog.debug('Deleting (Py2) {} from sys.modules'.format(modulename)) - scrubbed[modulename] = sys.modules[modulename] - del sys.modules[modulename] - return scrubbed - - -def scrub_future_sys_modules(): - """ - Deprecated. - """ - return {} - -class suspend_hooks(object): - """ - Acts as a context manager. Use like this: - - >>> from future import standard_library - >>> standard_library.install_hooks() - >>> import http.client - >>> # ... - >>> with standard_library.suspend_hooks(): - >>> import requests # incompatible with ``future``'s standard library hooks - - If the hooks were disabled before the context, they are not installed when - the context is left. 
- """ - def __enter__(self): - self.hooks_were_installed = detect_hooks() - remove_hooks() - # self.scrubbed = scrub_future_sys_modules() - return self - - def __exit__(self, *args): - if self.hooks_were_installed: - install_hooks() - # restore_sys_modules(self.scrubbed) - - -def restore_sys_modules(scrubbed): - """ - Add any previously scrubbed modules back to the sys.modules cache, - but only if it's safe to do so. - """ - clash = set(sys.modules) & set(scrubbed) - if len(clash) != 0: - # If several, choose one arbitrarily to raise an exception about - first = list(clash)[0] - raise ImportError('future module {} clashes with Py2 module' - .format(first)) - sys.modules.update(scrubbed) - - -def install_aliases(): - """ - Monkey-patches the standard library in Py2.6/7 to provide - aliases for better Py3 compatibility. - """ - if PY3: - return - # if hasattr(install_aliases, 'run_already'): - # return - for (newmodname, newobjname, oldmodname, oldobjname) in MOVES: - __import__(newmodname) - # We look up the module in sys.modules because __import__ just returns the - # top-level package: - newmod = sys.modules[newmodname] - # newmod.__future_module__ = True - - __import__(oldmodname) - oldmod = sys.modules[oldmodname] - - obj = getattr(oldmod, oldobjname) - setattr(newmod, newobjname, obj) - - # Hack for urllib so it appears to have the same structure on Py2 as on Py3 - import urllib - from future.backports.urllib import request - from future.backports.urllib import response - from future.backports.urllib import parse - from future.backports.urllib import error - from future.backports.urllib import robotparser - urllib.request = request - urllib.response = response - urllib.parse = parse - urllib.error = error - urllib.robotparser = robotparser - sys.modules['urllib.request'] = request - sys.modules['urllib.response'] = response - sys.modules['urllib.parse'] = parse - sys.modules['urllib.error'] = error - sys.modules['urllib.robotparser'] = robotparser - - # Patch 
the test module so it appears to have the same structure on Py2 as on Py3 - try: - import test - except ImportError: - pass - try: - from future.moves.test import support - except ImportError: - pass - else: - test.support = support - sys.modules['test.support'] = support - - # Patch the dbm module so it appears to have the same structure on Py2 as on Py3 - try: - import dbm - except ImportError: - pass - else: - from future.moves.dbm import dumb - dbm.dumb = dumb - sys.modules['dbm.dumb'] = dumb - try: - from future.moves.dbm import gnu - except ImportError: - pass - else: - dbm.gnu = gnu - sys.modules['dbm.gnu'] = gnu - try: - from future.moves.dbm import ndbm - except ImportError: - pass - else: - dbm.ndbm = ndbm - sys.modules['dbm.ndbm'] = ndbm - - # install_aliases.run_already = True - - -def install_hooks(): - """ - This function installs the future.standard_library import hook into - sys.meta_path. - """ - if PY3: - return - - install_aliases() - - flog.debug('sys.meta_path was: {0}'.format(sys.meta_path)) - flog.debug('Installing hooks ...') - - # Add it unless it's there already - newhook = RenameImport(RENAMES) - if not detect_hooks(): - sys.meta_path.append(newhook) - flog.debug('sys.meta_path is now: {0}'.format(sys.meta_path)) - - -def enable_hooks(): - """ - Deprecated. Use install_hooks() instead. This will be removed by - ``future`` v1.0. - """ - install_hooks() - - -def remove_hooks(scrub_sys_modules=False): - """ - This function removes the import hook from sys.meta_path. - """ - if PY3: - return - flog.debug('Uninstalling hooks ...') - # Loop backwards, so deleting items keeps the ordering: - for i, hook in list(enumerate(sys.meta_path))[::-1]: - if hasattr(hook, 'RENAMER'): - del sys.meta_path[i] - - # Explicit is better than implicit. In the future the interface should - # probably change so that scrubbing the import hooks requires a separate - # function call. Left as is for now for backward compatibility with - # v0.11.x. 
- if scrub_sys_modules: - scrub_future_sys_modules() - - -def disable_hooks(): - """ - Deprecated. Use remove_hooks() instead. This will be removed by - ``future`` v1.0. - """ - remove_hooks() - - -def detect_hooks(): - """ - Returns True if the import hooks are installed, False if not. - """ - flog.debug('Detecting hooks ...') - present = any([hasattr(hook, 'RENAMER') for hook in sys.meta_path]) - if present: - flog.debug('Detected.') - else: - flog.debug('Not detected.') - return present - - -# As of v0.12, this no longer happens implicitly: -# if not PY3: -# install_hooks() - - -if not hasattr(sys, 'py2_modules'): - sys.py2_modules = {} - -def cache_py2_modules(): - """ - Currently this function is unneeded, as we are not attempting to provide import hooks - for modules with ambiguous names: email, urllib, pickle. - """ - if len(sys.py2_modules) != 0: - return - assert not detect_hooks() - import urllib - sys.py2_modules['urllib'] = urllib - - import email - sys.py2_modules['email'] = email - - import pickle - sys.py2_modules['pickle'] = pickle - - # Not all Python installations have test module. (Anaconda doesn't, for example.) - # try: - # import test - # except ImportError: - # sys.py2_modules['test'] = None - # sys.py2_modules['test'] = test - - # import dbm - # sys.py2_modules['dbm'] = dbm - - -def import_(module_name, backport=False): - """ - Pass a (potentially dotted) module name of a Python 3 standard library - module. This function imports the module compatibly on Py2 and Py3 and - returns the top-level module. - - Example use: - >>> http = import_('http.client') - >>> http = import_('http.server') - >>> urllib = import_('urllib.request') - - Then: - >>> conn = http.client.HTTPConnection(...) - >>> response = urllib.request.urlopen('http://mywebsite.com') - >>> # etc. 
- - Use as follows: - >>> package_name = import_(module_name) - - On Py3, equivalent to this: - - >>> import module_name - - On Py2, equivalent to this if backport=False: - - >>> from future.moves import module_name - - or to this if backport=True: - - >>> from future.backports import module_name - - except that it also handles dotted module names such as ``http.client`` - The effect then is like this: - - >>> from future.backports import module - >>> from future.backports.module import submodule - >>> module.submodule = submodule - - Note that this would be a SyntaxError in Python: - - >>> from future.backports import http.client - - """ - # Python 2.6 doesn't have importlib in the stdlib, so it requires - # the backported ``importlib`` package from PyPI as a dependency to use - # this function: - import importlib - - if PY3: - return __import__(module_name) - else: - # client.blah = blah - # Then http.client = client - # etc. - if backport: - prefix = 'future.backports' - else: - prefix = 'future.moves' - parts = prefix.split('.') + module_name.split('.') - - modules = [] - for i, part in enumerate(parts): - sofar = '.'.join(parts[:i+1]) - modules.append(importlib.import_module(sofar)) - for i, part in reversed(list(enumerate(parts))): - if i == 0: - break - setattr(modules[i-1], part, modules[i]) - - # Return the next-most top-level module after future.backports / future.moves: - return modules[2] - - -def from_import(module_name, *symbol_names, **kwargs): - """ - Example use: - >>> HTTPConnection = from_import('http.client', 'HTTPConnection') - >>> HTTPServer = from_import('http.server', 'HTTPServer') - >>> urlopen, urlparse = from_import('urllib.request', 'urlopen', 'urlparse') - - Equivalent to this on Py3: - - >>> from module_name import symbol_names[0], symbol_names[1], ... - - and this on Py2: - - >>> from future.moves.module_name import symbol_names[0], ... - - or: - - >>> from future.backports.module_name import symbol_names[0], ... 
- - except that it also handles dotted module names such as ``http.client``. - """ - - if PY3: - return __import__(module_name) - else: - if 'backport' in kwargs and bool(kwargs['backport']): - prefix = 'future.backports' - else: - prefix = 'future.moves' - parts = prefix.split('.') + module_name.split('.') - module = importlib.import_module(prefix + '.' + module_name) - output = [getattr(module, name) for name in symbol_names] - if len(output) == 1: - return output[0] - else: - return output - - -class exclude_local_folder_imports(object): - """ - A context-manager that prevents standard library modules like configparser - from being imported from the local python-future source folder on Py3. - - (This was need prior to v0.16.0 because the presence of a configparser - folder would otherwise have prevented setuptools from running on Py3. Maybe - it's not needed any more?) - """ - def __init__(self, *args): - assert len(args) > 0 - self.module_names = args - # Disallow dotted module names like http.client: - if any(['.' in m for m in self.module_names]): - raise NotImplementedError('Dotted module names are not supported') - - def __enter__(self): - self.old_sys_path = copy.copy(sys.path) - self.old_sys_modules = copy.copy(sys.modules) - if sys.version_info[0] < 3: - return - # The presence of all these indicates we've found our source folder, - # because `builtins` won't have been installed in site-packages by setup.py: - FUTURE_SOURCE_SUBFOLDERS = ['future', 'past', 'libfuturize', 'libpasteurize', 'builtins'] - - # Look for the future source folder: - for folder in self.old_sys_path: - if all([os.path.exists(os.path.join(folder, subfolder)) - for subfolder in FUTURE_SOURCE_SUBFOLDERS]): - # Found it. Remove it. 
- sys.path.remove(folder) - - # Ensure we import the system module: - for m in self.module_names: - # Delete the module and any submodules from sys.modules: - # for key in list(sys.modules): - # if key == m or key.startswith(m + '.'): - # try: - # del sys.modules[key] - # except KeyError: - # pass - try: - module = __import__(m, level=0) - except ImportError: - # There's a problem importing the system module. E.g. the - # winreg module is not available except on Windows. - pass - - def __exit__(self, *args): - # Restore sys.path and sys.modules: - sys.path = self.old_sys_path - for m in set(self.old_sys_modules.keys()) - set(sys.modules.keys()): - sys.modules[m] = self.old_sys_modules[m] - -TOP_LEVEL_MODULES = ['builtins', - 'copyreg', - 'html', - 'http', - 'queue', - 'reprlib', - 'socketserver', - 'test', - 'tkinter', - 'winreg', - 'xmlrpc', - '_dummy_thread', - '_markupbase', - '_thread', - ] - -def import_top_level_modules(): - with exclude_local_folder_imports(*TOP_LEVEL_MODULES): - for m in TOP_LEVEL_MODULES: - try: - __import__(m) - except ImportError: # e.g. 
winreg - pass diff --git a/contrib/python/future/future/tests/__init__.py b/contrib/python/future/future/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 --- a/contrib/python/future/future/tests/__init__.py +++ /dev/null diff --git a/contrib/python/future/future/tests/base.py b/contrib/python/future/future/tests/base.py deleted file mode 100644 index 4ef437baa64..00000000000 --- a/contrib/python/future/future/tests/base.py +++ /dev/null @@ -1,539 +0,0 @@ -from __future__ import print_function, absolute_import -import os -import tempfile -import unittest -import sys -import re -import warnings -import io -from textwrap import dedent - -from future.utils import bind_method, PY26, PY3, PY2, PY27 -from future.moves.subprocess import check_output, STDOUT, CalledProcessError - -if PY26: - import unittest2 as unittest - - -def reformat_code(code): - """ - Removes any leading \n and dedents. - """ - if code.startswith('\n'): - code = code[1:] - return dedent(code) - - -def order_future_lines(code): - """ - Returns the code block with any ``__future__`` import lines sorted, and - then any ``future`` import lines sorted, then any ``builtins`` import lines - sorted. - - This only sorts the lines within the expected blocks. - - See test_order_future_lines() for an example. 
- """ - - # We need .splitlines(keepends=True), which doesn't exist on Py2, - # so we use this instead: - lines = code.split('\n') - - uufuture_line_numbers = [i for i, line in enumerate(lines) - if line.startswith('from __future__ import ')] - - future_line_numbers = [i for i, line in enumerate(lines) - if line.startswith('from future') - or line.startswith('from past')] - - builtins_line_numbers = [i for i, line in enumerate(lines) - if line.startswith('from builtins')] - - assert code.lstrip() == code, ('internal usage error: ' - 'dedent the code before calling order_future_lines()') - - def mymax(numbers): - return max(numbers) if len(numbers) > 0 else 0 - - def mymin(numbers): - return min(numbers) if len(numbers) > 0 else float('inf') - - assert mymax(uufuture_line_numbers) <= mymin(future_line_numbers), \ - 'the __future__ and future imports are out of order' - - # assert mymax(future_line_numbers) <= mymin(builtins_line_numbers), \ - # 'the future and builtins imports are out of order' - - uul = sorted([lines[i] for i in uufuture_line_numbers]) - sorted_uufuture_lines = dict(zip(uufuture_line_numbers, uul)) - - fl = sorted([lines[i] for i in future_line_numbers]) - sorted_future_lines = dict(zip(future_line_numbers, fl)) - - bl = sorted([lines[i] for i in builtins_line_numbers]) - sorted_builtins_lines = dict(zip(builtins_line_numbers, bl)) - - # Replace the old unsorted "from __future__ import ..." 
lines with the - # new sorted ones: - new_lines = [] - for i in range(len(lines)): - if i in uufuture_line_numbers: - new_lines.append(sorted_uufuture_lines[i]) - elif i in future_line_numbers: - new_lines.append(sorted_future_lines[i]) - elif i in builtins_line_numbers: - new_lines.append(sorted_builtins_lines[i]) - else: - new_lines.append(lines[i]) - return '\n'.join(new_lines) - - -class VerboseCalledProcessError(CalledProcessError): - """ - Like CalledProcessError, but it displays more information (message and - script output) for diagnosing test failures etc. - """ - def __init__(self, msg, returncode, cmd, output=None): - self.msg = msg - self.returncode = returncode - self.cmd = cmd - self.output = output - - def __str__(self): - return ("Command '%s' failed with exit status %d\nMessage: %s\nOutput: %s" - % (self.cmd, self.returncode, self.msg, self.output)) - -class FuturizeError(VerboseCalledProcessError): - pass - -class PasteurizeError(VerboseCalledProcessError): - pass - - -class CodeHandler(unittest.TestCase): - """ - Handy mixin for test classes for writing / reading / futurizing / - running .py files in the test suite. 
- """ - def setUp(self): - """ - The outputs from the various futurize stages should have the - following headers: - """ - # After stage1: - # TODO: use this form after implementing a fixer to consolidate - # __future__ imports into a single line: - # self.headers1 = """ - # from __future__ import absolute_import, division, print_function - # """ - self.headers1 = reformat_code(""" - from __future__ import absolute_import - from __future__ import division - from __future__ import print_function - """) - - # After stage2 --all-imports: - # TODO: use this form after implementing a fixer to consolidate - # __future__ imports into a single line: - # self.headers2 = """ - # from __future__ import (absolute_import, division, - # print_function, unicode_literals) - # from future import standard_library - # from future.builtins import * - # """ - self.headers2 = reformat_code(""" - from __future__ import absolute_import - from __future__ import division - from __future__ import print_function - from __future__ import unicode_literals - from future import standard_library - standard_library.install_aliases() - from builtins import * - """) - self.interpreters = [sys.executable] - self.tempdir = tempfile.mkdtemp() + os.path.sep - pypath = os.getenv('PYTHONPATH') - if pypath: - self.env = {'PYTHONPATH': os.getcwd() + os.pathsep + pypath} - else: - self.env = {'PYTHONPATH': os.getcwd()} - - def convert(self, code, stages=(1, 2), all_imports=False, from3=False, - reformat=True, run=True, conservative=False): - """ - Converts the code block using ``futurize`` and returns the - resulting code. - - Passing stages=[1] or stages=[2] passes the flag ``--stage1`` or - ``stage2`` to ``futurize``. Passing both stages runs ``futurize`` - with both stages by default. - - If from3 is False, runs ``futurize``, converting from Python 2 to - both 2 and 3. If from3 is True, runs ``pasteurize`` to convert - from Python 3 to both 2 and 3. 
- - Optionally reformats the code block first using the reformat() function. - - If run is True, runs the resulting code under all Python - interpreters in self.interpreters. - """ - if reformat: - code = reformat_code(code) - self._write_test_script(code) - self._futurize_test_script(stages=stages, all_imports=all_imports, - from3=from3, conservative=conservative) - output = self._read_test_script() - if run: - for interpreter in self.interpreters: - _ = self._run_test_script(interpreter=interpreter) - return output - - def compare(self, output, expected, ignore_imports=True): - """ - Compares whether the code blocks are equal. If not, raises an - exception so the test fails. Ignores any trailing whitespace like - blank lines. - - If ignore_imports is True, passes the code blocks into the - strip_future_imports method. - - If one code block is a unicode string and the other a - byte-string, it assumes the byte-string is encoded as utf-8. - """ - if ignore_imports: - output = self.strip_future_imports(output) - expected = self.strip_future_imports(expected) - if isinstance(output, bytes) and not isinstance(expected, bytes): - output = output.decode('utf-8') - if isinstance(expected, bytes) and not isinstance(output, bytes): - expected = expected.decode('utf-8') - self.assertEqual(order_future_lines(output.rstrip()), - expected.rstrip()) - - def strip_future_imports(self, code): - """ - Strips any of these import lines: - - from __future__ import <anything> - from future <anything> - from future.<anything> - from builtins <anything> - - or any line containing: - install_hooks() - or: - install_aliases() - - Limitation: doesn't handle imports split across multiple lines like - this: - - from __future__ import (absolute_import, division, print_function, - unicode_literals) - """ - output = [] - # We need .splitlines(keepends=True), which doesn't exist on Py2, - # so we use this instead: - for line in code.split('\n'): - if not (line.startswith('from __future__ import 
') - or line.startswith('from future ') - or line.startswith('from builtins ') - or 'install_hooks()' in line - or 'install_aliases()' in line - # but don't match "from future_builtins" :) - or line.startswith('from future.')): - output.append(line) - return '\n'.join(output) - - def convert_check(self, before, expected, stages=(1, 2), all_imports=False, - ignore_imports=True, from3=False, run=True, - conservative=False): - """ - Convenience method that calls convert() and compare(). - - Reformats the code blocks automatically using the reformat_code() - function. - - If all_imports is passed, we add the appropriate import headers - for the stage(s) selected to the ``expected`` code-block, so they - needn't appear repeatedly in the test code. - - If ignore_imports is True, ignores the presence of any lines - beginning: - - from __future__ import ... - from future import ... - - for the purpose of the comparison. - """ - output = self.convert(before, stages=stages, all_imports=all_imports, - from3=from3, run=run, conservative=conservative) - if all_imports: - headers = self.headers2 if 2 in stages else self.headers1 - else: - headers = '' - - reformatted = reformat_code(expected) - if headers in reformatted: - headers = '' - - self.compare(output, headers + reformatted, - ignore_imports=ignore_imports) - - def unchanged(self, code, **kwargs): - """ - Convenience method to ensure the code is unchanged by the - futurize process. - """ - self.convert_check(code, code, **kwargs) - - def _write_test_script(self, code, filename='mytestscript.py'): - """ - Dedents the given code (a multiline string) and writes it out to - a file in a temporary folder like /tmp/tmpUDCn7x/mytestscript.py. 
- """ - if isinstance(code, bytes): - code = code.decode('utf-8') - # Be explicit about encoding the temp file as UTF-8 (issue #63): - with io.open(self.tempdir + filename, 'wt', encoding='utf-8') as f: - f.write(dedent(code)) - - def _read_test_script(self, filename='mytestscript.py'): - with io.open(self.tempdir + filename, 'rt', encoding='utf-8') as f: - newsource = f.read() - return newsource - - def _futurize_test_script(self, filename='mytestscript.py', stages=(1, 2), - all_imports=False, from3=False, - conservative=False): - params = [] - stages = list(stages) - if all_imports: - params.append('--all-imports') - if from3: - script = 'pasteurize.py' - else: - script = 'futurize.py' - if stages == [1]: - params.append('--stage1') - elif stages == [2]: - params.append('--stage2') - else: - assert stages == [1, 2] - if conservative: - params.append('--conservative') - # No extra params needed - - # Absolute file path: - fn = self.tempdir + filename - call_args = [sys.executable, script] + params + ['-w', fn] - try: - output = check_output(call_args, stderr=STDOUT, env=self.env) - except CalledProcessError as e: - with open(fn) as f: - msg = ( - 'Error running the command %s\n' - '%s\n' - 'Contents of file %s:\n' - '\n' - '%s') % ( - ' '.join(call_args), - 'env=%s' % self.env, - fn, - '----\n%s\n----' % f.read(), - ) - ErrorClass = (FuturizeError if 'futurize' in script else PasteurizeError) - - if not hasattr(e, 'output'): - # The attribute CalledProcessError.output doesn't exist on Py2.6 - e.output = None - raise ErrorClass(msg, e.returncode, e.cmd, output=e.output) - return output - - def _run_test_script(self, filename='mytestscript.py', - interpreter=sys.executable): - # Absolute file path: - fn = self.tempdir + filename - try: - output = check_output([interpreter, fn], - env=self.env, stderr=STDOUT) - except CalledProcessError as e: - with open(fn) as f: - msg = ( - 'Error running the command %s\n' - '%s\n' - 'Contents of file %s:\n' - '\n' - '%s') % ( - ' 
'.join([interpreter, fn]), - 'env=%s' % self.env, - fn, - '----\n%s\n----' % f.read(), - ) - if not hasattr(e, 'output'): - # The attribute CalledProcessError.output doesn't exist on Py2.6 - e.output = None - raise VerboseCalledProcessError(msg, e.returncode, e.cmd, output=e.output) - return output - - -# Decorator to skip some tests on Python 2.6 ... -skip26 = unittest.skipIf(PY26, "this test is known to fail on Py2.6") - - -def expectedFailurePY3(func): - if not PY3: - return func - return unittest.expectedFailure(func) - -def expectedFailurePY26(func): - if not PY26: - return func - return unittest.expectedFailure(func) - - -def expectedFailurePY27(func): - if not PY27: - return func - return unittest.expectedFailure(func) - - -def expectedFailurePY2(func): - if not PY2: - return func - return unittest.expectedFailure(func) - - -# Renamed in Py3.3: -if not hasattr(unittest.TestCase, 'assertRaisesRegex'): - unittest.TestCase.assertRaisesRegex = unittest.TestCase.assertRaisesRegexp - -# From Py3.3: -def assertRegex(self, text, expected_regex, msg=None): - """Fail the test unless the text matches the regular expression.""" - if isinstance(expected_regex, (str, unicode)): - assert expected_regex, "expected_regex must not be empty." 
- expected_regex = re.compile(expected_regex) - if not expected_regex.search(text): - msg = msg or "Regex didn't match" - msg = '%s: %r not found in %r' % (msg, expected_regex.pattern, text) - raise self.failureException(msg) - -if not hasattr(unittest.TestCase, 'assertRegex'): - bind_method(unittest.TestCase, 'assertRegex', assertRegex) - -class _AssertRaisesBaseContext(object): - - def __init__(self, expected, test_case, callable_obj=None, - expected_regex=None): - self.expected = expected - self.test_case = test_case - if callable_obj is not None: - try: - self.obj_name = callable_obj.__name__ - except AttributeError: - self.obj_name = str(callable_obj) - else: - self.obj_name = None - if isinstance(expected_regex, (bytes, str)): - expected_regex = re.compile(expected_regex) - self.expected_regex = expected_regex - self.msg = None - - def _raiseFailure(self, standardMsg): - msg = self.test_case._formatMessage(self.msg, standardMsg) - raise self.test_case.failureException(msg) - - def handle(self, name, callable_obj, args, kwargs): - """ - If callable_obj is None, assertRaises/Warns is being used as a - context manager, so check for a 'msg' kwarg and return self. - If callable_obj is not None, call it passing args and kwargs. - """ - if callable_obj is None: - self.msg = kwargs.pop('msg', None) - return self - with self: - callable_obj(*args, **kwargs) - -class _AssertWarnsContext(_AssertRaisesBaseContext): - """A context manager used to implement TestCase.assertWarns* methods.""" - - def __enter__(self): - # The __warningregistry__'s need to be in a pristine state for tests - # to work properly. 
- for v in sys.modules.values(): - if getattr(v, '__warningregistry__', None): - v.__warningregistry__ = {} - self.warnings_manager = warnings.catch_warnings(record=True) - self.warnings = self.warnings_manager.__enter__() - warnings.simplefilter("always", self.expected) - return self - - def __exit__(self, exc_type, exc_value, tb): - self.warnings_manager.__exit__(exc_type, exc_value, tb) - if exc_type is not None: - # let unexpected exceptions pass through - return - try: - exc_name = self.expected.__name__ - except AttributeError: - exc_name = str(self.expected) - first_matching = None - for m in self.warnings: - w = m.message - if not isinstance(w, self.expected): - continue - if first_matching is None: - first_matching = w - if (self.expected_regex is not None and - not self.expected_regex.search(str(w))): - continue - # store warning for later retrieval - self.warning = w - self.filename = m.filename - self.lineno = m.lineno - return - # Now we simply try to choose a helpful failure message - if first_matching is not None: - self._raiseFailure('"{}" does not match "{}"'.format( - self.expected_regex.pattern, str(first_matching))) - if self.obj_name: - self._raiseFailure("{} not triggered by {}".format(exc_name, - self.obj_name)) - else: - self._raiseFailure("{} not triggered".format(exc_name)) - - -def assertWarns(self, expected_warning, callable_obj=None, *args, **kwargs): - """Fail unless a warning of class warnClass is triggered - by callable_obj when invoked with arguments args and keyword - arguments kwargs. If a different type of warning is - triggered, it will not be handled: depending on the other - warning filtering rules in effect, it might be silenced, printed - out, or raised as an exception. - - If called with callable_obj omitted or None, will return a - context object used like this:: - - with self.assertWarns(SomeWarning): - do_something() - - An optional keyword argument 'msg' can be provided when assertWarns - is used as a context object. 
- - The context manager keeps a reference to the first matching - warning as the 'warning' attribute; similarly, the 'filename' - and 'lineno' attributes give you information about the line - of Python code from which the warning was triggered. - This allows you to inspect the warning after the assertion:: - - with self.assertWarns(SomeWarning) as cm: - do_something() - the_warning = cm.warning - self.assertEqual(the_warning.some_attribute, 147) - """ - context = _AssertWarnsContext(expected_warning, self, callable_obj) - return context.handle('assertWarns', callable_obj, args, kwargs) - -if not hasattr(unittest.TestCase, 'assertWarns'): - bind_method(unittest.TestCase, 'assertWarns', assertWarns) diff --git a/contrib/python/future/future/types/__init__.py b/contrib/python/future/future/types/__init__.py deleted file mode 100644 index 062507703eb..00000000000 --- a/contrib/python/future/future/types/__init__.py +++ /dev/null @@ -1,257 +0,0 @@ -""" -This module contains backports the data types that were significantly changed -in the transition from Python 2 to Python 3. - -- an implementation of Python 3's bytes object (pure Python subclass of - Python 2's builtin 8-bit str type) -- an implementation of Python 3's str object (pure Python subclass of - Python 2's builtin unicode type) -- a backport of the range iterator from Py3 with slicing support - -It is used as follows:: - - from __future__ import division, absolute_import, print_function - from builtins import bytes, dict, int, range, str - -to bring in the new semantics for these functions from Python 3. 
And -then, for example:: - - b = bytes(b'ABCD') - assert list(b) == [65, 66, 67, 68] - assert repr(b) == "b'ABCD'" - assert [65, 66] in b - - # These raise TypeErrors: - # b + u'EFGH' - # b.split(u'B') - # bytes(b',').join([u'Fred', u'Bill']) - - - s = str(u'ABCD') - - # These raise TypeErrors: - # s.join([b'Fred', b'Bill']) - # s.startswith(b'A') - # b'B' in s - # s.find(b'A') - # s.replace(u'A', b'a') - - # This raises an AttributeError: - # s.decode('utf-8') - - assert repr(s) == 'ABCD' # consistent repr with Py3 (no u prefix) - - - for i in range(10**11)[:10]: - pass - -and:: - - class VerboseList(list): - def append(self, item): - print('Adding an item') - super().append(item) # new simpler super() function - -For more information: ---------------------- - -- future.types.newbytes -- future.types.newdict -- future.types.newint -- future.types.newobject -- future.types.newrange -- future.types.newstr - - -Notes -===== - -range() -------- -``range`` is a custom class that backports the slicing behaviour from -Python 3 (based on the ``xrange`` module by Dan Crosta). See the -``newrange`` module docstring for more details. - - -super() -------- -``super()`` is based on Ryan Kelly's ``magicsuper`` module. See the -``newsuper`` module docstring for more details. - - -round() -------- -Python 3 modifies the behaviour of ``round()`` to use "Banker's Rounding". -See http://stackoverflow.com/a/10825998. See the ``newround`` module -docstring for more details. - -""" - -from __future__ import absolute_import, division, print_function - -import functools -from numbers import Integral - -from future import utils - - -# Some utility functions to enforce strict type-separation of unicode str and -# bytes: -def disallow_types(argnums, disallowed_types): - """ - A decorator that raises a TypeError if any of the given numbered - arguments is of the corresponding given type (e.g. bytes or unicode - string). 
- - For example: - - @disallow_types([0, 1], [unicode, bytes]) - def f(a, b): - pass - - raises a TypeError when f is called if a unicode object is passed as - `a` or a bytes object is passed as `b`. - - This also skips over keyword arguments, so - - @disallow_types([0, 1], [unicode, bytes]) - def g(a, b=None): - pass - - doesn't raise an exception if g is called with only one argument a, - e.g.: - - g(b'Byte string') - - Example use: - - >>> class newbytes(object): - ... @disallow_types([1], [unicode]) - ... def __add__(self, other): - ... pass - - >>> newbytes('1234') + u'1234' #doctest: +IGNORE_EXCEPTION_DETAIL - Traceback (most recent call last): - ... - TypeError: can't concat 'bytes' to (unicode) str - """ - - def decorator(function): - - @functools.wraps(function) - def wrapper(*args, **kwargs): - # These imports are just for this decorator, and are defined here - # to prevent circular imports: - from .newbytes import newbytes - from .newint import newint - from .newstr import newstr - - errmsg = "argument can't be {0}" - for (argnum, mytype) in zip(argnums, disallowed_types): - # Handle the case where the type is passed as a string like 'newbytes'. - if isinstance(mytype, str) or isinstance(mytype, bytes): - mytype = locals()[mytype] - - # Only restrict kw args only if they are passed: - if len(args) <= argnum: - break - - # Here we use type() rather than isinstance() because - # __instancecheck__ is being overridden. E.g. - # isinstance(b'abc', newbytes) is True on Py2. - if type(args[argnum]) == mytype: - raise TypeError(errmsg.format(mytype)) - - return function(*args, **kwargs) - return wrapper - return decorator - - -def no(mytype, argnums=(1,)): - """ - A shortcut for the disallow_types decorator that disallows only one type - (in any position in argnums). - - Example use: - - >>> class newstr(object): - ... @no('bytes') - ... def __add__(self, other): - ... 
pass - - >>> newstr(u'1234') + b'1234' #doctest: +IGNORE_EXCEPTION_DETAIL - Traceback (most recent call last): - ... - TypeError: argument can't be bytes - - The object can also be passed directly, but passing the string helps - to prevent circular import problems. - """ - if isinstance(argnums, Integral): - argnums = (argnums,) - disallowed_types = [mytype] * len(argnums) - return disallow_types(argnums, disallowed_types) - - -def issubset(list1, list2): - """ - Examples: - - >>> issubset([], [65, 66, 67]) - True - >>> issubset([65], [65, 66, 67]) - True - >>> issubset([65, 66], [65, 66, 67]) - True - >>> issubset([65, 67], [65, 66, 67]) - False - """ - n = len(list1) - for startpos in range(len(list2) - n + 1): - if list2[startpos:startpos+n] == list1: - return True - return False - - -if utils.PY3: - import builtins - bytes = builtins.bytes - dict = builtins.dict - int = builtins.int - list = builtins.list - object = builtins.object - range = builtins.range - str = builtins.str - - # The identity mapping - newtypes = {bytes: bytes, - dict: dict, - int: int, - list: list, - object: object, - range: range, - str: str} - - __all__ = ['newtypes'] - -else: - - from .newbytes import newbytes - from .newdict import newdict - from .newint import newint - from .newlist import newlist - from .newrange import newrange - from .newobject import newobject - from .newstr import newstr - - newtypes = {bytes: newbytes, - dict: newdict, - int: newint, - long: newint, - list: newlist, - object: newobject, - range: newrange, - str: newbytes, - unicode: newstr} - - __all__ = ['newbytes', 'newdict', 'newint', 'newlist', 'newrange', 'newstr', 'newtypes'] diff --git a/contrib/python/future/future/types/newbytes.py b/contrib/python/future/future/types/newbytes.py deleted file mode 100644 index c9d584a7cad..00000000000 --- a/contrib/python/future/future/types/newbytes.py +++ /dev/null @@ -1,460 +0,0 @@ -""" -Pure-Python implementation of a Python 3-like bytes object for Python 2. 
- -Why do this? Without it, the Python 2 bytes object is a very, very -different beast to the Python 3 bytes object. -""" - -from numbers import Integral -import string -import copy - -from future.utils import istext, isbytes, PY2, PY3, with_metaclass -from future.types import no, issubset -from future.types.newobject import newobject - -if PY2: - from collections import Iterable -else: - from collections.abc import Iterable - - -_builtin_bytes = bytes - -if PY3: - # We'll probably never use newstr on Py3 anyway... - unicode = str - - -class BaseNewBytes(type): - def __instancecheck__(cls, instance): - if cls == newbytes: - return isinstance(instance, _builtin_bytes) - else: - return issubclass(instance.__class__, cls) - - -def _newchr(x): - if isinstance(x, str): # this happens on pypy - return x.encode('ascii') - else: - return chr(x) - - -class newbytes(with_metaclass(BaseNewBytes, _builtin_bytes)): - """ - A backport of the Python 3 bytes object to Py2 - """ - def __new__(cls, *args, **kwargs): - """ - From the Py3 bytes docstring: - - bytes(iterable_of_ints) -> bytes - bytes(string, encoding[, errors]) -> bytes - bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer - bytes(int) -> bytes object of size given by the parameter initialized with null bytes - bytes() -> empty bytes object - - Construct an immutable array of bytes from: - - an iterable yielding integers in range(256) - - a text string encoded using the specified encoding - - any object implementing the buffer API. - - an integer - """ - - encoding = None - errors = None - - if len(args) == 0: - return super(newbytes, cls).__new__(cls) - elif len(args) >= 2: - args = list(args) - if len(args) == 3: - errors = args.pop() - encoding=args.pop() - # Was: elif isinstance(args[0], newbytes): - # We use type() instead of the above because we're redefining - # this to be True for all unicode string subclasses. Warning: - # This may render newstr un-subclassable. 
- if type(args[0]) == newbytes: - # Special-case: for consistency with Py3.3, we return the same object - # (with the same id) if a newbytes object is passed into the - # newbytes constructor. - return args[0] - elif isinstance(args[0], _builtin_bytes): - value = args[0] - elif isinstance(args[0], unicode): - try: - if 'encoding' in kwargs: - assert encoding is None - encoding = kwargs['encoding'] - if 'errors' in kwargs: - assert errors is None - errors = kwargs['errors'] - except AssertionError: - raise TypeError('Argument given by name and position') - if encoding is None: - raise TypeError('unicode string argument without an encoding') - ### - # Was: value = args[0].encode(**kwargs) - # Python 2.6 string encode() method doesn't take kwargs: - # Use this instead: - newargs = [encoding] - if errors is not None: - newargs.append(errors) - value = args[0].encode(*newargs) - ### - elif hasattr(args[0], '__bytes__'): - value = args[0].__bytes__() - elif isinstance(args[0], Iterable): - if len(args[0]) == 0: - # This could be an empty list or tuple. Return b'' as on Py3. - value = b'' - else: - # Was: elif len(args[0])>0 and isinstance(args[0][0], Integral): - # # It's a list of integers - # But then we can't index into e.g. frozensets. Try to proceed - # anyway. - try: - value = bytearray([_newchr(x) for x in args[0]]) - except: - raise ValueError('bytes must be in range(0, 256)') - elif isinstance(args[0], Integral): - if args[0] < 0: - raise ValueError('negative count') - value = b'\x00' * args[0] - else: - value = args[0] - if type(value) == newbytes: - # Above we use type(...) rather than isinstance(...) because the - # newbytes metaclass overrides __instancecheck__. - # oldbytes(value) gives the wrong thing on Py2: the same - # result as str(value) on Py3, e.g. "b'abc'". (Issue #193). 
- # So we handle this case separately: - return copy.copy(value) - else: - return super(newbytes, cls).__new__(cls, value) - - def __repr__(self): - return 'b' + super(newbytes, self).__repr__() - - def __str__(self): - return 'b' + "'{0}'".format(super(newbytes, self).__str__()) - - def __getitem__(self, y): - value = super(newbytes, self).__getitem__(y) - if isinstance(y, Integral): - return ord(value) - else: - return newbytes(value) - - def __getslice__(self, *args): - return self.__getitem__(slice(*args)) - - def __contains__(self, key): - if isinstance(key, int): - newbyteskey = newbytes([key]) - # Don't use isinstance() here because we only want to catch - # newbytes, not Python 2 str: - elif type(key) == newbytes: - newbyteskey = key - else: - newbyteskey = newbytes(key) - return issubset(list(newbyteskey), list(self)) - - @no(unicode) - def __add__(self, other): - return newbytes(super(newbytes, self).__add__(other)) - - @no(unicode) - def __radd__(self, left): - return newbytes(left) + self - - @no(unicode) - def __mul__(self, other): - return newbytes(super(newbytes, self).__mul__(other)) - - @no(unicode) - def __rmul__(self, other): - return newbytes(super(newbytes, self).__rmul__(other)) - - def __mod__(self, vals): - if isinstance(vals, newbytes): - vals = _builtin_bytes.__str__(vals) - - elif isinstance(vals, tuple): - newvals = [] - for v in vals: - if isinstance(v, newbytes): - v = _builtin_bytes.__str__(v) - newvals.append(v) - vals = tuple(newvals) - - elif (hasattr(vals.__class__, '__getitem__') and - hasattr(vals.__class__, 'iteritems')): - for k, v in vals.iteritems(): - if isinstance(v, newbytes): - vals[k] = _builtin_bytes.__str__(v) - - return _builtin_bytes.__mod__(self, vals) - - def __imod__(self, other): - return self.__mod__(other) - - def join(self, iterable_of_bytes): - errmsg = 'sequence item {0}: expected bytes, {1} found' - if isbytes(iterable_of_bytes) or istext(iterable_of_bytes): - raise TypeError(errmsg.format(0, 
type(iterable_of_bytes))) - for i, item in enumerate(iterable_of_bytes): - if istext(item): - raise TypeError(errmsg.format(i, type(item))) - return newbytes(super(newbytes, self).join(iterable_of_bytes)) - - @classmethod - def fromhex(cls, string): - # Only on Py2: - return cls(string.replace(' ', '').decode('hex')) - - @no(unicode) - def find(self, sub, *args): - return super(newbytes, self).find(sub, *args) - - @no(unicode) - def rfind(self, sub, *args): - return super(newbytes, self).rfind(sub, *args) - - @no(unicode, (1, 2)) - def replace(self, old, new, *args): - return newbytes(super(newbytes, self).replace(old, new, *args)) - - def encode(self, *args): - raise AttributeError("encode method has been disabled in newbytes") - - def decode(self, encoding='utf-8', errors='strict'): - """ - Returns a newstr (i.e. unicode subclass) - - Decode B using the codec registered for encoding. Default encoding - is 'utf-8'. errors may be given to set a different error - handling scheme. Default is 'strict' meaning that encoding errors raise - a UnicodeDecodeError. Other possible values are 'ignore' and 'replace' - as well as any other name registered with codecs.register_error that is - able to handle UnicodeDecodeErrors. - """ - # Py2 str.encode() takes encoding and errors as optional parameter, - # not keyword arguments as in Python 3 str. - - from future.types.newstr import newstr - - if errors == 'surrogateescape': - from future.utils.surrogateescape import register_surrogateescape - register_surrogateescape() - - return newstr(super(newbytes, self).decode(encoding, errors)) - - # This is currently broken: - # # We implement surrogateescape error handling here in addition rather - # # than relying on the custom error handler from - # # future.utils.surrogateescape to be registered globally, even though - # # that is fine in the case of decoding. (But not encoding: see the - # # comments in newstr.encode()``.) 
- # - # if errors == 'surrogateescape': - # # Decode char by char - # mybytes = [] - # for code in self: - # # Code is an int - # if 0x80 <= code <= 0xFF: - # b = 0xDC00 + code - # elif code <= 0x7F: - # b = _unichr(c).decode(encoding=encoding) - # else: - # # # It may be a bad byte - # # FIXME: What to do in this case? See the Py3 docs / tests. - # # # Try swallowing it. - # # continue - # # print("RAISE!") - # raise NotASurrogateError - # mybytes.append(b) - # return newbytes(mybytes) - # return newbytes(super(newstr, self).decode(encoding, errors)) - - @no(unicode) - def startswith(self, prefix, *args): - return super(newbytes, self).startswith(prefix, *args) - - @no(unicode) - def endswith(self, prefix, *args): - return super(newbytes, self).endswith(prefix, *args) - - @no(unicode) - def split(self, sep=None, maxsplit=-1): - # Py2 str.split() takes maxsplit as an optional parameter, not as a - # keyword argument as in Python 3 bytes. - parts = super(newbytes, self).split(sep, maxsplit) - return [newbytes(part) for part in parts] - - def splitlines(self, keepends=False): - """ - B.splitlines([keepends]) -> list of lines - - Return a list of the lines in B, breaking at line boundaries. - Line breaks are not included in the resulting list unless keepends - is given and true. - """ - # Py2 str.splitlines() takes keepends as an optional parameter, - # not as a keyword argument as in Python 3 bytes. - parts = super(newbytes, self).splitlines(keepends) - return [newbytes(part) for part in parts] - - @no(unicode) - def rsplit(self, sep=None, maxsplit=-1): - # Py2 str.rsplit() takes maxsplit as an optional parameter, not as a - # keyword argument as in Python 3 bytes. 
- parts = super(newbytes, self).rsplit(sep, maxsplit) - return [newbytes(part) for part in parts] - - @no(unicode) - def partition(self, sep): - parts = super(newbytes, self).partition(sep) - return tuple(newbytes(part) for part in parts) - - @no(unicode) - def rpartition(self, sep): - parts = super(newbytes, self).rpartition(sep) - return tuple(newbytes(part) for part in parts) - - @no(unicode, (1,)) - def rindex(self, sub, *args): - ''' - S.rindex(sub [,start [,end]]) -> int - - Like S.rfind() but raise ValueError when the substring is not found. - ''' - pos = self.rfind(sub, *args) - if pos == -1: - raise ValueError('substring not found') - - @no(unicode) - def index(self, sub, *args): - ''' - Returns index of sub in bytes. - Raises ValueError if byte is not in bytes and TypeError if can't - be converted bytes or its length is not 1. - ''' - if isinstance(sub, int): - if len(args) == 0: - start, end = 0, len(self) - elif len(args) == 1: - start = args[0] - elif len(args) == 2: - start, end = args - else: - raise TypeError('takes at most 3 arguments') - return list(self)[start:end].index(sub) - if not isinstance(sub, bytes): - try: - sub = self.__class__(sub) - except (TypeError, ValueError): - raise TypeError("can't convert sub to bytes") - try: - return super(newbytes, self).index(sub, *args) - except ValueError: - raise ValueError('substring not found') - - def __eq__(self, other): - if isinstance(other, (_builtin_bytes, bytearray)): - return super(newbytes, self).__eq__(other) - else: - return False - - def __ne__(self, other): - if isinstance(other, _builtin_bytes): - return super(newbytes, self).__ne__(other) - else: - return True - - unorderable_err = 'unorderable types: bytes() and {0}' - - def __lt__(self, other): - if isinstance(other, _builtin_bytes): - return super(newbytes, self).__lt__(other) - raise TypeError(self.unorderable_err.format(type(other))) - - def __le__(self, other): - if isinstance(other, _builtin_bytes): - return super(newbytes, 
self).__le__(other) - raise TypeError(self.unorderable_err.format(type(other))) - - def __gt__(self, other): - if isinstance(other, _builtin_bytes): - return super(newbytes, self).__gt__(other) - raise TypeError(self.unorderable_err.format(type(other))) - - def __ge__(self, other): - if isinstance(other, _builtin_bytes): - return super(newbytes, self).__ge__(other) - raise TypeError(self.unorderable_err.format(type(other))) - - def __native__(self): - # We can't just feed a newbytes object into str(), because - # newbytes.__str__() returns e.g. "b'blah'", consistent with Py3 bytes. - return super(newbytes, self).__str__() - - def __getattribute__(self, name): - """ - A trick to cause the ``hasattr`` builtin-fn to return False for - the 'encode' method on Py2. - """ - if name in ['encode', u'encode']: - raise AttributeError("encode method has been disabled in newbytes") - return super(newbytes, self).__getattribute__(name) - - @no(unicode) - def rstrip(self, bytes_to_strip=None): - """ - Strip trailing bytes contained in the argument. - If the argument is omitted, strip trailing ASCII whitespace. - """ - return newbytes(super(newbytes, self).rstrip(bytes_to_strip)) - - @no(unicode) - def strip(self, bytes_to_strip=None): - """ - Strip leading and trailing bytes contained in the argument. - If the argument is omitted, strip trailing ASCII whitespace. - """ - return newbytes(super(newbytes, self).strip(bytes_to_strip)) - - def lower(self): - """ - b.lower() -> copy of b - - Return a copy of b with all ASCII characters converted to lowercase. - """ - return newbytes(super(newbytes, self).lower()) - - @no(unicode) - def upper(self): - """ - b.upper() -> copy of b - - Return a copy of b with all ASCII characters converted to uppercase. 
- """ - return newbytes(super(newbytes, self).upper()) - - @classmethod - @no(unicode) - def maketrans(cls, frm, to): - """ - B.maketrans(frm, to) -> translation table - - Return a translation table (a bytes object of length 256) suitable - for use in the bytes or bytearray translate method where each byte - in frm is mapped to the byte at the same position in to. - The bytes objects frm and to must be of the same length. - """ - return newbytes(string.maketrans(frm, to)) - - -__all__ = ['newbytes'] diff --git a/contrib/python/future/future/types/newdict.py b/contrib/python/future/future/types/newdict.py deleted file mode 100644 index 3f3a559dd5e..00000000000 --- a/contrib/python/future/future/types/newdict.py +++ /dev/null @@ -1,111 +0,0 @@ -""" -A dict subclass for Python 2 that behaves like Python 3's dict - -Example use: - ->>> from builtins import dict ->>> d1 = dict() # instead of {} for an empty dict ->>> d2 = dict(key1='value1', key2='value2') - -The keys, values and items methods now return iterators on Python 2.x -(with set-like behaviour on Python 2.7). - ->>> for d in (d1, d2): -... assert not isinstance(d.keys(), list) -... assert not isinstance(d.values(), list) -... 
assert not isinstance(d.items(), list) -""" - -import sys - -from future.utils import with_metaclass -from future.types.newobject import newobject - - -_builtin_dict = dict -ver = sys.version_info[:2] - - -class BaseNewDict(type): - def __instancecheck__(cls, instance): - if cls == newdict: - return isinstance(instance, _builtin_dict) - else: - return issubclass(instance.__class__, cls) - - -class newdict(with_metaclass(BaseNewDict, _builtin_dict)): - """ - A backport of the Python 3 dict object to Py2 - """ - def items(self): - """ - On Python 2.7+: - D.items() -> a set-like object providing a view on D's items - On Python 2.6: - D.items() -> an iterator over D's items - """ - if ver == (2, 7): - return self.viewitems() - elif ver == (2, 6): - return self.iteritems() - elif ver >= (3, 0): - return self.items() - - def keys(self): - """ - On Python 2.7+: - D.keys() -> a set-like object providing a view on D's keys - On Python 2.6: - D.keys() -> an iterator over D's keys - """ - if ver == (2, 7): - return self.viewkeys() - elif ver == (2, 6): - return self.iterkeys() - elif ver >= (3, 0): - return self.keys() - - def values(self): - """ - On Python 2.7+: - D.values() -> a set-like object providing a view on D's values - On Python 2.6: - D.values() -> an iterator over D's values - """ - if ver == (2, 7): - return self.viewvalues() - elif ver == (2, 6): - return self.itervalues() - elif ver >= (3, 0): - return self.values() - - def __new__(cls, *args, **kwargs): - """ - dict() -> new empty dictionary - dict(mapping) -> new dictionary initialized from a mapping object's - (key, value) pairs - dict(iterable) -> new dictionary initialized as if via: - d = {} - for k, v in iterable: - d[k] = v - dict(**kwargs) -> new dictionary initialized with the name=value pairs - in the keyword argument list. 
For example: dict(one=1, two=2) - """ - - if len(args) == 0: - return super(newdict, cls).__new__(cls) - elif type(args[0]) == newdict: - value = args[0] - else: - value = args[0] - return super(newdict, cls).__new__(cls, value) - - def __native__(self): - """ - Hook for the future.utils.native() function - """ - return dict(self) - - -__all__ = ['newdict'] diff --git a/contrib/python/future/future/types/newint.py b/contrib/python/future/future/types/newint.py deleted file mode 100644 index 748dba9d234..00000000000 --- a/contrib/python/future/future/types/newint.py +++ /dev/null @@ -1,381 +0,0 @@ -""" -Backport of Python 3's int, based on Py2's long. - -They are very similar. The most notable difference is: - -- representation: trailing L in Python 2 removed in Python 3 -""" -from __future__ import division - -import struct - -from future.types.newbytes import newbytes -from future.types.newobject import newobject -from future.utils import PY3, isint, istext, isbytes, with_metaclass, native - - -if PY3: - long = int - from collections.abc import Iterable -else: - from collections import Iterable - - -class BaseNewInt(type): - def __instancecheck__(cls, instance): - if cls == newint: - # Special case for Py2 short or long int - return isinstance(instance, (int, long)) - else: - return issubclass(instance.__class__, cls) - - -class newint(with_metaclass(BaseNewInt, long)): - """ - A backport of the Python 3 int object to Py2 - """ - def __new__(cls, x=0, base=10): - """ - From the Py3 int docstring: - - | int(x=0) -> integer - | int(x, base=10) -> integer - | - | Convert a number or string to an integer, or return 0 if no - | arguments are given. If x is a number, return x.__int__(). For - | floating point numbers, this truncates towards zero. - | - | If x is not a number or if base is given, then x must be a string, - | bytes, or bytearray instance representing an integer literal in the - | given base. 
The literal can be preceded by '+' or '-' and be - | surrounded by whitespace. The base defaults to 10. Valid bases are - | 0 and 2-36. Base 0 means to interpret the base from the string as an - | integer literal. - | >>> int('0b100', base=0) - | 4 - - """ - try: - val = x.__int__() - except AttributeError: - val = x - else: - if not isint(val): - raise TypeError('__int__ returned non-int ({0})'.format( - type(val))) - - if base != 10: - # Explicit base - if not (istext(val) or isbytes(val) or isinstance(val, bytearray)): - raise TypeError( - "int() can't convert non-string with explicit base") - try: - return super(newint, cls).__new__(cls, val, base) - except TypeError: - return super(newint, cls).__new__(cls, newbytes(val), base) - # After here, base is 10 - try: - return super(newint, cls).__new__(cls, val) - except TypeError: - # Py2 long doesn't handle bytearray input with an explicit base, so - # handle this here. - # Py3: int(bytearray(b'10'), 2) == 2 - # Py2: int(bytearray(b'10'), 2) == 2 raises TypeError - # Py2: long(bytearray(b'10'), 2) == 2 raises TypeError - try: - return super(newint, cls).__new__(cls, newbytes(val)) - except: - raise TypeError("newint argument must be a string or a number," - "not '{0}'".format(type(val))) - - def __repr__(self): - """ - Without the L suffix - """ - value = super(newint, self).__repr__() - assert value[-1] == 'L' - return value[:-1] - - def __add__(self, other): - value = super(newint, self).__add__(other) - if value is NotImplemented: - return long(self) + other - return newint(value) - - def __radd__(self, other): - value = super(newint, self).__radd__(other) - if value is NotImplemented: - return other + long(self) - return newint(value) - - def __sub__(self, other): - value = super(newint, self).__sub__(other) - if value is NotImplemented: - return long(self) - other - return newint(value) - - def __rsub__(self, other): - value = super(newint, self).__rsub__(other) - if value is NotImplemented: - return other - 
long(self) - return newint(value) - - def __mul__(self, other): - value = super(newint, self).__mul__(other) - if isint(value): - return newint(value) - elif value is NotImplemented: - return long(self) * other - return value - - def __rmul__(self, other): - value = super(newint, self).__rmul__(other) - if isint(value): - return newint(value) - elif value is NotImplemented: - return other * long(self) - return value - - def __div__(self, other): - # We override this rather than e.g. relying on object.__div__ or - # long.__div__ because we want to wrap the value in a newint() - # call if other is another int - value = long(self) / other - if isinstance(other, (int, long)): - return newint(value) - else: - return value - - def __rdiv__(self, other): - value = other / long(self) - if isinstance(other, (int, long)): - return newint(value) - else: - return value - - def __idiv__(self, other): - # long has no __idiv__ method. Use __itruediv__ and cast back to - # newint: - value = self.__itruediv__(other) - if isinstance(other, (int, long)): - return newint(value) - else: - return value - - def __truediv__(self, other): - value = super(newint, self).__truediv__(other) - if value is NotImplemented: - value = long(self) / other - return value - - def __rtruediv__(self, other): - return super(newint, self).__rtruediv__(other) - - def __itruediv__(self, other): - # long has no __itruediv__ method - mylong = long(self) - mylong /= other - return mylong - - def __floordiv__(self, other): - return newint(super(newint, self).__floordiv__(other)) - - def __rfloordiv__(self, other): - return newint(super(newint, self).__rfloordiv__(other)) - - def __ifloordiv__(self, other): - # long has no __ifloordiv__ method - mylong = long(self) - mylong //= other - return newint(mylong) - - def __mod__(self, other): - value = super(newint, self).__mod__(other) - if value is NotImplemented: - return long(self) % other - return newint(value) - - def __rmod__(self, other): - value = 
super(newint, self).__rmod__(other) - if value is NotImplemented: - return other % long(self) - return newint(value) - - def __divmod__(self, other): - value = super(newint, self).__divmod__(other) - if value is NotImplemented: - mylong = long(self) - return (mylong // other, mylong % other) - return (newint(value[0]), newint(value[1])) - - def __rdivmod__(self, other): - value = super(newint, self).__rdivmod__(other) - if value is NotImplemented: - mylong = long(self) - return (other // mylong, other % mylong) - return (newint(value[0]), newint(value[1])) - - def __pow__(self, other): - value = super(newint, self).__pow__(other) - if value is NotImplemented: - return long(self) ** other - return newint(value) - - def __rpow__(self, other): - value = super(newint, self).__rpow__(other) - if value is NotImplemented: - return other ** long(self) - return newint(value) - - def __lshift__(self, other): - if not isint(other): - raise TypeError( - "unsupported operand type(s) for <<: '%s' and '%s'" % - (type(self).__name__, type(other).__name__)) - return newint(super(newint, self).__lshift__(other)) - - def __rshift__(self, other): - if not isint(other): - raise TypeError( - "unsupported operand type(s) for >>: '%s' and '%s'" % - (type(self).__name__, type(other).__name__)) - return newint(super(newint, self).__rshift__(other)) - - def __and__(self, other): - if not isint(other): - raise TypeError( - "unsupported operand type(s) for &: '%s' and '%s'" % - (type(self).__name__, type(other).__name__)) - return newint(super(newint, self).__and__(other)) - - def __or__(self, other): - if not isint(other): - raise TypeError( - "unsupported operand type(s) for |: '%s' and '%s'" % - (type(self).__name__, type(other).__name__)) - return newint(super(newint, self).__or__(other)) - - def __xor__(self, other): - if not isint(other): - raise TypeError( - "unsupported operand type(s) for ^: '%s' and '%s'" % - (type(self).__name__, type(other).__name__)) - return newint(super(newint, 
self).__xor__(other)) - - def __neg__(self): - return newint(super(newint, self).__neg__()) - - def __pos__(self): - return newint(super(newint, self).__pos__()) - - def __abs__(self): - return newint(super(newint, self).__abs__()) - - def __invert__(self): - return newint(super(newint, self).__invert__()) - - def __int__(self): - return self - - def __nonzero__(self): - return self.__bool__() - - def __bool__(self): - """ - So subclasses can override this, Py3-style - """ - return super(newint, self).__nonzero__() - - def __native__(self): - return long(self) - - def to_bytes(self, length, byteorder='big', signed=False): - """ - Return an array of bytes representing an integer. - - The integer is represented using length bytes. An OverflowError is - raised if the integer is not representable with the given number of - bytes. - - The byteorder argument determines the byte order used to represent the - integer. If byteorder is 'big', the most significant byte is at the - beginning of the byte array. If byteorder is 'little', the most - significant byte is at the end of the byte array. To request the native - byte order of the host system, use `sys.byteorder' as the byte order value. - - The signed keyword-only argument determines whether two's complement is - used to represent the integer. If signed is False and a negative integer - is given, an OverflowError is raised. 
- """ - if length < 0: - raise ValueError("length argument must be non-negative") - if length == 0 and self == 0: - return newbytes() - if signed and self < 0: - bits = length * 8 - num = (2**bits) + self - if num <= 0: - raise OverflowError("int too smal to convert") - else: - if self < 0: - raise OverflowError("can't convert negative int to unsigned") - num = self - if byteorder not in ('little', 'big'): - raise ValueError("byteorder must be either 'little' or 'big'") - h = b'%x' % num - s = newbytes((b'0'*(len(h) % 2) + h).zfill(length*2).decode('hex')) - if signed: - high_set = s[0] & 0x80 - if self > 0 and high_set: - raise OverflowError("int too big to convert") - if self < 0 and not high_set: - raise OverflowError("int too small to convert") - if len(s) > length: - raise OverflowError("int too big to convert") - return s if byteorder == 'big' else s[::-1] - - @classmethod - def from_bytes(cls, mybytes, byteorder='big', signed=False): - """ - Return the integer represented by the given array of bytes. - - The mybytes argument must either support the buffer protocol or be an - iterable object producing bytes. Bytes and bytearray are examples of - built-in objects that support the buffer protocol. - - The byteorder argument determines the byte order used to represent the - integer. If byteorder is 'big', the most significant byte is at the - beginning of the byte array. If byteorder is 'little', the most - significant byte is at the end of the byte array. To request the native - byte order of the host system, use `sys.byteorder' as the byte order value. - - The signed keyword-only argument indicates whether two's complement is - used to represent the integer. - """ - if byteorder not in ('little', 'big'): - raise ValueError("byteorder must be either 'little' or 'big'") - if isinstance(mybytes, unicode): - raise TypeError("cannot convert unicode objects to bytes") - # mybytes can also be passed as a sequence of integers on Py3. 
- # Test for this: - elif isinstance(mybytes, Iterable): - mybytes = newbytes(mybytes) - b = mybytes if byteorder == 'big' else mybytes[::-1] - if len(b) == 0: - b = b'\x00' - # The encode() method has been disabled by newbytes, but Py2's - # str has it: - num = int(native(b).encode('hex'), 16) - if signed and (b[0] & 0x80): - num = num - (2 ** (len(b)*8)) - return cls(num) - - -# def _twos_comp(val, bits): -# """compute the 2's compliment of int value val""" -# if( (val&(1<<(bits-1))) != 0 ): -# val = val - (1<<bits) -# return val - - -__all__ = ['newint'] diff --git a/contrib/python/future/future/types/newlist.py b/contrib/python/future/future/types/newlist.py deleted file mode 100644 index 74d8f6cebed..00000000000 --- a/contrib/python/future/future/types/newlist.py +++ /dev/null @@ -1,95 +0,0 @@ -""" -A list subclass for Python 2 that behaves like Python 3's list. - -The primary difference is that lists have a .copy() method in Py3. - -Example use: - ->>> from builtins import list ->>> l1 = list() # instead of {} for an empty list ->>> l1.append('hello') ->>> l2 = l1.copy() - -""" - -import sys -import copy - -from future.utils import with_metaclass -from future.types.newobject import newobject - - -_builtin_list = list -ver = sys.version_info[:2] - - -class BaseNewList(type): - def __instancecheck__(cls, instance): - if cls == newlist: - return isinstance(instance, _builtin_list) - else: - return issubclass(instance.__class__, cls) - - -class newlist(with_metaclass(BaseNewList, _builtin_list)): - """ - A backport of the Python 3 list object to Py2 - """ - def copy(self): - """ - L.copy() -> list -- a shallow copy of L - """ - return copy.copy(self) - - def clear(self): - """L.clear() -> None -- remove all items from L""" - for i in range(len(self)): - self.pop() - - def __new__(cls, *args, **kwargs): - """ - list() -> new empty list - list(iterable) -> new list initialized from iterable's items - """ - - if len(args) == 0: - return super(newlist, 
cls).__new__(cls) - elif type(args[0]) == newlist: - value = args[0] - else: - value = args[0] - return super(newlist, cls).__new__(cls, value) - - def __add__(self, value): - return newlist(super(newlist, self).__add__(value)) - - def __radd__(self, left): - " left + self " - try: - return newlist(left) + self - except: - return NotImplemented - - def __getitem__(self, y): - """ - x.__getitem__(y) <==> x[y] - - Warning: a bug in Python 2.x prevents indexing via a slice from - returning a newlist object. - """ - if isinstance(y, slice): - return newlist(super(newlist, self).__getitem__(y)) - else: - return super(newlist, self).__getitem__(y) - - def __native__(self): - """ - Hook for the future.utils.native() function - """ - return list(self) - - def __nonzero__(self): - return len(self) > 0 - - -__all__ = ['newlist'] diff --git a/contrib/python/future/future/types/newmemoryview.py b/contrib/python/future/future/types/newmemoryview.py deleted file mode 100644 index 09f804dcf4a..00000000000 --- a/contrib/python/future/future/types/newmemoryview.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -A pretty lame implementation of a memoryview object for Python 2.6. -""" -from numbers import Integral -import string - -from future.utils import istext, isbytes, PY2, with_metaclass -from future.types import no, issubset - -if PY2: - from collections import Iterable -else: - from collections.abc import Iterable - -# class BaseNewBytes(type): -# def __instancecheck__(cls, instance): -# return isinstance(instance, _builtin_bytes) - - -class newmemoryview(object): # with_metaclass(BaseNewBytes, _builtin_bytes)): - """ - A pretty lame backport of the Python 2.7 and Python 3.x - memoryviewview object to Py2.6. 
- """ - def __init__(self, obj): - return obj - - -__all__ = ['newmemoryview'] diff --git a/contrib/python/future/future/types/newobject.py b/contrib/python/future/future/types/newobject.py deleted file mode 100644 index 31b84fc12cf..00000000000 --- a/contrib/python/future/future/types/newobject.py +++ /dev/null @@ -1,117 +0,0 @@ -""" -An object subclass for Python 2 that gives new-style classes written in the -style of Python 3 (with ``__next__`` and unicode-returning ``__str__`` methods) -the appropriate Python 2-style ``next`` and ``__unicode__`` methods for compatible. - -Example use:: - - from builtins import object - - my_unicode_str = u'Unicode string: \u5b54\u5b50' - - class A(object): - def __str__(self): - return my_unicode_str - - a = A() - print(str(a)) - - # On Python 2, these relations hold: - assert unicode(a) == my_unicode_string - assert str(a) == my_unicode_string.encode('utf-8') - - -Another example:: - - from builtins import object - - class Upper(object): - def __init__(self, iterable): - self._iter = iter(iterable) - def __next__(self): # note the Py3 interface - return next(self._iter).upper() - def __iter__(self): - return self - - assert list(Upper('hello')) == list('HELLO') - -""" - - -class newobject(object): - """ - A magical object class that provides Python 2 compatibility methods:: - next - __unicode__ - __nonzero__ - - Subclasses of this class can merely define the Python 3 methods (__next__, - __str__, and __bool__). - """ - def next(self): - if hasattr(self, '__next__'): - return type(self).__next__(self) - raise TypeError('newobject is not an iterator') - - def __unicode__(self): - # All subclasses of the builtin object should have __str__ defined. - # Note that old-style classes do not have __str__ defined. 
- if hasattr(self, '__str__'): - s = type(self).__str__(self) - else: - s = str(self) - if isinstance(s, unicode): - return s - else: - return s.decode('utf-8') - - def __nonzero__(self): - if hasattr(self, '__bool__'): - return type(self).__bool__(self) - if hasattr(self, '__len__'): - return type(self).__len__(self) - # object has no __nonzero__ method - return True - - # Are these ever needed? - # def __div__(self): - # return self.__truediv__() - - # def __idiv__(self, other): - # return self.__itruediv__(other) - - def __long__(self): - if not hasattr(self, '__int__'): - return NotImplemented - return self.__int__() # not type(self).__int__(self) - - # def __new__(cls, *args, **kwargs): - # """ - # dict() -> new empty dictionary - # dict(mapping) -> new dictionary initialized from a mapping object's - # (key, value) pairs - # dict(iterable) -> new dictionary initialized as if via: - # d = {} - # for k, v in iterable: - # d[k] = v - # dict(**kwargs) -> new dictionary initialized with the name=value pairs - # in the keyword argument list. For example: dict(one=1, two=2) - # """ - - # if len(args) == 0: - # return super(newdict, cls).__new__(cls) - # elif type(args[0]) == newdict: - # return args[0] - # else: - # value = args[0] - # return super(newdict, cls).__new__(cls, value) - - def __native__(self): - """ - Hook for the future.utils.native() function - """ - return object(self) - - __slots__ = [] - -__all__ = ['newobject'] diff --git a/contrib/python/future/future/types/newopen.py b/contrib/python/future/future/types/newopen.py deleted file mode 100644 index b75d45afb24..00000000000 --- a/contrib/python/future/future/types/newopen.py +++ /dev/null @@ -1,32 +0,0 @@ -""" -A substitute for the Python 3 open() function. - -Note that io.open() is more complete but maybe slower. Even so, the -completeness may be a better default. TODO: compare these -""" - -_builtin_open = open - -class newopen(object): - """Wrapper providing key part of Python 3 open() interface. 
- - From IPython's py3compat.py module. License: BSD. - """ - def __init__(self, fname, mode="r", encoding="utf-8"): - self.f = _builtin_open(fname, mode) - self.enc = encoding - - def write(self, s): - return self.f.write(s.encode(self.enc)) - - def read(self, size=-1): - return self.f.read(size).decode(self.enc) - - def close(self): - return self.f.close() - - def __enter__(self): - return self - - def __exit__(self, etype, value, traceback): - self.f.close() diff --git a/contrib/python/future/future/types/newrange.py b/contrib/python/future/future/types/newrange.py deleted file mode 100644 index eda01a5a502..00000000000 --- a/contrib/python/future/future/types/newrange.py +++ /dev/null @@ -1,170 +0,0 @@ -""" -Nearly identical to xrange.py, by Dan Crosta, from - - https://github.com/dcrosta/xrange.git - -This is included here in the ``future`` package rather than pointed to as -a dependency because there is no package for ``xrange`` on PyPI. It is -also tweaked to appear like a regular Python 3 ``range`` object rather -than a Python 2 xrange. - -From Dan Crosta's README: - - "A pure-Python implementation of Python 2.7's xrange built-in, with - some features backported from the Python 3.x range built-in (which - replaced xrange) in that version." - - Read more at - https://late.am/post/2012/06/18/what-the-heck-is-an-xrange -""" -from __future__ import absolute_import - -from future.utils import PY2 - -if PY2: - from collections import Sequence, Iterator -else: - from collections.abc import Sequence, Iterator -from itertools import islice - -from future.backports.misc import count # with step parameter on Py2.6 -# For backward compatibility with python-future versions < 0.14.4: -_count = count - - -class newrange(Sequence): - """ - Pure-Python backport of Python 3's range object. 
See `the CPython - documentation for details: - <http://docs.python.org/py3k/library/functions.html#range>`_ - """ - - def __init__(self, *args): - if len(args) == 1: - start, stop, step = 0, args[0], 1 - elif len(args) == 2: - start, stop, step = args[0], args[1], 1 - elif len(args) == 3: - start, stop, step = args - else: - raise TypeError('range() requires 1-3 int arguments') - - try: - start, stop, step = int(start), int(stop), int(step) - except ValueError: - raise TypeError('an integer is required') - - if step == 0: - raise ValueError('range() arg 3 must not be zero') - elif step < 0: - stop = min(stop, start) - else: - stop = max(stop, start) - - self._start = start - self._stop = stop - self._step = step - self._len = (stop - start) // step + bool((stop - start) % step) - - @property - def start(self): - return self._start - - @property - def stop(self): - return self._stop - - @property - def step(self): - return self._step - - def __repr__(self): - if self._step == 1: - return 'range(%d, %d)' % (self._start, self._stop) - return 'range(%d, %d, %d)' % (self._start, self._stop, self._step) - - def __eq__(self, other): - return (isinstance(other, newrange) and - (self._len == 0 == other._len or - (self._start, self._step, self._len) == - (other._start, other._step, self._len))) - - def __len__(self): - return self._len - - def index(self, value): - """Return the 0-based position of integer `value` in - the sequence this range represents.""" - try: - diff = value - self._start - except TypeError: - raise ValueError('%r is not in range' % value) - quotient, remainder = divmod(diff, self._step) - if remainder == 0 and 0 <= quotient < self._len: - return abs(quotient) - raise ValueError('%r is not in range' % value) - - def count(self, value): - """Return the number of ocurrences of integer `value` - in the sequence this range represents.""" - # a value can occur exactly zero or one times - return int(value in self) - - def __contains__(self, value): - 
"""Return ``True`` if the integer `value` occurs in - the sequence this range represents.""" - try: - self.index(value) - return True - except ValueError: - return False - - def __reversed__(self): - return iter(self[::-1]) - - def __getitem__(self, index): - """Return the element at position ``index`` in the sequence - this range represents, or raise :class:`IndexError` if the - position is out of range.""" - if isinstance(index, slice): - return self.__getitem_slice(index) - if index < 0: - # negative indexes access from the end - index = self._len + index - if index < 0 or index >= self._len: - raise IndexError('range object index out of range') - return self._start + index * self._step - - def __getitem_slice(self, slce): - """Return a range which represents the requested slce - of the sequence represented by this range. - """ - scaled_indices = (self._step * n for n in slce.indices(self._len)) - start_offset, stop_offset, new_step = scaled_indices - return newrange(self._start + start_offset, - self._start + stop_offset, - new_step) - - def __iter__(self): - """Return an iterator which enumerates the elements of the - sequence this range represents.""" - return range_iterator(self) - - -class range_iterator(Iterator): - """An iterator for a :class:`range`. - """ - def __init__(self, range_): - self._stepper = islice(count(range_.start, range_.step), len(range_)) - - def __iter__(self): - return self - - def __next__(self): - return next(self._stepper) - - def next(self): - return next(self._stepper) - - -__all__ = ['newrange'] diff --git a/contrib/python/future/future/types/newstr.py b/contrib/python/future/future/types/newstr.py deleted file mode 100644 index 8ca191f9786..00000000000 --- a/contrib/python/future/future/types/newstr.py +++ /dev/null @@ -1,426 +0,0 @@ -""" -This module redefines ``str`` on Python 2.x to be a subclass of the Py2 -``unicode`` type that behaves like the Python 3.x ``str``. 
- -The main differences between ``newstr`` and Python 2.x's ``unicode`` type are -the stricter type-checking and absence of a `u''` prefix in the representation. - -It is designed to be used together with the ``unicode_literals`` import -as follows: - - >>> from __future__ import unicode_literals - >>> from builtins import str, isinstance - -On Python 3.x and normally on Python 2.x, these expressions hold - - >>> str('blah') is 'blah' - True - >>> isinstance('blah', str) - True - -However, on Python 2.x, with this import: - - >>> from __future__ import unicode_literals - -the same expressions are False: - - >>> str('blah') is 'blah' - False - >>> isinstance('blah', str) - False - -This module is designed to be imported together with ``unicode_literals`` on -Python 2 to bring the meaning of ``str`` back into alignment with unprefixed -string literals (i.e. ``unicode`` subclasses). - -Note that ``str()`` (and ``print()``) would then normally call the -``__unicode__`` method on objects in Python 2. To define string -representations of your objects portably across Py3 and Py2, use the -:func:`python_2_unicode_compatible` decorator in :mod:`future.utils`. - -""" - -from numbers import Number - -from future.utils import PY3, istext, with_metaclass, isnewbytes -from future.types import no, issubset -from future.types.newobject import newobject - - -if PY3: - # We'll probably never use newstr on Py3 anyway... 
- unicode = str - from collections.abc import Iterable -else: - from collections import Iterable - - -class BaseNewStr(type): - def __instancecheck__(cls, instance): - if cls == newstr: - return isinstance(instance, unicode) - else: - return issubclass(instance.__class__, cls) - - -class newstr(with_metaclass(BaseNewStr, unicode)): - """ - A backport of the Python 3 str object to Py2 - """ - no_convert_msg = "Can't convert '{0}' object to str implicitly" - - def __new__(cls, *args, **kwargs): - """ - From the Py3 str docstring: - - str(object='') -> str - str(bytes_or_buffer[, encoding[, errors]]) -> str - - Create a new string object from the given object. If encoding or - errors is specified, then the object must expose a data buffer - that will be decoded using the given encoding and error handler. - Otherwise, returns the result of object.__str__() (if defined) - or repr(object). - encoding defaults to sys.getdefaultencoding(). - errors defaults to 'strict'. - - """ - if len(args) == 0: - return super(newstr, cls).__new__(cls) - # Special case: If someone requests str(str(u'abc')), return the same - # object (same id) for consistency with Py3.3. This is not true for - # other objects like list or dict. - elif type(args[0]) == newstr and cls == newstr: - return args[0] - elif isinstance(args[0], unicode): - value = args[0] - elif isinstance(args[0], bytes): # i.e. Py2 bytes or newbytes - if 'encoding' in kwargs or len(args) > 1: - value = args[0].decode(*args[1:], **kwargs) - else: - value = args[0].__str__() - else: - value = args[0] - return super(newstr, cls).__new__(cls, value) - - def __repr__(self): - """ - Without the u prefix - """ - - value = super(newstr, self).__repr__() - # assert value[0] == u'u' - return value[1:] - - def __getitem__(self, y): - """ - Warning: Python <= 2.7.6 has a bug that causes this method never to be called - when y is a slice object. Therefore the type of newstr()[:2] is wrong - (unicode instead of newstr). 
- """ - return newstr(super(newstr, self).__getitem__(y)) - - def __contains__(self, key): - errmsg = "'in <string>' requires string as left operand, not {0}" - # Don't use isinstance() here because we only want to catch - # newstr, not Python 2 unicode: - if type(key) == newstr: - newkey = key - elif isinstance(key, unicode) or isinstance(key, bytes) and not isnewbytes(key): - newkey = newstr(key) - else: - raise TypeError(errmsg.format(type(key))) - return issubset(list(newkey), list(self)) - - @no('newbytes') - def __add__(self, other): - return newstr(super(newstr, self).__add__(other)) - - @no('newbytes') - def __radd__(self, left): - " left + self " - try: - return newstr(left) + self - except: - return NotImplemented - - def __mul__(self, other): - return newstr(super(newstr, self).__mul__(other)) - - def __rmul__(self, other): - return newstr(super(newstr, self).__rmul__(other)) - - def join(self, iterable): - errmsg = 'sequence item {0}: expected unicode string, found bytes' - for i, item in enumerate(iterable): - # Here we use type() rather than isinstance() because - # __instancecheck__ is being overridden. E.g. - # isinstance(b'abc', newbytes) is True on Py2. 
- if isnewbytes(item): - raise TypeError(errmsg.format(i)) - # Support use as a staticmethod: str.join('-', ['a', 'b']) - if type(self) == newstr: - return newstr(super(newstr, self).join(iterable)) - else: - return newstr(super(newstr, newstr(self)).join(iterable)) - - @no('newbytes') - def find(self, sub, *args): - return super(newstr, self).find(sub, *args) - - @no('newbytes') - def rfind(self, sub, *args): - return super(newstr, self).rfind(sub, *args) - - @no('newbytes', (1, 2)) - def replace(self, old, new, *args): - return newstr(super(newstr, self).replace(old, new, *args)) - - def decode(self, *args): - raise AttributeError("decode method has been disabled in newstr") - - def encode(self, encoding='utf-8', errors='strict'): - """ - Returns bytes - - Encode S using the codec registered for encoding. Default encoding - is 'utf-8'. errors may be given to set a different error - handling scheme. Default is 'strict' meaning that encoding errors raise - a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and - 'xmlcharrefreplace' as well as any other name registered with - codecs.register_error that can handle UnicodeEncodeErrors. - """ - from future.types.newbytes import newbytes - # Py2 unicode.encode() takes encoding and errors as optional parameter, - # not keyword arguments as in Python 3 str. - - # For the surrogateescape error handling mechanism, the - # codecs.register_error() function seems to be inadequate for an - # implementation of it when encoding. (Decoding seems fine, however.) - # For example, in the case of - # u'\udcc3'.encode('ascii', 'surrogateescape_handler') - # after registering the ``surrogateescape_handler`` function in - # future.utils.surrogateescape, both Python 2.x and 3.x raise an - # exception anyway after the function is called because the unicode - # string it has to return isn't encodable strictly as ASCII. - - if errors == 'surrogateescape': - if encoding == 'utf-16': - # Known to fail here. 
See test_encoding_works_normally() - raise NotImplementedError('FIXME: surrogateescape handling is ' - 'not yet implemented properly') - # Encode char by char, building up list of byte-strings - mybytes = [] - for c in self: - code = ord(c) - if 0xD800 <= code <= 0xDCFF: - mybytes.append(newbytes([code - 0xDC00])) - else: - mybytes.append(c.encode(encoding=encoding)) - return newbytes(b'').join(mybytes) - return newbytes(super(newstr, self).encode(encoding, errors)) - - @no('newbytes', 1) - def startswith(self, prefix, *args): - if isinstance(prefix, Iterable): - for thing in prefix: - if isnewbytes(thing): - raise TypeError(self.no_convert_msg.format(type(thing))) - return super(newstr, self).startswith(prefix, *args) - - @no('newbytes', 1) - def endswith(self, prefix, *args): - # Note we need the decorator above as well as the isnewbytes() - # check because prefix can be either a bytes object or e.g. a - # tuple of possible prefixes. (If it's a bytes object, each item - # in it is an int.) - if isinstance(prefix, Iterable): - for thing in prefix: - if isnewbytes(thing): - raise TypeError(self.no_convert_msg.format(type(thing))) - return super(newstr, self).endswith(prefix, *args) - - @no('newbytes', 1) - def split(self, sep=None, maxsplit=-1): - # Py2 unicode.split() takes maxsplit as an optional parameter, - # not as a keyword argument as in Python 3 str. - parts = super(newstr, self).split(sep, maxsplit) - return [newstr(part) for part in parts] - - @no('newbytes', 1) - def rsplit(self, sep=None, maxsplit=-1): - # Py2 unicode.rsplit() takes maxsplit as an optional parameter, - # not as a keyword argument as in Python 3 str. 
- parts = super(newstr, self).rsplit(sep, maxsplit) - return [newstr(part) for part in parts] - - @no('newbytes', 1) - def partition(self, sep): - parts = super(newstr, self).partition(sep) - return tuple(newstr(part) for part in parts) - - @no('newbytes', 1) - def rpartition(self, sep): - parts = super(newstr, self).rpartition(sep) - return tuple(newstr(part) for part in parts) - - @no('newbytes', 1) - def index(self, sub, *args): - """ - Like newstr.find() but raise ValueError when the substring is not - found. - """ - pos = self.find(sub, *args) - if pos == -1: - raise ValueError('substring not found') - return pos - - def splitlines(self, keepends=False): - """ - S.splitlines(keepends=False) -> list of strings - - Return a list of the lines in S, breaking at line boundaries. - Line breaks are not included in the resulting list unless keepends - is given and true. - """ - # Py2 unicode.splitlines() takes keepends as an optional parameter, - # not as a keyword argument as in Python 3 str. 
- parts = super(newstr, self).splitlines(keepends) - return [newstr(part) for part in parts] - - def __eq__(self, other): - if (isinstance(other, unicode) or - isinstance(other, bytes) and not isnewbytes(other)): - return super(newstr, self).__eq__(other) - else: - return NotImplemented - - def __hash__(self): - if (isinstance(self, unicode) or - isinstance(self, bytes) and not isnewbytes(self)): - return super(newstr, self).__hash__() - else: - raise NotImplementedError() - - def __ne__(self, other): - if (isinstance(other, unicode) or - isinstance(other, bytes) and not isnewbytes(other)): - return super(newstr, self).__ne__(other) - else: - return True - - unorderable_err = 'unorderable types: str() and {0}' - - def __lt__(self, other): - if (isinstance(other, unicode) or - isinstance(other, bytes) and not isnewbytes(other)): - return super(newstr, self).__lt__(other) - raise TypeError(self.unorderable_err.format(type(other))) - - def __le__(self, other): - if (isinstance(other, unicode) or - isinstance(other, bytes) and not isnewbytes(other)): - return super(newstr, self).__le__(other) - raise TypeError(self.unorderable_err.format(type(other))) - - def __gt__(self, other): - if (isinstance(other, unicode) or - isinstance(other, bytes) and not isnewbytes(other)): - return super(newstr, self).__gt__(other) - raise TypeError(self.unorderable_err.format(type(other))) - - def __ge__(self, other): - if (isinstance(other, unicode) or - isinstance(other, bytes) and not isnewbytes(other)): - return super(newstr, self).__ge__(other) - raise TypeError(self.unorderable_err.format(type(other))) - - def __getattribute__(self, name): - """ - A trick to cause the ``hasattr`` builtin-fn to return False for - the 'decode' method on Py2. - """ - if name in ['decode', u'decode']: - raise AttributeError("decode method has been disabled in newstr") - return super(newstr, self).__getattribute__(name) - - def __native__(self): - """ - A hook for the future.utils.native() function. 
- """ - return unicode(self) - - @staticmethod - def maketrans(x, y=None, z=None): - """ - Return a translation table usable for str.translate(). - - If there is only one argument, it must be a dictionary mapping Unicode - ordinals (integers) or characters to Unicode ordinals, strings or None. - Character keys will be then converted to ordinals. - If there are two arguments, they must be strings of equal length, and - in the resulting dictionary, each character in x will be mapped to the - character at the same position in y. If there is a third argument, it - must be a string, whose characters will be mapped to None in the result. - """ - - if y is None: - assert z is None - if not isinstance(x, dict): - raise TypeError('if you give only one argument to maketrans it must be a dict') - result = {} - for (key, value) in x.items(): - if len(key) > 1: - raise ValueError('keys in translate table must be strings or integers') - result[ord(key)] = value - else: - if not isinstance(x, unicode) and isinstance(y, unicode): - raise TypeError('x and y must be unicode strings') - if not len(x) == len(y): - raise ValueError('the first two maketrans arguments must have equal length') - result = {} - for (xi, yi) in zip(x, y): - if len(xi) > 1: - raise ValueError('keys in translate table must be strings or integers') - result[ord(xi)] = ord(yi) - - if z is not None: - for char in z: - result[ord(char)] = None - return result - - def translate(self, table): - """ - S.translate(table) -> str - - Return a copy of the string S, where all characters have been mapped - through the given translation table, which must be a mapping of - Unicode ordinals to Unicode ordinals, strings, or None. - Unmapped characters are left untouched. Characters mapped to None - are deleted. 
- """ - l = [] - for c in self: - if ord(c) in table: - val = table[ord(c)] - if val is None: - continue - elif isinstance(val, unicode): - l.append(val) - else: - l.append(chr(val)) - else: - l.append(c) - return ''.join(l) - - def isprintable(self): - raise NotImplementedError('fixme') - - def isidentifier(self): - raise NotImplementedError('fixme') - - def format_map(self): - raise NotImplementedError('fixme') - - -__all__ = ['newstr'] diff --git a/contrib/python/future/future/utils/__init__.py b/contrib/python/future/future/utils/__init__.py deleted file mode 100644 index 46bd96def31..00000000000 --- a/contrib/python/future/future/utils/__init__.py +++ /dev/null @@ -1,767 +0,0 @@ -""" -A selection of cross-compatible functions for Python 2 and 3. - -This module exports useful functions for 2/3 compatible code: - - * bind_method: binds functions to classes - * ``native_str_to_bytes`` and ``bytes_to_native_str`` - * ``native_str``: always equal to the native platform string object (because - this may be shadowed by imports from future.builtins) - * lists: lrange(), lmap(), lzip(), lfilter() - * iterable method compatibility: - - iteritems, iterkeys, itervalues - - viewitems, viewkeys, viewvalues - - These use the original method if available, otherwise they use items, - keys, values. - - * types: - - * text_type: unicode in Python 2, str in Python 3 - * string_types: basestring in Python 2, str in Python 3 - * binary_type: str in Python 2, bytes in Python 3 - * integer_types: (int, long) in Python 2, int in Python 3 - * class_types: (type, types.ClassType) in Python 2, type in Python 3 - - * bchr(c): - Take an integer and make a 1-character byte string - * bord(c) - Take the result of indexing on a byte string and make an integer - * tobytes(s) - Take a text string, a byte string, or a sequence of characters taken - from a byte string, and make a byte string. 
- - * raise_from() - * raise_with_traceback() - -This module also defines these decorators: - - * ``python_2_unicode_compatible`` - * ``with_metaclass`` - * ``implements_iterator`` - -Some of the functions in this module come from the following sources: - - * Jinja2 (BSD licensed: see - https://github.com/mitsuhiko/jinja2/blob/master/LICENSE) - * Pandas compatibility module pandas.compat - * six.py by Benjamin Peterson - * Django -""" - -import types -import sys -import numbers -import functools -import copy -import inspect - - -PY3 = sys.version_info[0] >= 3 -PY34_PLUS = sys.version_info[0:2] >= (3, 4) -PY35_PLUS = sys.version_info[0:2] >= (3, 5) -PY36_PLUS = sys.version_info[0:2] >= (3, 6) -PY2 = sys.version_info[0] == 2 -PY26 = sys.version_info[0:2] == (2, 6) -PY27 = sys.version_info[0:2] == (2, 7) -PYPY = hasattr(sys, 'pypy_translation_info') - - -def python_2_unicode_compatible(cls): - """ - A decorator that defines __unicode__ and __str__ methods under Python - 2. Under Python 3, this decorator is a no-op. - - To support Python 2 and 3 with a single code base, define a __str__ - method returning unicode text and apply this decorator to the class, like - this:: - - >>> from future.utils import python_2_unicode_compatible - - >>> @python_2_unicode_compatible - ... class MyClass(object): - ... def __str__(self): - ... return u'Unicode string: \u5b54\u5b50' - - >>> a = MyClass() - - Then, after this import: - - >>> from future.builtins import str - - the following is ``True`` on both Python 3 and 2:: - - >>> str(a) == a.encode('utf-8').decode('utf-8') - True - - and, on a Unicode-enabled terminal with the right fonts, these both print the - Chinese characters for Confucius:: - - >>> print(a) - >>> print(str(a)) - - The implementation comes from django.utils.encoding. 
- """ - if not PY3: - cls.__unicode__ = cls.__str__ - cls.__str__ = lambda self: self.__unicode__().encode('utf-8') - return cls - - -def with_metaclass(meta, *bases): - """ - Function from jinja2/_compat.py. License: BSD. - - Use it like this:: - - class BaseForm(object): - pass - - class FormType(type): - pass - - class Form(with_metaclass(FormType, BaseForm)): - pass - - This requires a bit of explanation: the basic idea is to make a - dummy metaclass for one level of class instantiation that replaces - itself with the actual metaclass. Because of internal type checks - we also need to make sure that we downgrade the custom metaclass - for one level to something closer to type (that's why __call__ and - __init__ comes back from type etc.). - - This has the advantage over six.with_metaclass of not introducing - dummy classes into the final MRO. - """ - class metaclass(meta): - __call__ = type.__call__ - __init__ = type.__init__ - def __new__(cls, name, this_bases, d): - if this_bases is None: - return type.__new__(cls, name, (), d) - return meta(name, bases, d) - return metaclass('temporary_class', None, {}) - - -# Definitions from pandas.compat and six.py follow: -if PY3: - def bchr(s): - return bytes([s]) - def bstr(s): - if isinstance(s, str): - return bytes(s, 'latin-1') - else: - return bytes(s) - def bord(s): - return s - - string_types = str, - integer_types = int, - class_types = type, - text_type = str - binary_type = bytes - -else: - # Python 2 - def bchr(s): - return chr(s) - def bstr(s): - return str(s) - def bord(s): - return ord(s) - - string_types = basestring, - integer_types = (int, long) - class_types = (type, types.ClassType) - text_type = unicode - binary_type = str - -### - -if PY3: - def tobytes(s): - if isinstance(s, bytes): - return s - else: - if isinstance(s, str): - return s.encode('latin-1') - else: - return bytes(s) -else: - # Python 2 - def tobytes(s): - if isinstance(s, unicode): - return s.encode('latin-1') - else: - return 
''.join(s) - -tobytes.__doc__ = """ - Encodes to latin-1 (where the first 256 chars are the same as - ASCII.) - """ - -if PY3: - def native_str_to_bytes(s, encoding='utf-8'): - return s.encode(encoding) - - def bytes_to_native_str(b, encoding='utf-8'): - return b.decode(encoding) - - def text_to_native_str(t, encoding=None): - return t -else: - # Python 2 - def native_str_to_bytes(s, encoding=None): - from future.types import newbytes # to avoid a circular import - return newbytes(s) - - def bytes_to_native_str(b, encoding=None): - return native(b) - - def text_to_native_str(t, encoding='ascii'): - """ - Use this to create a Py2 native string when "from __future__ import - unicode_literals" is in effect. - """ - return unicode(t).encode(encoding) - -native_str_to_bytes.__doc__ = """ - On Py3, returns an encoded string. - On Py2, returns a newbytes type, ignoring the ``encoding`` argument. - """ - -if PY3: - # list-producing versions of the major Python iterating functions - def lrange(*args, **kwargs): - return list(range(*args, **kwargs)) - - def lzip(*args, **kwargs): - return list(zip(*args, **kwargs)) - - def lmap(*args, **kwargs): - return list(map(*args, **kwargs)) - - def lfilter(*args, **kwargs): - return list(filter(*args, **kwargs)) -else: - import __builtin__ - # Python 2-builtin ranges produce lists - lrange = __builtin__.range - lzip = __builtin__.zip - lmap = __builtin__.map - lfilter = __builtin__.filter - - -def isidentifier(s, dotted=False): - ''' - A function equivalent to the str.isidentifier method on Py3 - ''' - if dotted: - return all(isidentifier(a) for a in s.split('.')) - if PY3: - return s.isidentifier() - else: - import re - _name_re = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*$") - return bool(_name_re.match(s)) - - -def viewitems(obj, **kwargs): - """ - Function for iterating over dictionary items with the same set-like - behaviour on Py2.7 as on Py3. 
- - Passes kwargs to method.""" - func = getattr(obj, "viewitems", None) - if not func: - func = obj.items - return func(**kwargs) - - -def viewkeys(obj, **kwargs): - """ - Function for iterating over dictionary keys with the same set-like - behaviour on Py2.7 as on Py3. - - Passes kwargs to method.""" - func = getattr(obj, "viewkeys", None) - if not func: - func = obj.keys - return func(**kwargs) - - -def viewvalues(obj, **kwargs): - """ - Function for iterating over dictionary values with the same set-like - behaviour on Py2.7 as on Py3. - - Passes kwargs to method.""" - func = getattr(obj, "viewvalues", None) - if not func: - func = obj.values - return func(**kwargs) - - -def iteritems(obj, **kwargs): - """Use this only if compatibility with Python versions before 2.7 is - required. Otherwise, prefer viewitems(). - """ - func = getattr(obj, "iteritems", None) - if not func: - func = obj.items - return func(**kwargs) - - -def iterkeys(obj, **kwargs): - """Use this only if compatibility with Python versions before 2.7 is - required. Otherwise, prefer viewkeys(). - """ - func = getattr(obj, "iterkeys", None) - if not func: - func = obj.keys - return func(**kwargs) - - -def itervalues(obj, **kwargs): - """Use this only if compatibility with Python versions before 2.7 is - required. Otherwise, prefer viewvalues(). - """ - func = getattr(obj, "itervalues", None) - if not func: - func = obj.values - return func(**kwargs) - - -def bind_method(cls, name, func): - """Bind a method to class, python 2 and python 3 compatible. 
- - Parameters - ---------- - - cls : type - class to receive bound method - name : basestring - name of method on class instance - func : function - function to be bound as method - - Returns - ------- - None - """ - # only python 2 has an issue with bound/unbound methods - if not PY3: - setattr(cls, name, types.MethodType(func, None, cls)) - else: - setattr(cls, name, func) - - -def getexception(): - return sys.exc_info()[1] - - -def _get_caller_globals_and_locals(): - """ - Returns the globals and locals of the calling frame. - - Is there an alternative to frame hacking here? - """ - caller_frame = inspect.stack()[2] - myglobals = caller_frame[0].f_globals - mylocals = caller_frame[0].f_locals - return myglobals, mylocals - - -def _repr_strip(mystring): - """ - Returns the string without any initial or final quotes. - """ - r = repr(mystring) - if r.startswith("'") and r.endswith("'"): - return r[1:-1] - else: - return r - - -if PY3: - def raise_from(exc, cause): - """ - Equivalent to: - - raise EXCEPTION from CAUSE - - on Python 3. (See PEP 3134). - """ - myglobals, mylocals = _get_caller_globals_and_locals() - - # We pass the exception and cause along with other globals - # when we exec(): - myglobals = myglobals.copy() - myglobals['__python_future_raise_from_exc'] = exc - myglobals['__python_future_raise_from_cause'] = cause - execstr = "raise __python_future_raise_from_exc from __python_future_raise_from_cause" - exec(execstr, myglobals, mylocals) - - def raise_(tp, value=None, tb=None): - """ - A function that matches the Python 2.x ``raise`` statement. This - allows re-raising exceptions with the cls value and traceback on - Python 2 and 3. - """ - if isinstance(tp, BaseException): - # If the first object is an instance, the type of the exception - # is the class of the instance, the instance itself is the value, - # and the second object must be None. 
- if value is not None: - raise TypeError("instance exception may not have a separate value") - exc = tp - elif isinstance(tp, type) and not issubclass(tp, BaseException): - # If the first object is a class, it becomes the type of the - # exception. - raise TypeError("class must derive from BaseException, not %s" % tp.__name__) - else: - # The second object is used to determine the exception value: If it - # is an instance of the class, the instance becomes the exception - # value. If the second object is a tuple, it is used as the argument - # list for the class constructor; if it is None, an empty argument - # list is used, and any other object is treated as a single argument - # to the constructor. The instance so created by calling the - # constructor is used as the exception value. - if isinstance(value, tp): - exc = value - elif isinstance(value, tuple): - exc = tp(*value) - elif value is None: - exc = tp() - else: - exc = tp(value) - - if exc.__traceback__ is not tb: - raise exc.with_traceback(tb) - raise exc - - def raise_with_traceback(exc, traceback=Ellipsis): - if traceback == Ellipsis: - _, _, traceback = sys.exc_info() - raise exc.with_traceback(traceback) - -else: - def raise_from(exc, cause): - """ - Equivalent to: - - raise EXCEPTION from CAUSE - - on Python 3. (See PEP 3134). - """ - # Is either arg an exception class (e.g. IndexError) rather than - # instance (e.g. IndexError('my message here')? If so, pass the - # name of the class undisturbed through to "raise ... from ...". 
- if isinstance(exc, type) and issubclass(exc, Exception): - e = exc() - # exc = exc.__name__ - # execstr = "e = " + _repr_strip(exc) + "()" - # myglobals, mylocals = _get_caller_globals_and_locals() - # exec(execstr, myglobals, mylocals) - else: - e = exc - e.__suppress_context__ = False - if isinstance(cause, type) and issubclass(cause, Exception): - e.__cause__ = cause() - e.__cause__.__traceback__ = sys.exc_info()[2] - e.__suppress_context__ = True - elif cause is None: - e.__cause__ = None - e.__suppress_context__ = True - elif isinstance(cause, BaseException): - e.__cause__ = cause - object.__setattr__(e.__cause__, '__traceback__', sys.exc_info()[2]) - e.__suppress_context__ = True - else: - raise TypeError("exception causes must derive from BaseException") - e.__context__ = sys.exc_info()[1] - raise e - - exec(''' -def raise_(tp, value=None, tb=None): - raise tp, value, tb - -def raise_with_traceback(exc, traceback=Ellipsis): - if traceback == Ellipsis: - _, _, traceback = sys.exc_info() - raise exc, None, traceback -'''.strip()) - - -raise_with_traceback.__doc__ = ( -"""Raise exception with existing traceback. -If traceback is not passed, uses sys.exc_info() to get traceback.""" -) - - -# Deprecated alias for backward compatibility with ``future`` versions < 0.11: -reraise = raise_ - - -def implements_iterator(cls): - ''' - From jinja2/_compat.py. License: BSD. 
- - Use as a decorator like this:: - - @implements_iterator - class UppercasingIterator(object): - def __init__(self, iterable): - self._iter = iter(iterable) - def __iter__(self): - return self - def __next__(self): - return next(self._iter).upper() - - ''' - if PY3: - return cls - else: - cls.next = cls.__next__ - del cls.__next__ - return cls - -if PY3: - get_next = lambda x: x.next -else: - get_next = lambda x: x.__next__ - - -def encode_filename(filename): - if PY3: - return filename - else: - if isinstance(filename, unicode): - return filename.encode('utf-8') - return filename - - -def is_new_style(cls): - """ - Python 2.7 has both new-style and old-style classes. Old-style classes can - be pesky in some circumstances, such as when using inheritance. Use this - function to test for whether a class is new-style. (Python 3 only has - new-style classes.) - """ - return hasattr(cls, '__class__') and ('__dict__' in dir(cls) - or hasattr(cls, '__slots__')) - -# The native platform string and bytes types. Useful because ``str`` and -# ``bytes`` are redefined on Py2 by ``from future.builtins import *``. -native_str = str -native_bytes = bytes - - -def istext(obj): - """ - Deprecated. Use:: - >>> isinstance(obj, str) - after this import: - >>> from future.builtins import str - """ - return isinstance(obj, type(u'')) - - -def isbytes(obj): - """ - Deprecated. Use:: - >>> isinstance(obj, bytes) - after this import: - >>> from future.builtins import bytes - """ - return isinstance(obj, type(b'')) - - -def isnewbytes(obj): - """ - Equivalent to the result of ``type(obj) == type(newbytes)`` - in other words, it is REALLY a newbytes instance, not a Py2 native str - object? - - Note that this does not cover subclasses of newbytes, and it is not - equivalent to ininstance(obj, newbytes) - """ - return type(obj).__name__ == 'newbytes' - - -def isint(obj): - """ - Deprecated. Tests whether an object is a Py3 ``int`` or either a Py2 ``int`` or - ``long``. 
- - Instead of using this function, you can use: - - >>> from future.builtins import int - >>> isinstance(obj, int) - - The following idiom is equivalent: - - >>> from numbers import Integral - >>> isinstance(obj, Integral) - """ - - return isinstance(obj, numbers.Integral) - - -def native(obj): - """ - On Py3, this is a no-op: native(obj) -> obj - - On Py2, returns the corresponding native Py2 types that are - superclasses for backported objects from Py3: - - >>> from builtins import str, bytes, int - - >>> native(str(u'ABC')) - u'ABC' - >>> type(native(str(u'ABC'))) - unicode - - >>> native(bytes(b'ABC')) - b'ABC' - >>> type(native(bytes(b'ABC'))) - bytes - - >>> native(int(10**20)) - 100000000000000000000L - >>> type(native(int(10**20))) - long - - Existing native types on Py2 will be returned unchanged: - - >>> type(native(u'ABC')) - unicode - """ - if hasattr(obj, '__native__'): - return obj.__native__() - else: - return obj - - -# Implementation of exec_ is from ``six``: -if PY3: - import builtins - exec_ = getattr(builtins, "exec") -else: - def exec_(code, globs=None, locs=None): - """Execute code in a namespace.""" - if globs is None: - frame = sys._getframe(1) - globs = frame.f_globals - if locs is None: - locs = frame.f_locals - del frame - elif locs is None: - locs = globs - exec("""exec code in globs, locs""") - - -# Defined here for backward compatibility: -def old_div(a, b): - """ - DEPRECATED: import ``old_div`` from ``past.utils`` instead. - - Equivalent to ``a / b`` on Python 2 without ``from __future__ import - division``. - - TODO: generalize this to other objects (like arrays etc.) - """ - if isinstance(a, numbers.Integral) and isinstance(b, numbers.Integral): - return a // b - else: - return a / b - - -def as_native_str(encoding='utf-8'): - ''' - A decorator to turn a function or method call that returns text, i.e. - unicode, into one that returns a native platform str. 
- - Use it as a decorator like this:: - - from __future__ import unicode_literals - - class MyClass(object): - @as_native_str(encoding='ascii') - def __repr__(self): - return next(self._iter).upper() - ''' - if PY3: - return lambda f: f - else: - def encoder(f): - @functools.wraps(f) - def wrapper(*args, **kwargs): - return f(*args, **kwargs).encode(encoding=encoding) - return wrapper - return encoder - -# listvalues and listitems definitions from Nick Coghlan's (withdrawn) -# PEP 496: -try: - dict.iteritems -except AttributeError: - # Python 3 - def listvalues(d): - return list(d.values()) - def listitems(d): - return list(d.items()) -else: - # Python 2 - def listvalues(d): - return d.values() - def listitems(d): - return d.items() - -if PY3: - def ensure_new_type(obj): - return obj -else: - def ensure_new_type(obj): - from future.types.newbytes import newbytes - from future.types.newstr import newstr - from future.types.newint import newint - from future.types.newdict import newdict - - native_type = type(native(obj)) - - # Upcast only if the type is already a native (non-future) type - if issubclass(native_type, type(obj)): - # Upcast - if native_type == str: # i.e. 
Py2 8-bit str - return newbytes(obj) - elif native_type == unicode: - return newstr(obj) - elif native_type == int: - return newint(obj) - elif native_type == long: - return newint(obj) - elif native_type == dict: - return newdict(obj) - else: - return obj - else: - # Already a new type - assert type(obj) in [newbytes, newstr] - return obj - - -__all__ = ['PY2', 'PY26', 'PY3', 'PYPY', - 'as_native_str', 'binary_type', 'bind_method', 'bord', 'bstr', - 'bytes_to_native_str', 'class_types', 'encode_filename', - 'ensure_new_type', 'exec_', 'get_next', 'getexception', - 'implements_iterator', 'integer_types', 'is_new_style', 'isbytes', - 'isidentifier', 'isint', 'isnewbytes', 'istext', 'iteritems', - 'iterkeys', 'itervalues', 'lfilter', 'listitems', 'listvalues', - 'lmap', 'lrange', 'lzip', 'native', 'native_bytes', 'native_str', - 'native_str_to_bytes', 'old_div', - 'python_2_unicode_compatible', 'raise_', - 'raise_with_traceback', 'reraise', 'string_types', - 'text_to_native_str', 'text_type', 'tobytes', 'viewitems', - 'viewkeys', 'viewvalues', 'with_metaclass' - ] diff --git a/contrib/python/future/future/utils/surrogateescape.py b/contrib/python/future/future/utils/surrogateescape.py deleted file mode 100644 index 0dcc9fa6e60..00000000000 --- a/contrib/python/future/future/utils/surrogateescape.py +++ /dev/null @@ -1,198 +0,0 @@ -""" -This is Victor Stinner's pure-Python implementation of PEP 383: the "surrogateescape" error -handler of Python 3. 
- -Source: misc/python/surrogateescape.py in https://bitbucket.org/haypo/misc -""" - -# This code is released under the Python license and the BSD 2-clause license - -import codecs -import sys - -from future import utils - - -FS_ERRORS = 'surrogateescape' - -# # -- Python 2/3 compatibility ------------------------------------- -# FS_ERRORS = 'my_surrogateescape' - -def u(text): - if utils.PY3: - return text - else: - return text.decode('unicode_escape') - -def b(data): - if utils.PY3: - return data.encode('latin1') - else: - return data - -if utils.PY3: - _unichr = chr - bytes_chr = lambda code: bytes((code,)) -else: - _unichr = unichr - bytes_chr = chr - -def surrogateescape_handler(exc): - """ - Pure Python implementation of the PEP 383: the "surrogateescape" error - handler of Python 3. Undecodable bytes will be replaced by a Unicode - character U+DCxx on decoding, and these are translated into the - original bytes on encoding. - """ - mystring = exc.object[exc.start:exc.end] - - try: - if isinstance(exc, UnicodeDecodeError): - # mystring is a byte-string in this case - decoded = replace_surrogate_decode(mystring) - elif isinstance(exc, UnicodeEncodeError): - # In the case of u'\udcc3'.encode('ascii', - # 'this_surrogateescape_handler'), both Python 2.x and 3.x raise an - # exception anyway after this function is called, even though I think - # it's doing what it should. It seems that the strict encoder is called - # to encode the unicode string that this function returns ... - decoded = replace_surrogate_encode(mystring) - else: - raise exc - except NotASurrogateError: - raise exc - return (decoded, exc.end) - - -class NotASurrogateError(Exception): - pass - - -def replace_surrogate_encode(mystring): - """ - Returns a (unicode) string, not the more logical bytes, because the codecs - register_error functionality expects this. 
- """ - decoded = [] - for ch in mystring: - # if utils.PY3: - # code = ch - # else: - code = ord(ch) - - # The following magic comes from Py3.3's Python/codecs.c file: - if not 0xD800 <= code <= 0xDCFF: - # Not a surrogate. Fail with the original exception. - raise NotASurrogateError - # mybytes = [0xe0 | (code >> 12), - # 0x80 | ((code >> 6) & 0x3f), - # 0x80 | (code & 0x3f)] - # Is this a good idea? - if 0xDC00 <= code <= 0xDC7F: - decoded.append(_unichr(code - 0xDC00)) - elif code <= 0xDCFF: - decoded.append(_unichr(code - 0xDC00)) - else: - raise NotASurrogateError - return str().join(decoded) - - -def replace_surrogate_decode(mybytes): - """ - Returns a (unicode) string - """ - decoded = [] - for ch in mybytes: - # We may be parsing newbytes (in which case ch is an int) or a native - # str on Py2 - if isinstance(ch, int): - code = ch - else: - code = ord(ch) - if 0x80 <= code <= 0xFF: - decoded.append(_unichr(0xDC00 + code)) - elif code <= 0x7F: - decoded.append(_unichr(code)) - else: - # # It may be a bad byte - # # Try swallowing it. - # continue - # print("RAISE!") - raise NotASurrogateError - return str().join(decoded) - - -def encodefilename(fn): - if FS_ENCODING == 'ascii': - # ASCII encoder of Python 2 expects that the error handler returns a - # Unicode string encodable to ASCII, whereas our surrogateescape error - # handler has to return bytes in 0x80-0xFF range. 
- encoded = [] - for index, ch in enumerate(fn): - code = ord(ch) - if code < 128: - ch = bytes_chr(code) - elif 0xDC80 <= code <= 0xDCFF: - ch = bytes_chr(code - 0xDC00) - else: - raise UnicodeEncodeError(FS_ENCODING, - fn, index, index+1, - 'ordinal not in range(128)') - encoded.append(ch) - return bytes().join(encoded) - elif FS_ENCODING == 'utf-8': - # UTF-8 encoder of Python 2 encodes surrogates, so U+DC80-U+DCFF - # doesn't go through our error handler - encoded = [] - for index, ch in enumerate(fn): - code = ord(ch) - if 0xD800 <= code <= 0xDFFF: - if 0xDC80 <= code <= 0xDCFF: - ch = bytes_chr(code - 0xDC00) - encoded.append(ch) - else: - raise UnicodeEncodeError( - FS_ENCODING, - fn, index, index+1, 'surrogates not allowed') - else: - ch_utf8 = ch.encode('utf-8') - encoded.append(ch_utf8) - return bytes().join(encoded) - else: - return fn.encode(FS_ENCODING, FS_ERRORS) - -def decodefilename(fn): - return fn.decode(FS_ENCODING, FS_ERRORS) - -FS_ENCODING = 'ascii'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]') -# FS_ENCODING = 'cp932'; fn = b('[abc\x81\x00]'); encoded = u('[abc\udc81\x00]') -# FS_ENCODING = 'UTF-8'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]') - - -# normalize the filesystem encoding name. -# For example, we expect "utf-8", not "UTF8". 
-FS_ENCODING = codecs.lookup(FS_ENCODING).name - - -def register_surrogateescape(): - """ - Registers the surrogateescape error handler on Python 2 (only) - """ - if utils.PY3: - return - try: - codecs.lookup_error(FS_ERRORS) - except LookupError: - codecs.register_error(FS_ERRORS, surrogateescape_handler) - - -if __name__ == '__main__': - pass - # # Tests: - # register_surrogateescape() - - # b = decodefilename(fn) - # assert b == encoded, "%r != %r" % (b, encoded) - # c = encodefilename(b) - # assert c == fn, '%r != %r' % (c, fn) - # # print("ok") diff --git a/contrib/python/future/html/__init__.py b/contrib/python/future/html/__init__.py deleted file mode 100644 index e957e745708..00000000000 --- a/contrib/python/future/html/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -from __future__ import absolute_import -import sys - -if sys.version_info[0] < 3: - from future.moves.html import * -else: - raise ImportError('This package should not be accessible on Python 3. ' - 'Either you are trying to run from the python-future src folder ' - 'or your installation of python-future is corrupted.') diff --git a/contrib/python/future/html/entities.py b/contrib/python/future/html/entities.py deleted file mode 100644 index 211649e5313..00000000000 --- a/contrib/python/future/html/entities.py +++ /dev/null @@ -1,7 +0,0 @@ -from __future__ import absolute_import -from future.utils import PY3 - -if PY3: - from html.entities import * -else: - from future.moves.html.entities import * diff --git a/contrib/python/future/html/parser.py b/contrib/python/future/html/parser.py deleted file mode 100644 index e39488797eb..00000000000 --- a/contrib/python/future/html/parser.py +++ /dev/null @@ -1,8 +0,0 @@ -from __future__ import absolute_import -import sys -__future_module__ = True - -if sys.version_info[0] >= 3: - raise ImportError('Cannot import module from python-future source folder') -else: - from future.moves.html.parser import * diff --git a/contrib/python/future/http/__init__.py 
b/contrib/python/future/http/__init__.py deleted file mode 100644 index e4f853e53ce..00000000000 --- a/contrib/python/future/http/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -from __future__ import absolute_import -import sys - -if sys.version_info[0] < 3: - pass -else: - raise ImportError('This package should not be accessible on Python 3. ' - 'Either you are trying to run from the python-future src folder ' - 'or your installation of python-future is corrupted.') diff --git a/contrib/python/future/http/client.py b/contrib/python/future/http/client.py deleted file mode 100644 index a6a31006bde..00000000000 --- a/contrib/python/future/http/client.py +++ /dev/null @@ -1,90 +0,0 @@ -from __future__ import absolute_import -import sys - -assert sys.version_info[0] < 3 - -from httplib import * -from httplib import HTTPMessage - -# These constants aren't included in __all__ in httplib.py: - -from httplib import (HTTP_PORT, - HTTPS_PORT, - - CONTINUE, - SWITCHING_PROTOCOLS, - PROCESSING, - - OK, - CREATED, - ACCEPTED, - NON_AUTHORITATIVE_INFORMATION, - NO_CONTENT, - RESET_CONTENT, - PARTIAL_CONTENT, - MULTI_STATUS, - IM_USED, - - MULTIPLE_CHOICES, - MOVED_PERMANENTLY, - FOUND, - SEE_OTHER, - NOT_MODIFIED, - USE_PROXY, - TEMPORARY_REDIRECT, - - BAD_REQUEST, - UNAUTHORIZED, - PAYMENT_REQUIRED, - FORBIDDEN, - NOT_FOUND, - METHOD_NOT_ALLOWED, - NOT_ACCEPTABLE, - PROXY_AUTHENTICATION_REQUIRED, - REQUEST_TIMEOUT, - CONFLICT, - GONE, - LENGTH_REQUIRED, - PRECONDITION_FAILED, - REQUEST_ENTITY_TOO_LARGE, - REQUEST_URI_TOO_LONG, - UNSUPPORTED_MEDIA_TYPE, - REQUESTED_RANGE_NOT_SATISFIABLE, - EXPECTATION_FAILED, - UNPROCESSABLE_ENTITY, - LOCKED, - FAILED_DEPENDENCY, - UPGRADE_REQUIRED, - - INTERNAL_SERVER_ERROR, - NOT_IMPLEMENTED, - BAD_GATEWAY, - SERVICE_UNAVAILABLE, - GATEWAY_TIMEOUT, - HTTP_VERSION_NOT_SUPPORTED, - INSUFFICIENT_STORAGE, - NOT_EXTENDED, - - MAXAMOUNT, - ) - -# These are not available on Python 2.6.x: -try: - from httplib import LineTooLong, LineAndFileWrapper -except 
ImportError: - pass - -# These may not be available on all versions of Python 2.6.x or 2.7.x -try: - from httplib import ( - _CS_IDLE, - _CS_REQ_STARTED, - _CS_REQ_SENT, - _MAXLINE, - _MAXHEADERS, - _is_legal_header_name, - _is_illegal_header_value, - _METHODS_EXPECTING_BODY - ) -except ImportError: - pass diff --git a/contrib/python/future/http/cookiejar.py b/contrib/python/future/http/cookiejar.py deleted file mode 100644 index d847b2bf2d0..00000000000 --- a/contrib/python/future/http/cookiejar.py +++ /dev/null @@ -1,6 +0,0 @@ -from __future__ import absolute_import -import sys - -assert sys.version_info[0] < 3 - -from cookielib import * diff --git a/contrib/python/future/http/cookies.py b/contrib/python/future/http/cookies.py deleted file mode 100644 index eb2a82388b8..00000000000 --- a/contrib/python/future/http/cookies.py +++ /dev/null @@ -1,7 +0,0 @@ -from __future__ import absolute_import -import sys - -assert sys.version_info[0] < 3 - -from Cookie import * -from Cookie import Morsel # left out of __all__ on Py2.7! diff --git a/contrib/python/future/http/server.py b/contrib/python/future/http/server.py deleted file mode 100644 index 29710557820..00000000000 --- a/contrib/python/future/http/server.py +++ /dev/null @@ -1,18 +0,0 @@ -from __future__ import absolute_import -import sys - -assert sys.version_info[0] < 3 - -from BaseHTTPServer import * -from CGIHTTPServer import * -from SimpleHTTPServer import * -try: - from CGIHTTPServer import _url_collapse_path # needed for a test -except ImportError: - try: - # Python 2.7.0 to 2.7.3 - from CGIHTTPServer import ( - _url_collapse_path_split as _url_collapse_path) - except ImportError: - # Doesn't exist on Python 2.6.x. Ignore it. 
- pass diff --git a/contrib/python/future/past/__init__.py b/contrib/python/future/past/__init__.py deleted file mode 100644 index 14713039332..00000000000 --- a/contrib/python/future/past/__init__.py +++ /dev/null @@ -1,90 +0,0 @@ -# coding=utf-8 -""" -past: compatibility with Python 2 from Python 3 -=============================================== - -``past`` is a package to aid with Python 2/3 compatibility. Whereas ``future`` -contains backports of Python 3 constructs to Python 2, ``past`` provides -implementations of some Python 2 constructs in Python 3 and tools to import and -run Python 2 code in Python 3. It is intended to be used sparingly, as a way of -running old Python 2 code from Python 3 until the code is ported properly. - -Potential uses for libraries: - -- as a step in porting a Python 2 codebase to Python 3 (e.g. with the ``futurize`` script) -- to provide Python 3 support for previously Python 2-only libraries with the - same APIs as on Python 2 -- particularly with regard to 8-bit strings (the - ``past.builtins.str`` type). -- to aid in providing minimal-effort Python 3 support for applications using - libraries that do not yet wish to upgrade their code properly to Python 3, or - wish to upgrade it gradually to Python 3 style. - - -Here are some code examples that run identically on Python 3 and 2:: - - >>> from past.builtins import str as oldstr - - >>> philosopher = oldstr(u'\u5b54\u5b50'.encode('utf-8')) - >>> # This now behaves like a Py2 byte-string on both Py2 and Py3. - >>> # For example, indexing returns a Python 2-like string object, not - >>> # an integer: - >>> philosopher[0] - '\xe5' - >>> type(philosopher[0]) - <past.builtins.oldstr> - - >>> # List-producing versions of range, reduce, map, filter - >>> from past.builtins import range, reduce - >>> range(10) - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - >>> reduce(lambda x, y: x+y, [1, 2, 3, 4, 5]) - 15 - - >>> # Other functions removed in Python 3 are resurrected ... 
- >>> from past.builtins import execfile - >>> execfile('myfile.py') - - >>> from past.builtins import raw_input - >>> name = raw_input('What is your name? ') - What is your name? [cursor] - - >>> from past.builtins import reload - >>> reload(mymodule) # equivalent to imp.reload(mymodule) in Python 3 - - >>> from past.builtins import xrange - >>> for i in xrange(10): - ... pass - - -It also provides import hooks so you can import and use Python 2 modules like -this:: - - $ python3 - - >>> from past.translation import autotranslate - >>> authotranslate('mypy2module') - >>> import mypy2module - -until the authors of the Python 2 modules have upgraded their code. Then, for -example:: - - >>> mypy2module.func_taking_py2_string(oldstr(b'abcd')) - - -Credits -------- - -:Author: Ed Schofield, Jordan M. Adler, et al -:Sponsor: Python Charmers Pty Ltd, Australia: http://pythoncharmers.com - - -Licensing ---------- -Copyright 2013-2019 Python Charmers Pty Ltd, Australia. -The software is distributed under an MIT licence. See LICENSE.txt. -""" - -from future import __version__, __copyright__, __license__ - -__title__ = 'past' -__author__ = 'Ed Schofield' diff --git a/contrib/python/future/past/builtins/__init__.py b/contrib/python/future/past/builtins/__init__.py deleted file mode 100644 index 1b19e373c87..00000000000 --- a/contrib/python/future/past/builtins/__init__.py +++ /dev/null @@ -1,72 +0,0 @@ -""" -A resurrection of some old functions from Python 2 for use in Python 3. These -should be used sparingly, to help with porting efforts, since code using them -is no longer standard Python 3 code. - -This module provides the following: - -1. Implementations of these builtin functions which have no equivalent on Py3: - -- apply -- chr -- cmp -- execfile - -2. Aliases: - -- intern <- sys.intern -- raw_input <- input -- reduce <- functools.reduce -- reload <- imp.reload -- unichr <- chr -- unicode <- str -- xrange <- range - -3. 
List-producing versions of the corresponding Python 3 iterator-producing functions: - -- filter -- map -- range -- zip - -4. Forward-ported Py2 types: - -- basestring -- dict -- str -- long -- unicode - -""" - -from future.utils import PY3 -from past.builtins.noniterators import (filter, map, range, reduce, zip) -# from past.builtins.misc import (ascii, hex, input, oct, open) -if PY3: - from past.types import (basestring, - olddict as dict, - oldstr as str, - long, - unicode) -else: - from __builtin__ import (basestring, dict, str, long, unicode) - -from past.builtins.misc import (apply, chr, cmp, execfile, intern, oct, - raw_input, reload, unichr, unicode, xrange) -from past import utils - - -if utils.PY3: - # We only import names that shadow the builtins on Py3. No other namespace - # pollution on Py3. - - # Only shadow builtins on Py3; no new names - __all__ = ['filter', 'map', 'range', 'reduce', 'zip', - 'basestring', 'dict', 'str', 'long', 'unicode', - 'apply', 'chr', 'cmp', 'execfile', 'intern', 'raw_input', - 'reload', 'unichr', 'xrange' - ] - -else: - # No namespace pollution on Py2 - __all__ = [] diff --git a/contrib/python/future/past/builtins/misc.py b/contrib/python/future/past/builtins/misc.py deleted file mode 100644 index a1bae82f5b8..00000000000 --- a/contrib/python/future/past/builtins/misc.py +++ /dev/null @@ -1,97 +0,0 @@ -from __future__ import unicode_literals - -import inspect - -from future.utils import PY2, PY3, exec_ - -if PY2: - from collections import Mapping -else: - from collections.abc import Mapping - -if PY3: - import builtins - from collections.abc import Mapping - - def apply(f, *args, **kw): - return f(*args, **kw) - - from past.builtins import str as oldstr - - def chr(i): - """ - Return a byte-string of one character with ordinal i; 0 <= i <= 256 - """ - return oldstr(bytes((i,))) - - def cmp(x, y): - """ - cmp(x, y) -> integer - - Return negative if x<y, zero if x==y, positive if x>y. 
- """ - return (x > y) - (x < y) - - from sys import intern - - def oct(number): - """oct(number) -> string - - Return the octal representation of an integer - """ - return '0' + builtins.oct(number)[2:] - - import warnings - warnings.filterwarnings("ignore", category=DeprecationWarning, module="past.builtins.misc") - - raw_input = input - from imp import reload - unicode = str - unichr = chr - xrange = range -else: - import __builtin__ - from collections import Mapping - apply = __builtin__.apply - chr = __builtin__.chr - cmp = __builtin__.cmp - execfile = __builtin__.execfile - intern = __builtin__.intern - oct = __builtin__.oct - raw_input = __builtin__.raw_input - reload = __builtin__.reload - unicode = __builtin__.unicode - unichr = __builtin__.unichr - xrange = __builtin__.xrange - - -if PY3: - def execfile(filename, myglobals=None, mylocals=None): - """ - Read and execute a Python script from a file in the given namespaces. - The globals and locals are dictionaries, defaulting to the current - globals and locals. If only globals is given, locals defaults to it. - """ - if myglobals is None: - # There seems to be no alternative to frame hacking here. - caller_frame = inspect.stack()[1] - myglobals = caller_frame[0].f_globals - mylocals = caller_frame[0].f_locals - elif mylocals is None: - # Only if myglobals is given do we set mylocals to it. 
- mylocals = myglobals - if not isinstance(myglobals, Mapping): - raise TypeError('globals must be a mapping') - if not isinstance(mylocals, Mapping): - raise TypeError('locals must be a mapping') - with open(filename, "rb") as fin: - source = fin.read() - code = compile(source, filename, "exec") - exec_(code, myglobals, mylocals) - - -if PY3: - __all__ = ['apply', 'chr', 'cmp', 'execfile', 'intern', 'raw_input', - 'reload', 'unichr', 'unicode', 'xrange'] -else: - __all__ = [] diff --git a/contrib/python/future/past/builtins/noniterators.py b/contrib/python/future/past/builtins/noniterators.py deleted file mode 100644 index 183ffffda48..00000000000 --- a/contrib/python/future/past/builtins/noniterators.py +++ /dev/null @@ -1,272 +0,0 @@ -""" -This module is designed to be used as follows:: - - from past.builtins.noniterators import filter, map, range, reduce, zip - -And then, for example:: - - assert isinstance(range(5), list) - -The list-producing functions this brings in are:: - -- ``filter`` -- ``map`` -- ``range`` -- ``reduce`` -- ``zip`` - -""" - -from __future__ import division, absolute_import, print_function - -from itertools import chain, starmap -import itertools # since zip_longest doesn't exist on Py2 -from past.types import basestring -from past.utils import PY3 - - -def flatmap(f, items): - return chain.from_iterable(map(f, items)) - - -if PY3: - import builtins - - # list-producing versions of the major Python iterating functions - def oldfilter(*args): - """ - filter(function or None, sequence) -> list, tuple, or string - - Return those items of sequence for which function(item) is true. - If function is None, return the items that are true. If sequence - is a tuple or string, return the same type, else return a list. - """ - mytype = type(args[1]) - if isinstance(args[1], basestring): - return mytype().join(builtins.filter(*args)) - elif isinstance(args[1], (tuple, list)): - return mytype(builtins.filter(*args)) - else: - # Fall back to list. 
Is this the right thing to do? - return list(builtins.filter(*args)) - - # This is surprisingly difficult to get right. For example, the - # solutions here fail with the test cases in the docstring below: - # http://stackoverflow.com/questions/8072755/ - def oldmap(func, *iterables): - """ - map(function, sequence[, sequence, ...]) -> list - - Return a list of the results of applying the function to the - items of the argument sequence(s). If more than one sequence is - given, the function is called with an argument list consisting of - the corresponding item of each sequence, substituting None for - missing values when not all sequences have the same length. If - the function is None, return a list of the items of the sequence - (or a list of tuples if more than one sequence). - - Test cases: - >>> oldmap(None, 'hello world') - ['h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd'] - - >>> oldmap(None, range(4)) - [0, 1, 2, 3] - - More test cases are in test_past.test_builtins. - """ - zipped = itertools.zip_longest(*iterables) - l = list(zipped) - if len(l) == 0: - return [] - if func is None: - result = l - else: - result = list(starmap(func, l)) - - # Inspect to see whether it's a simple sequence of tuples - try: - if max([len(item) for item in result]) == 1: - return list(chain.from_iterable(result)) - # return list(flatmap(func, result)) - except TypeError as e: - # Simple objects like ints have no len() - pass - return result - - ############################ - ### For reference, the source code for Py2.7 map function: - # static PyObject * - # builtin_map(PyObject *self, PyObject *args) - # { - # typedef struct { - # PyObject *it; /* the iterator object */ - # int saw_StopIteration; /* bool: did the iterator end? 
*/ - # } sequence; - # - # PyObject *func, *result; - # sequence *seqs = NULL, *sqp; - # Py_ssize_t n, len; - # register int i, j; - # - # n = PyTuple_Size(args); - # if (n < 2) { - # PyErr_SetString(PyExc_TypeError, - # "map() requires at least two args"); - # return NULL; - # } - # - # func = PyTuple_GetItem(args, 0); - # n--; - # - # if (func == Py_None) { - # if (PyErr_WarnPy3k("map(None, ...) not supported in 3.x; " - # "use list(...)", 1) < 0) - # return NULL; - # if (n == 1) { - # /* map(None, S) is the same as list(S). */ - # return PySequence_List(PyTuple_GetItem(args, 1)); - # } - # } - # - # /* Get space for sequence descriptors. Must NULL out the iterator - # * pointers so that jumping to Fail_2 later doesn't see trash. - # */ - # if ((seqs = PyMem_NEW(sequence, n)) == NULL) { - # PyErr_NoMemory(); - # return NULL; - # } - # for (i = 0; i < n; ++i) { - # seqs[i].it = (PyObject*)NULL; - # seqs[i].saw_StopIteration = 0; - # } - # - # /* Do a first pass to obtain iterators for the arguments, and set len - # * to the largest of their lengths. - # */ - # len = 0; - # for (i = 0, sqp = seqs; i < n; ++i, ++sqp) { - # PyObject *curseq; - # Py_ssize_t curlen; - # - # /* Get iterator. */ - # curseq = PyTuple_GetItem(args, i+1); - # sqp->it = PyObject_GetIter(curseq); - # if (sqp->it == NULL) { - # static char errmsg[] = - # "argument %d to map() must support iteration"; - # char errbuf[sizeof(errmsg) + 25]; - # PyOS_snprintf(errbuf, sizeof(errbuf), errmsg, i+2); - # PyErr_SetString(PyExc_TypeError, errbuf); - # goto Fail_2; - # } - # - # /* Update len. */ - # curlen = _PyObject_LengthHint(curseq, 8); - # if (curlen > len) - # len = curlen; - # } - # - # /* Get space for the result list. */ - # if ((result = (PyObject *) PyList_New(len)) == NULL) - # goto Fail_2; - # - # /* Iterate over the sequences until all have stopped. 
*/ - # for (i = 0; ; ++i) { - # PyObject *alist, *item=NULL, *value; - # int numactive = 0; - # - # if (func == Py_None && n == 1) - # alist = NULL; - # else if ((alist = PyTuple_New(n)) == NULL) - # goto Fail_1; - # - # for (j = 0, sqp = seqs; j < n; ++j, ++sqp) { - # if (sqp->saw_StopIteration) { - # Py_INCREF(Py_None); - # item = Py_None; - # } - # else { - # item = PyIter_Next(sqp->it); - # if (item) - # ++numactive; - # else { - # if (PyErr_Occurred()) { - # Py_XDECREF(alist); - # goto Fail_1; - # } - # Py_INCREF(Py_None); - # item = Py_None; - # sqp->saw_StopIteration = 1; - # } - # } - # if (alist) - # PyTuple_SET_ITEM(alist, j, item); - # else - # break; - # } - # - # if (!alist) - # alist = item; - # - # if (numactive == 0) { - # Py_DECREF(alist); - # break; - # } - # - # if (func == Py_None) - # value = alist; - # else { - # value = PyEval_CallObject(func, alist); - # Py_DECREF(alist); - # if (value == NULL) - # goto Fail_1; - # } - # if (i >= len) { - # int status = PyList_Append(result, value); - # Py_DECREF(value); - # if (status < 0) - # goto Fail_1; - # } - # else if (PyList_SetItem(result, i, value) < 0) - # goto Fail_1; - # } - # - # if (i < len && PyList_SetSlice(result, i, len, NULL) < 0) - # goto Fail_1; - # - # goto Succeed; - # - # Fail_1: - # Py_DECREF(result); - # Fail_2: - # result = NULL; - # Succeed: - # assert(seqs); - # for (i = 0; i < n; ++i) - # Py_XDECREF(seqs[i].it); - # PyMem_DEL(seqs); - # return result; - # } - - def oldrange(*args, **kwargs): - return list(builtins.range(*args, **kwargs)) - - def oldzip(*args, **kwargs): - return list(builtins.zip(*args, **kwargs)) - - filter = oldfilter - map = oldmap - range = oldrange - from functools import reduce - zip = oldzip - __all__ = ['filter', 'map', 'range', 'reduce', 'zip'] - -else: - import __builtin__ - # Python 2-builtin ranges produce lists - filter = __builtin__.filter - map = __builtin__.map - range = __builtin__.range - reduce = __builtin__.reduce - zip = __builtin__.zip - 
__all__ = [] diff --git a/contrib/python/future/past/tests/__init__.py b/contrib/python/future/past/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 --- a/contrib/python/future/past/tests/__init__.py +++ /dev/null diff --git a/contrib/python/future/past/types/__init__.py b/contrib/python/future/past/types/__init__.py deleted file mode 100644 index 91dd270f2df..00000000000 --- a/contrib/python/future/past/types/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Forward-ports of types from Python 2 for use with Python 3: - -- ``basestring``: equivalent to ``(str, bytes)`` in ``isinstance`` checks -- ``dict``: with list-producing .keys() etc. methods -- ``str``: bytes-like, but iterating over them doesn't product integers -- ``long``: alias of Py3 int with ``L`` suffix in the ``repr`` -- ``unicode``: alias of Py3 str with ``u`` prefix in the ``repr`` - -""" - -from past import utils - -if utils.PY2: - import __builtin__ - basestring = __builtin__.basestring - dict = __builtin__.dict - str = __builtin__.str - long = __builtin__.long - unicode = __builtin__.unicode - __all__ = [] -else: - from .basestring import basestring - from .olddict import olddict - from .oldstr import oldstr - long = int - unicode = str - # from .unicode import unicode - __all__ = ['basestring', 'olddict', 'oldstr', 'long', 'unicode'] diff --git a/contrib/python/future/past/types/basestring.py b/contrib/python/future/past/types/basestring.py deleted file mode 100644 index 1cab22f6caf..00000000000 --- a/contrib/python/future/past/types/basestring.py +++ /dev/null @@ -1,39 +0,0 @@ -""" -An implementation of the basestring type for Python 3 - -Example use: - ->>> s = b'abc' ->>> assert isinstance(s, basestring) ->>> from past.types import str as oldstr ->>> s2 = oldstr(b'abc') ->>> assert isinstance(s2, basestring) - -""" - -import sys - -from past.utils import with_metaclass, PY2 - -if PY2: - str = unicode - -ver = sys.version_info[:2] - - -class BaseBaseString(type): - def 
__instancecheck__(cls, instance): - return isinstance(instance, (bytes, str)) - - def __subclasshook__(cls, thing): - # TODO: What should go here? - raise NotImplemented - - -class basestring(with_metaclass(BaseBaseString)): - """ - A minimal backport of the Python 2 basestring type to Py3 - """ - - -__all__ = ['basestring'] diff --git a/contrib/python/future/past/types/olddict.py b/contrib/python/future/past/types/olddict.py deleted file mode 100644 index f4f92a26a6b..00000000000 --- a/contrib/python/future/past/types/olddict.py +++ /dev/null @@ -1,96 +0,0 @@ -""" -A dict subclass for Python 3 that behaves like Python 2's dict - -Example use: - ->>> from past.builtins import dict ->>> d1 = dict() # instead of {} for an empty dict ->>> d2 = dict(key1='value1', key2='value2') - -The keys, values and items methods now return lists on Python 3.x and there are -methods for iterkeys, itervalues, iteritems, and viewkeys etc. - ->>> for d in (d1, d2): -... assert isinstance(d.keys(), list) -... assert isinstance(d.values(), list) -... 
assert isinstance(d.items(), list) -""" - -import sys - -from past.utils import with_metaclass - - -_builtin_dict = dict -ver = sys.version_info[:2] - - -class BaseOldDict(type): - def __instancecheck__(cls, instance): - return isinstance(instance, _builtin_dict) - - -class olddict(with_metaclass(BaseOldDict, _builtin_dict)): - """ - A backport of the Python 3 dict object to Py2 - """ - iterkeys = _builtin_dict.keys - viewkeys = _builtin_dict.keys - - def keys(self): - return list(super(olddict, self).keys()) - - itervalues = _builtin_dict.values - viewvalues = _builtin_dict.values - - def values(self): - return list(super(olddict, self).values()) - - iteritems = _builtin_dict.items - viewitems = _builtin_dict.items - - def items(self): - return list(super(olddict, self).items()) - - def has_key(self, k): - """ - D.has_key(k) -> True if D has a key k, else False - """ - return k in self - - # def __new__(cls, *args, **kwargs): - # """ - # dict() -> new empty dictionary - # dict(mapping) -> new dictionary initialized from a mapping object's - # (key, value) pairs - # dict(iterable) -> new dictionary initialized as if via: - # d = {} - # for k, v in iterable: - # d[k] = v - # dict(**kwargs) -> new dictionary initialized with the name=value pairs - # in the keyword argument list. For example: dict(one=1, two=2) - - # """ - # - # if len(args) == 0: - # return super(olddict, cls).__new__(cls) - # # Was: elif isinstance(args[0], newbytes): - # # We use type() instead of the above because we're redefining - # # this to be True for all unicode string subclasses. Warning: - # # This may render newstr un-subclassable. 
- # elif type(args[0]) == olddict: - # return args[0] - # # elif isinstance(args[0], _builtin_dict): - # # value = args[0] - # else: - # value = args[0] - # return super(olddict, cls).__new__(cls, value) - - def __native__(self): - """ - Hook for the past.utils.native() function - """ - return super(oldbytes, self) - - -__all__ = ['olddict'] diff --git a/contrib/python/future/past/types/oldstr.py b/contrib/python/future/past/types/oldstr.py deleted file mode 100644 index a477d8844e1..00000000000 --- a/contrib/python/future/past/types/oldstr.py +++ /dev/null @@ -1,135 +0,0 @@ -""" -Pure-Python implementation of a Python 2-like str object for Python 3. -""" - -from numbers import Integral - -from past.utils import PY2, with_metaclass - -if PY2: - from collections import Iterable -else: - from collections.abc import Iterable - -_builtin_bytes = bytes - - -class BaseOldStr(type): - def __instancecheck__(cls, instance): - return isinstance(instance, _builtin_bytes) - - -def unescape(s): - """ - Interprets strings with escape sequences - - Example: - >>> s = unescape(r'abc\\def') # i.e. 
'abc\\\\def' - >>> print(s) - 'abc\def' - >>> s2 = unescape('abc\\ndef') - >>> len(s2) - 8 - >>> print(s2) - abc - def - """ - return s.encode().decode('unicode_escape') - - -class oldstr(with_metaclass(BaseOldStr, _builtin_bytes)): - """ - A forward port of the Python 2 8-bit string object to Py3 - """ - # Python 2 strings have no __iter__ method: - @property - def __iter__(self): - raise AttributeError - - def __dir__(self): - return [thing for thing in dir(_builtin_bytes) if thing != '__iter__'] - - # def __new__(cls, *args, **kwargs): - # """ - # From the Py3 bytes docstring: - - # bytes(iterable_of_ints) -> bytes - # bytes(string, encoding[, errors]) -> bytes - # bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer - # bytes(int) -> bytes object of size given by the parameter initialized with null bytes - # bytes() -> empty bytes object - # - # Construct an immutable array of bytes from: - # - an iterable yielding integers in range(256) - # - a text string encoded using the specified encoding - # - any object implementing the buffer API. - # - an integer - # """ - # - # if len(args) == 0: - # return super(newbytes, cls).__new__(cls) - # # Was: elif isinstance(args[0], newbytes): - # # We use type() instead of the above because we're redefining - # # this to be True for all unicode string subclasses. Warning: - # # This may render newstr un-subclassable. 
- # elif type(args[0]) == newbytes: - # return args[0] - # elif isinstance(args[0], _builtin_bytes): - # value = args[0] - # elif isinstance(args[0], unicode): - # if 'encoding' not in kwargs: - # raise TypeError('unicode string argument without an encoding') - # ### - # # Was: value = args[0].encode(**kwargs) - # # Python 2.6 string encode() method doesn't take kwargs: - # # Use this instead: - # newargs = [kwargs['encoding']] - # if 'errors' in kwargs: - # newargs.append(kwargs['errors']) - # value = args[0].encode(*newargs) - # ### - # elif isinstance(args[0], Iterable): - # if len(args[0]) == 0: - # # What is this? - # raise ValueError('unknown argument type') - # elif len(args[0]) > 0 and isinstance(args[0][0], Integral): - # # It's a list of integers - # value = b''.join([chr(x) for x in args[0]]) - # else: - # raise ValueError('item cannot be interpreted as an integer') - # elif isinstance(args[0], Integral): - # if args[0] < 0: - # raise ValueError('negative count') - # value = b'\x00' * args[0] - # else: - # value = args[0] - # return super(newbytes, cls).__new__(cls, value) - - def __repr__(self): - s = super(oldstr, self).__repr__() # e.g. b'abc' on Py3, b'abc' on Py3 - return s[1:] - - def __str__(self): - s = super(oldstr, self).__str__() # e.g. "b'abc'" or "b'abc\\ndef' - # TODO: fix this: - assert s[:2] == "b'" and s[-1] == "'" - return unescape(s[2:-1]) # e.g. 
'abc' or 'abc\ndef' - - def __getitem__(self, y): - if isinstance(y, Integral): - return super(oldstr, self).__getitem__(slice(y, y+1)) - else: - return super(oldstr, self).__getitem__(y) - - def __getslice__(self, *args): - return self.__getitem__(slice(*args)) - - def __contains__(self, key): - if isinstance(key, int): - return False - - def __native__(self): - return bytes(self) - - -__all__ = ['oldstr'] diff --git a/contrib/python/future/past/utils/__init__.py b/contrib/python/future/past/utils/__init__.py deleted file mode 100644 index f6b2642df5d..00000000000 --- a/contrib/python/future/past/utils/__init__.py +++ /dev/null @@ -1,97 +0,0 @@ -""" -Various non-built-in utility functions and definitions for Py2 -compatibility in Py3. - -For example: - - >>> # The old_div() function behaves like Python 2's / operator - >>> # without "from __future__ import division" - >>> from past.utils import old_div - >>> old_div(3, 2) # like 3/2 in Py2 - 0 - >>> old_div(3, 2.0) # like 3/2.0 in Py2 - 1.5 -""" - -import sys -import numbers - -PY3 = sys.version_info[0] >= 3 -PY2 = sys.version_info[0] == 2 -PYPY = hasattr(sys, 'pypy_translation_info') - - -def with_metaclass(meta, *bases): - """ - Function from jinja2/_compat.py. License: BSD. - - Use it like this:: - - class BaseForm(object): - pass - - class FormType(type): - pass - - class Form(with_metaclass(FormType, BaseForm)): - pass - - This requires a bit of explanation: the basic idea is to make a - dummy metaclass for one level of class instantiation that replaces - itself with the actual metaclass. Because of internal type checks - we also need to make sure that we downgrade the custom metaclass - for one level to something closer to type (that's why __call__ and - __init__ comes back from type etc.). - - This has the advantage over six.with_metaclass of not introducing - dummy classes into the final MRO. 
- """ - class metaclass(meta): - __call__ = type.__call__ - __init__ = type.__init__ - def __new__(cls, name, this_bases, d): - if this_bases is None: - return type.__new__(cls, name, (), d) - return meta(name, bases, d) - return metaclass('temporary_class', None, {}) - - -def native(obj): - """ - On Py2, this is a no-op: native(obj) -> obj - - On Py3, returns the corresponding native Py3 types that are - superclasses for forward-ported objects from Py2: - - >>> from past.builtins import str, dict - - >>> native(str(b'ABC')) # Output on Py3 follows. On Py2, output is 'ABC' - b'ABC' - >>> type(native(str(b'ABC'))) - bytes - - Existing native types on Py3 will be returned unchanged: - - >>> type(native(b'ABC')) - bytes - """ - if hasattr(obj, '__native__'): - return obj.__native__() - else: - return obj - - -# An alias for future.utils.old_div(): -def old_div(a, b): - """ - Equivalent to ``a / b`` on Python 2 without ``from __future__ import - division``. - - TODO: generalize this to other objects (like arrays etc.) - """ - if isinstance(a, numbers.Integral) and isinstance(b, numbers.Integral): - return a // b - else: - return a / b - -__all__ = ['PY3', 'PY2', 'PYPY', 'with_metaclass', 'native', 'old_div'] diff --git a/contrib/python/future/queue/__init__.py b/contrib/python/future/queue/__init__.py deleted file mode 100644 index 22bd296b63d..00000000000 --- a/contrib/python/future/queue/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from __future__ import absolute_import -import sys -__future_module__ = True - -if sys.version_info[0] < 3: - from Queue import * -else: - raise ImportError('This package should not be accessible on Python 3. 
' - 'Either you are trying to run from the python-future src folder ' - 'or your installation of python-future is corrupted.') diff --git a/contrib/python/future/reprlib/__init__.py b/contrib/python/future/reprlib/__init__.py deleted file mode 100644 index 6ccf9c006f4..00000000000 --- a/contrib/python/future/reprlib/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -from __future__ import absolute_import -import sys - -if sys.version_info[0] < 3: - from repr import * -else: - raise ImportError('This package should not be accessible on Python 3. ' - 'Either you are trying to run from the python-future src folder ' - 'or your installation of python-future is corrupted.') diff --git a/contrib/python/future/socketserver/__init__.py b/contrib/python/future/socketserver/__init__.py deleted file mode 100644 index c5b8c9c28be..00000000000 --- a/contrib/python/future/socketserver/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -from __future__ import absolute_import -import sys - -if sys.version_info[0] < 3: - from SocketServer import * -else: - raise ImportError('This package should not be accessible on Python 3. ' - 'Either you are trying to run from the python-future src folder ' - 'or your installation of python-future is corrupted.') diff --git a/contrib/python/future/winreg/__init__.py b/contrib/python/future/winreg/__init__.py deleted file mode 100644 index 97243bbb8fc..00000000000 --- a/contrib/python/future/winreg/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from __future__ import absolute_import -import sys -__future_module__ = True - -if sys.version_info[0] < 3: - from _winreg import * -else: - raise ImportError('This package should not be accessible on Python 3. 
' - 'Either you are trying to run from the python-future src folder ' - 'or your installation of python-future is corrupted.') diff --git a/contrib/python/future/xmlrpc/__init__.py b/contrib/python/future/xmlrpc/__init__.py deleted file mode 100644 index e4f853e53ce..00000000000 --- a/contrib/python/future/xmlrpc/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -from __future__ import absolute_import -import sys - -if sys.version_info[0] < 3: - pass -else: - raise ImportError('This package should not be accessible on Python 3. ' - 'Either you are trying to run from the python-future src folder ' - 'or your installation of python-future is corrupted.') diff --git a/contrib/python/future/xmlrpc/client.py b/contrib/python/future/xmlrpc/client.py deleted file mode 100644 index a8d0827e9b5..00000000000 --- a/contrib/python/future/xmlrpc/client.py +++ /dev/null @@ -1,5 +0,0 @@ -from __future__ import absolute_import -import sys - -assert sys.version_info[0] < 3 -from xmlrpclib import * diff --git a/contrib/python/future/xmlrpc/server.py b/contrib/python/future/xmlrpc/server.py deleted file mode 100644 index a8d0827e9b5..00000000000 --- a/contrib/python/future/xmlrpc/server.py +++ /dev/null @@ -1,5 +0,0 @@ -from __future__ import absolute_import -import sys - -assert sys.version_info[0] < 3 -from xmlrpclib import * |