aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/python/future
diff options
context:
space:
mode:
authororivej <orivej@yandex-team.ru>2022-02-10 16:45:01 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:45:01 +0300
commit2d37894b1b037cf24231090eda8589bbb44fb6fc (patch)
treebe835aa92c6248212e705f25388ebafcf84bc7a1 /contrib/python/future
parent718c552901d703c502ccbefdfc3c9028d608b947 (diff)
downloadydb-2d37894b1b037cf24231090eda8589bbb44fb6fc.tar.gz
Restoring authorship annotation for <orivej@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/python/future')
-rw-r--r--contrib/python/future/_dummy_thread/__init__.py20
-rw-r--r--contrib/python/future/_markupbase/__init__.py20
-rw-r--r--contrib/python/future/_thread/__init__.py20
-rw-r--r--contrib/python/future/builtins/__init__.py24
-rw-r--r--contrib/python/future/copyreg/__init__.py18
-rw-r--r--contrib/python/future/future/__init__.py176
-rw-r--r--contrib/python/future/future/backports/__init__.py50
-rw-r--r--contrib/python/future/future/backports/_markupbase.py844
-rw-r--r--contrib/python/future/future/backports/datetime.py4304
-rw-r--r--contrib/python/future/future/backports/email/__init__.py156
-rw-r--r--contrib/python/future/future/backports/email/_encoded_words.py464
-rw-r--r--contrib/python/future/future/backports/email/_header_value_parser.py5930
-rw-r--r--contrib/python/future/future/backports/email/_parseaddr.py1092
-rw-r--r--contrib/python/future/future/backports/email/_policybase.py730
-rw-r--r--contrib/python/future/future/backports/email/base64mime.py240
-rw-r--r--contrib/python/future/future/backports/email/charset.py818
-rw-r--r--contrib/python/future/future/backports/email/encoders.py180
-rw-r--r--contrib/python/future/future/backports/email/errors.py222
-rw-r--r--contrib/python/future/future/backports/email/feedparser.py1050
-rw-r--r--contrib/python/future/future/backports/email/generator.py996
-rw-r--r--contrib/python/future/future/backports/email/header.py1162
-rw-r--r--contrib/python/future/future/backports/email/headerregistry.py1184
-rw-r--r--contrib/python/future/future/backports/email/iterators.py148
-rw-r--r--contrib/python/future/future/backports/email/message.py1758
-rw-r--r--contrib/python/future/future/backports/email/mime/application.py78
-rw-r--r--contrib/python/future/future/backports/email/mime/audio.py148
-rw-r--r--contrib/python/future/future/backports/email/mime/base.py50
-rw-r--r--contrib/python/future/future/backports/email/mime/image.py96
-rw-r--r--contrib/python/future/future/backports/email/mime/message.py72
-rw-r--r--contrib/python/future/future/backports/email/mime/multipart.py98
-rw-r--r--contrib/python/future/future/backports/email/mime/nonmultipart.py48
-rw-r--r--contrib/python/future/future/backports/email/mime/text.py88
-rw-r--r--contrib/python/future/future/backports/email/parser.py270
-rw-r--r--contrib/python/future/future/backports/email/policy.py386
-rw-r--r--contrib/python/future/future/backports/email/quoprimime.py652
-rw-r--r--contrib/python/future/future/backports/email/utils.py800
-rw-r--r--contrib/python/future/future/backports/html/__init__.py54
-rw-r--r--contrib/python/future/future/backports/html/entities.py5028
-rw-r--r--contrib/python/future/future/backports/html/parser.py1072
-rw-r--r--contrib/python/future/future/backports/http/client.py2656
-rw-r--r--contrib/python/future/future/backports/http/cookiejar.py4214
-rw-r--r--contrib/python/future/future/backports/http/cookies.py1192
-rw-r--r--contrib/python/future/future/backports/http/server.py2452
-rw-r--r--contrib/python/future/future/backports/misc.py1874
-rw-r--r--contrib/python/future/future/backports/socket.py908
-rw-r--r--contrib/python/future/future/backports/socketserver.py1494
-rw-r--r--contrib/python/future/future/backports/total_ordering.py76
-rw-r--r--contrib/python/future/future/backports/urllib/error.py150
-rw-r--r--contrib/python/future/future/backports/urllib/parse.py1978
-rw-r--r--contrib/python/future/future/backports/urllib/request.py5278
-rw-r--r--contrib/python/future/future/backports/urllib/response.py206
-rw-r--r--contrib/python/future/future/backports/urllib/robotparser.py422
-rw-r--r--contrib/python/future/future/backports/xmlrpc/__init__.py2
-rw-r--r--contrib/python/future/future/backports/xmlrpc/client.py2984
-rw-r--r--contrib/python/future/future/backports/xmlrpc/server.py1998
-rw-r--r--contrib/python/future/future/builtins/__init__.py96
-rw-r--r--contrib/python/future/future/builtins/disabled.py132
-rw-r--r--contrib/python/future/future/builtins/iterators.py104
-rw-r--r--contrib/python/future/future/builtins/misc.py244
-rw-r--r--contrib/python/future/future/builtins/newnext.py138
-rw-r--r--contrib/python/future/future/builtins/newround.py190
-rw-r--r--contrib/python/future/future/builtins/newsuper.py222
-rw-r--r--contrib/python/future/future/moves/__init__.py14
-rw-r--r--contrib/python/future/future/moves/_dummy_thread.py14
-rw-r--r--contrib/python/future/future/moves/_markupbase.py16
-rw-r--r--contrib/python/future/future/moves/_thread.py16
-rw-r--r--contrib/python/future/future/moves/builtins.py20
-rw-r--r--contrib/python/future/future/moves/collections.py36
-rw-r--r--contrib/python/future/future/moves/configparser.py16
-rw-r--r--contrib/python/future/future/moves/copyreg.py14
-rw-r--r--contrib/python/future/future/moves/dbm/__init__.py40
-rw-r--r--contrib/python/future/future/moves/dbm/dumb.py18
-rw-r--r--contrib/python/future/future/moves/dbm/ndbm.py18
-rw-r--r--contrib/python/future/future/moves/html/__init__.py62
-rw-r--r--contrib/python/future/future/moves/html/entities.py16
-rw-r--r--contrib/python/future/future/moves/html/parser.py16
-rw-r--r--contrib/python/future/future/moves/http/__init__.py8
-rw-r--r--contrib/python/future/future/moves/http/client.py16
-rw-r--r--contrib/python/future/future/moves/http/cookiejar.py16
-rw-r--r--contrib/python/future/future/moves/http/cookies.py18
-rw-r--r--contrib/python/future/future/moves/http/server.py40
-rw-r--r--contrib/python/future/future/moves/itertools.py16
-rw-r--r--contrib/python/future/future/moves/pickle.py22
-rw-r--r--contrib/python/future/future/moves/queue.py16
-rw-r--r--contrib/python/future/future/moves/reprlib.py16
-rw-r--r--contrib/python/future/future/moves/socketserver.py16
-rw-r--r--contrib/python/future/future/moves/subprocess.py22
-rw-r--r--contrib/python/future/future/moves/sys.py16
-rw-r--r--contrib/python/future/future/moves/urllib/__init__.py10
-rw-r--r--contrib/python/future/future/moves/urllib/error.py28
-rw-r--r--contrib/python/future/future/moves/urllib/parse.py52
-rw-r--r--contrib/python/future/future/moves/urllib/request.py160
-rw-r--r--contrib/python/future/future/moves/urllib/response.py24
-rw-r--r--contrib/python/future/future/moves/urllib/robotparser.py16
-rw-r--r--contrib/python/future/future/moves/winreg.py16
-rw-r--r--contrib/python/future/future/moves/xmlrpc/client.py14
-rw-r--r--contrib/python/future/future/moves/xmlrpc/server.py14
-rw-r--r--contrib/python/future/future/standard_library/__init__.py1620
-rw-r--r--contrib/python/future/future/tests/base.py1060
-rw-r--r--contrib/python/future/future/types/__init__.py510
-rw-r--r--contrib/python/future/future/types/newbytes.py836
-rw-r--r--contrib/python/future/future/types/newdict.py222
-rw-r--r--contrib/python/future/future/types/newint.py754
-rw-r--r--contrib/python/future/future/types/newlist.py190
-rw-r--r--contrib/python/future/future/types/newmemoryview.py48
-rw-r--r--contrib/python/future/future/types/newobject.py230
-rw-r--r--contrib/python/future/future/types/newopen.py64
-rw-r--r--contrib/python/future/future/types/newrange.py314
-rw-r--r--contrib/python/future/future/types/newstr.py798
-rw-r--r--contrib/python/future/future/utils/__init__.py1428
-rw-r--r--contrib/python/future/future/utils/surrogateescape.py394
-rw-r--r--contrib/python/future/html/__init__.py18
-rw-r--r--contrib/python/future/html/entities.py14
-rw-r--r--contrib/python/future/html/parser.py14
-rw-r--r--contrib/python/future/http/__init__.py18
-rw-r--r--contrib/python/future/http/client.py174
-rw-r--r--contrib/python/future/http/cookiejar.py12
-rw-r--r--contrib/python/future/http/cookies.py14
-rw-r--r--contrib/python/future/http/server.py36
-rw-r--r--contrib/python/future/queue/__init__.py20
-rw-r--r--contrib/python/future/reprlib/__init__.py18
-rw-r--r--contrib/python/future/socketserver/__init__.py18
-rw-r--r--contrib/python/future/winreg/__init__.py20
-rw-r--r--contrib/python/future/xmlrpc/__init__.py18
-rw-r--r--contrib/python/future/xmlrpc/client.py10
-rw-r--r--contrib/python/future/xmlrpc/server.py10
-rw-r--r--contrib/python/future/ya.make266
127 files changed, 34788 insertions, 34788 deletions
diff --git a/contrib/python/future/_dummy_thread/__init__.py b/contrib/python/future/_dummy_thread/__init__.py
index a113b02bea..63dced6e5e 100644
--- a/contrib/python/future/_dummy_thread/__init__.py
+++ b/contrib/python/future/_dummy_thread/__init__.py
@@ -1,10 +1,10 @@
-from __future__ import absolute_import
-import sys
-__future_module__ = True
-
-if sys.version_info[0] < 3:
- from dummy_thread import *
-else:
- raise ImportError('This package should not be accessible on Python 3. '
- 'Either you are trying to run from the python-future src folder '
- 'or your installation of python-future is corrupted.')
+from __future__ import absolute_import
+import sys
+__future_module__ = True
+
+if sys.version_info[0] < 3:
+ from dummy_thread import *
+else:
+ raise ImportError('This package should not be accessible on Python 3. '
+ 'Either you are trying to run from the python-future src folder '
+ 'or your installation of python-future is corrupted.')
diff --git a/contrib/python/future/_markupbase/__init__.py b/contrib/python/future/_markupbase/__init__.py
index 9313c77af9..290906540c 100644
--- a/contrib/python/future/_markupbase/__init__.py
+++ b/contrib/python/future/_markupbase/__init__.py
@@ -1,10 +1,10 @@
-from __future__ import absolute_import
-import sys
-__future_module__ = True
-
-if sys.version_info[0] < 3:
- from markupbase import *
-else:
- raise ImportError('This package should not be accessible on Python 3. '
- 'Either you are trying to run from the python-future src folder '
- 'or your installation of python-future is corrupted.')
+from __future__ import absolute_import
+import sys
+__future_module__ = True
+
+if sys.version_info[0] < 3:
+ from markupbase import *
+else:
+ raise ImportError('This package should not be accessible on Python 3. '
+ 'Either you are trying to run from the python-future src folder '
+ 'or your installation of python-future is corrupted.')
diff --git a/contrib/python/future/_thread/__init__.py b/contrib/python/future/_thread/__init__.py
index 0e6898334c..9f2a51c75a 100644
--- a/contrib/python/future/_thread/__init__.py
+++ b/contrib/python/future/_thread/__init__.py
@@ -1,10 +1,10 @@
-from __future__ import absolute_import
-import sys
-__future_module__ = True
-
-if sys.version_info[0] < 3:
- from thread import *
-else:
- raise ImportError('This package should not be accessible on Python 3. '
- 'Either you are trying to run from the python-future src folder '
- 'or your installation of python-future is corrupted.')
+from __future__ import absolute_import
+import sys
+__future_module__ = True
+
+if sys.version_info[0] < 3:
+ from thread import *
+else:
+ raise ImportError('This package should not be accessible on Python 3. '
+ 'Either you are trying to run from the python-future src folder '
+ 'or your installation of python-future is corrupted.')
diff --git a/contrib/python/future/builtins/__init__.py b/contrib/python/future/builtins/__init__.py
index e01cc0bdb2..4f936f2844 100644
--- a/contrib/python/future/builtins/__init__.py
+++ b/contrib/python/future/builtins/__init__.py
@@ -1,12 +1,12 @@
-from __future__ import absolute_import
-import sys
-__future_module__ = True
-
-if sys.version_info[0] < 3:
- from __builtin__ import *
- # Overwrite any old definitions with the equivalent future.builtins ones:
- from future.builtins import *
-else:
- raise ImportError('This package should not be accessible on Python 3. '
- 'Either you are trying to run from the python-future src folder '
- 'or your installation of python-future is corrupted.')
+from __future__ import absolute_import
+import sys
+__future_module__ = True
+
+if sys.version_info[0] < 3:
+ from __builtin__ import *
+ # Overwrite any old definitions with the equivalent future.builtins ones:
+ from future.builtins import *
+else:
+ raise ImportError('This package should not be accessible on Python 3. '
+ 'Either you are trying to run from the python-future src folder '
+ 'or your installation of python-future is corrupted.')
diff --git a/contrib/python/future/copyreg/__init__.py b/contrib/python/future/copyreg/__init__.py
index 34763a6a1c..51bd4b9a74 100644
--- a/contrib/python/future/copyreg/__init__.py
+++ b/contrib/python/future/copyreg/__init__.py
@@ -1,9 +1,9 @@
-from __future__ import absolute_import
-import sys
-
-if sys.version_info[0] < 3:
- from copy_reg import *
-else:
- raise ImportError('This package should not be accessible on Python 3. '
- 'Either you are trying to run from the python-future src folder '
- 'or your installation of python-future is corrupted.')
+from __future__ import absolute_import
+import sys
+
+if sys.version_info[0] < 3:
+ from copy_reg import *
+else:
+ raise ImportError('This package should not be accessible on Python 3. '
+ 'Either you are trying to run from the python-future src folder '
+ 'or your installation of python-future is corrupted.')
diff --git a/contrib/python/future/future/__init__.py b/contrib/python/future/future/__init__.py
index 18fea3d01f..ad419d67e2 100644
--- a/contrib/python/future/future/__init__.py
+++ b/contrib/python/future/future/__init__.py
@@ -1,93 +1,93 @@
-"""
-future: Easy, safe support for Python 2/3 compatibility
-=======================================================
-
-``future`` is the missing compatibility layer between Python 2 and Python
-3. It allows you to use a single, clean Python 3.x-compatible codebase to
-support both Python 2 and Python 3 with minimal overhead.
-
-It is designed to be used as follows::
-
- from __future__ import (absolute_import, division,
- print_function, unicode_literals)
- from builtins import (
- bytes, dict, int, list, object, range, str,
- ascii, chr, hex, input, next, oct, open,
- pow, round, super,
- filter, map, zip)
-
-followed by predominantly standard, idiomatic Python 3 code that then runs
-similarly on Python 2.6/2.7 and Python 3.3+.
-
-The imports have no effect on Python 3. On Python 2, they shadow the
-corresponding builtins, which normally have different semantics on Python 3
-versus 2, to provide their Python 3 semantics.
-
-
-Standard library reorganization
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-``future`` supports the standard library reorganization (PEP 3108) through the
-following Py3 interfaces:
-
- >>> # Top-level packages with Py3 names provided on Py2:
- >>> import html.parser
- >>> import queue
- >>> import tkinter.dialog
- >>> import xmlrpc.client
- >>> # etc.
-
- >>> # Aliases provided for extensions to existing Py2 module names:
- >>> from future.standard_library import install_aliases
- >>> install_aliases()
-
- >>> from collections import Counter, OrderedDict # backported to Py2.6
- >>> from collections import UserDict, UserList, UserString
- >>> import urllib.request
- >>> from itertools import filterfalse, zip_longest
- >>> from subprocess import getoutput, getstatusoutput
-
-
-Automatic conversion
---------------------
-
-An included script called `futurize
-<http://python-future.org/automatic_conversion.html>`_ aids in converting
-code (from either Python 2 or Python 3) to code compatible with both
-platforms. It is similar to ``python-modernize`` but goes further in
-providing Python 3 compatibility through the use of the backported types
-and builtin functions in ``future``.
-
-
-Documentation
--------------
-
-See: http://python-future.org
-
-
-Credits
--------
-
+"""
+future: Easy, safe support for Python 2/3 compatibility
+=======================================================
+
+``future`` is the missing compatibility layer between Python 2 and Python
+3. It allows you to use a single, clean Python 3.x-compatible codebase to
+support both Python 2 and Python 3 with minimal overhead.
+
+It is designed to be used as follows::
+
+ from __future__ import (absolute_import, division,
+ print_function, unicode_literals)
+ from builtins import (
+ bytes, dict, int, list, object, range, str,
+ ascii, chr, hex, input, next, oct, open,
+ pow, round, super,
+ filter, map, zip)
+
+followed by predominantly standard, idiomatic Python 3 code that then runs
+similarly on Python 2.6/2.7 and Python 3.3+.
+
+The imports have no effect on Python 3. On Python 2, they shadow the
+corresponding builtins, which normally have different semantics on Python 3
+versus 2, to provide their Python 3 semantics.
+
+
+Standard library reorganization
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+``future`` supports the standard library reorganization (PEP 3108) through the
+following Py3 interfaces:
+
+ >>> # Top-level packages with Py3 names provided on Py2:
+ >>> import html.parser
+ >>> import queue
+ >>> import tkinter.dialog
+ >>> import xmlrpc.client
+ >>> # etc.
+
+ >>> # Aliases provided for extensions to existing Py2 module names:
+ >>> from future.standard_library import install_aliases
+ >>> install_aliases()
+
+ >>> from collections import Counter, OrderedDict # backported to Py2.6
+ >>> from collections import UserDict, UserList, UserString
+ >>> import urllib.request
+ >>> from itertools import filterfalse, zip_longest
+ >>> from subprocess import getoutput, getstatusoutput
+
+
+Automatic conversion
+--------------------
+
+An included script called `futurize
+<http://python-future.org/automatic_conversion.html>`_ aids in converting
+code (from either Python 2 or Python 3) to code compatible with both
+platforms. It is similar to ``python-modernize`` but goes further in
+providing Python 3 compatibility through the use of the backported types
+and builtin functions in ``future``.
+
+
+Documentation
+-------------
+
+See: http://python-future.org
+
+
+Credits
+-------
+
:Author: Ed Schofield, Jordan M. Adler, et al
-:Sponsor: Python Charmers Pty Ltd, Australia, and Python Charmers Pte
- Ltd, Singapore. http://pythoncharmers.com
-:Others: See docs/credits.rst or http://python-future.org/credits.html
-
-
-Licensing
----------
+:Sponsor: Python Charmers Pty Ltd, Australia, and Python Charmers Pte
+ Ltd, Singapore. http://pythoncharmers.com
+:Others: See docs/credits.rst or http://python-future.org/credits.html
+
+
+Licensing
+---------
Copyright 2013-2019 Python Charmers Pty Ltd, Australia.
-The software is distributed under an MIT licence. See LICENSE.txt.
-
-"""
-
-__title__ = 'future'
-__author__ = 'Ed Schofield'
-__license__ = 'MIT'
+The software is distributed under an MIT licence. See LICENSE.txt.
+
+"""
+
+__title__ = 'future'
+__author__ = 'Ed Schofield'
+__license__ = 'MIT'
__copyright__ = 'Copyright 2013-2019 Python Charmers Pty Ltd'
-__ver_major__ = 0
+__ver_major__ = 0
__ver_minor__ = 18
__ver_patch__ = 2
-__ver_sub__ = ''
-__version__ = "%d.%d.%d%s" % (__ver_major__, __ver_minor__,
- __ver_patch__, __ver_sub__)
+__ver_sub__ = ''
+__version__ = "%d.%d.%d%s" % (__ver_major__, __ver_minor__,
+ __ver_patch__, __ver_sub__)
diff --git a/contrib/python/future/future/backports/__init__.py b/contrib/python/future/future/backports/__init__.py
index 11e71023e1..c71e065354 100644
--- a/contrib/python/future/future/backports/__init__.py
+++ b/contrib/python/future/future/backports/__init__.py
@@ -1,26 +1,26 @@
-"""
-future.backports package
-"""
-
-from __future__ import absolute_import
-
-import sys
-
-__future_module__ = True
-from future.standard_library import import_top_level_modules
-
-
+"""
+future.backports package
+"""
+
+from __future__ import absolute_import
+
+import sys
+
+__future_module__ = True
+from future.standard_library import import_top_level_modules
+
+
if sys.version_info[0] >= 3:
- import_top_level_modules()
-
-
-from .misc import (ceil,
- OrderedDict,
- Counter,
- ChainMap,
- check_output,
- count,
- recursive_repr,
- _count_elements,
- cmp_to_key
- )
+ import_top_level_modules()
+
+
+from .misc import (ceil,
+ OrderedDict,
+ Counter,
+ ChainMap,
+ check_output,
+ count,
+ recursive_repr,
+ _count_elements,
+ cmp_to_key
+ )
diff --git a/contrib/python/future/future/backports/_markupbase.py b/contrib/python/future/future/backports/_markupbase.py
index c3d205c155..d51bfc7ef1 100644
--- a/contrib/python/future/future/backports/_markupbase.py
+++ b/contrib/python/future/future/backports/_markupbase.py
@@ -1,422 +1,422 @@
-"""Shared support for scanning document type declarations in HTML and XHTML.
-
-Backported for python-future from Python 3.3. Reason: ParserBase is an
-old-style class in the Python 2.7 source of markupbase.py, which I suspect
-might be the cause of sporadic unit-test failures on travis-ci.org with
-test_htmlparser.py. The test failures look like this:
-
- ======================================================================
-
-ERROR: test_attr_entity_replacement (future.tests.test_htmlparser.AttributesStrictTestCase)
-
-----------------------------------------------------------------------
-
-Traceback (most recent call last):
- File "/home/travis/build/edschofield/python-future/future/tests/test_htmlparser.py", line 661, in test_attr_entity_replacement
- [("starttag", "a", [("b", "&><\"'")])])
- File "/home/travis/build/edschofield/python-future/future/tests/test_htmlparser.py", line 93, in _run_check
- collector = self.get_collector()
- File "/home/travis/build/edschofield/python-future/future/tests/test_htmlparser.py", line 617, in get_collector
- return EventCollector(strict=True)
- File "/home/travis/build/edschofield/python-future/future/tests/test_htmlparser.py", line 27, in __init__
- html.parser.HTMLParser.__init__(self, *args, **kw)
- File "/home/travis/build/edschofield/python-future/future/backports/html/parser.py", line 135, in __init__
- self.reset()
- File "/home/travis/build/edschofield/python-future/future/backports/html/parser.py", line 143, in reset
- _markupbase.ParserBase.reset(self)
-
-TypeError: unbound method reset() must be called with ParserBase instance as first argument (got EventCollector instance instead)
-
-This module is used as a foundation for the html.parser module. It has no
-documented public API and should not be used directly.
-
-"""
-
-import re
-
-_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*').match
-_declstringlit_match = re.compile(r'(\'[^\']*\'|"[^"]*")\s*').match
-_commentclose = re.compile(r'--\s*>')
-_markedsectionclose = re.compile(r']\s*]\s*>')
-
-# An analysis of the MS-Word extensions is available at
-# http://www.planetpublish.com/xmlarena/xap/Thursday/WordtoXML.pdf
-
-_msmarkedsectionclose = re.compile(r']\s*>')
-
-del re
-
-
-class ParserBase(object):
- """Parser base class which provides some common support methods used
- by the SGML/HTML and XHTML parsers."""
-
- def __init__(self):
- if self.__class__ is ParserBase:
- raise RuntimeError(
- "_markupbase.ParserBase must be subclassed")
-
- def error(self, message):
- raise NotImplementedError(
- "subclasses of ParserBase must override error()")
-
- def reset(self):
- self.lineno = 1
- self.offset = 0
-
- def getpos(self):
- """Return current line number and offset."""
- return self.lineno, self.offset
-
- # Internal -- update line number and offset. This should be
- # called for each piece of data exactly once, in order -- in other
- # words the concatenation of all the input strings to this
- # function should be exactly the entire input.
- def updatepos(self, i, j):
- if i >= j:
- return j
- rawdata = self.rawdata
- nlines = rawdata.count("\n", i, j)
- if nlines:
- self.lineno = self.lineno + nlines
- pos = rawdata.rindex("\n", i, j) # Should not fail
- self.offset = j-(pos+1)
- else:
- self.offset = self.offset + j-i
- return j
-
- _decl_otherchars = ''
-
- # Internal -- parse declaration (for use by subclasses).
- def parse_declaration(self, i):
- # This is some sort of declaration; in "HTML as
- # deployed," this should only be the document type
- # declaration ("<!DOCTYPE html...>").
- # ISO 8879:1986, however, has more complex
- # declaration syntax for elements in <!...>, including:
- # --comment--
- # [marked section]
- # name in the following list: ENTITY, DOCTYPE, ELEMENT,
- # ATTLIST, NOTATION, SHORTREF, USEMAP,
- # LINKTYPE, LINK, IDLINK, USELINK, SYSTEM
- rawdata = self.rawdata
- j = i + 2
- assert rawdata[i:j] == "<!", "unexpected call to parse_declaration"
- if rawdata[j:j+1] == ">":
- # the empty comment <!>
- return j + 1
- if rawdata[j:j+1] in ("-", ""):
- # Start of comment followed by buffer boundary,
- # or just a buffer boundary.
- return -1
- # A simple, practical version could look like: ((name|stringlit) S*) + '>'
- n = len(rawdata)
- if rawdata[j:j+2] == '--': #comment
- # Locate --.*-- as the body of the comment
- return self.parse_comment(i)
- elif rawdata[j] == '[': #marked section
- # Locate [statusWord [...arbitrary SGML...]] as the body of the marked section
- # Where statusWord is one of TEMP, CDATA, IGNORE, INCLUDE, RCDATA
- # Note that this is extended by Microsoft Office "Save as Web" function
- # to include [if...] and [endif].
- return self.parse_marked_section(i)
- else: #all other declaration elements
- decltype, j = self._scan_name(j, i)
- if j < 0:
- return j
- if decltype == "doctype":
- self._decl_otherchars = ''
- while j < n:
- c = rawdata[j]
- if c == ">":
- # end of declaration syntax
- data = rawdata[i+2:j]
- if decltype == "doctype":
- self.handle_decl(data)
- else:
- # According to the HTML5 specs sections "8.2.4.44 Bogus
- # comment state" and "8.2.4.45 Markup declaration open
- # state", a comment token should be emitted.
- # Calling unknown_decl provides more flexibility though.
- self.unknown_decl(data)
- return j + 1
- if c in "\"'":
- m = _declstringlit_match(rawdata, j)
- if not m:
- return -1 # incomplete
- j = m.end()
- elif c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
- name, j = self._scan_name(j, i)
- elif c in self._decl_otherchars:
- j = j + 1
- elif c == "[":
- # this could be handled in a separate doctype parser
- if decltype == "doctype":
- j = self._parse_doctype_subset(j + 1, i)
- elif decltype in set(["attlist", "linktype", "link", "element"]):
- # must tolerate []'d groups in a content model in an element declaration
- # also in data attribute specifications of attlist declaration
- # also link type declaration subsets in linktype declarations
- # also link attribute specification lists in link declarations
- self.error("unsupported '[' char in %s declaration" % decltype)
- else:
- self.error("unexpected '[' char in declaration")
- else:
- self.error(
- "unexpected %r char in declaration" % rawdata[j])
- if j < 0:
- return j
- return -1 # incomplete
-
- # Internal -- parse a marked section
- # Override this to handle MS-word extension syntax <![if word]>content<![endif]>
- def parse_marked_section(self, i, report=1):
- rawdata= self.rawdata
- assert rawdata[i:i+3] == '<![', "unexpected call to parse_marked_section()"
- sectName, j = self._scan_name( i+3, i )
- if j < 0:
- return j
- if sectName in set(["temp", "cdata", "ignore", "include", "rcdata"]):
- # look for standard ]]> ending
- match= _markedsectionclose.search(rawdata, i+3)
- elif sectName in set(["if", "else", "endif"]):
- # look for MS Office ]> ending
- match= _msmarkedsectionclose.search(rawdata, i+3)
- else:
- self.error('unknown status keyword %r in marked section' % rawdata[i+3:j])
- if not match:
- return -1
- if report:
- j = match.start(0)
- self.unknown_decl(rawdata[i+3: j])
- return match.end(0)
-
- # Internal -- parse comment, return length or -1 if not terminated
- def parse_comment(self, i, report=1):
- rawdata = self.rawdata
- if rawdata[i:i+4] != '<!--':
- self.error('unexpected call to parse_comment()')
- match = _commentclose.search(rawdata, i+4)
- if not match:
- return -1
- if report:
- j = match.start(0)
- self.handle_comment(rawdata[i+4: j])
- return match.end(0)
-
- # Internal -- scan past the internal subset in a <!DOCTYPE declaration,
- # returning the index just past any whitespace following the trailing ']'.
- def _parse_doctype_subset(self, i, declstartpos):
- rawdata = self.rawdata
- n = len(rawdata)
- j = i
- while j < n:
- c = rawdata[j]
- if c == "<":
- s = rawdata[j:j+2]
- if s == "<":
- # end of buffer; incomplete
- return -1
- if s != "<!":
- self.updatepos(declstartpos, j + 1)
- self.error("unexpected char in internal subset (in %r)" % s)
- if (j + 2) == n:
- # end of buffer; incomplete
- return -1
- if (j + 4) > n:
- # end of buffer; incomplete
- return -1
- if rawdata[j:j+4] == "<!--":
- j = self.parse_comment(j, report=0)
- if j < 0:
- return j
- continue
- name, j = self._scan_name(j + 2, declstartpos)
- if j == -1:
- return -1
- if name not in set(["attlist", "element", "entity", "notation"]):
- self.updatepos(declstartpos, j + 2)
- self.error(
- "unknown declaration %r in internal subset" % name)
- # handle the individual names
- meth = getattr(self, "_parse_doctype_" + name)
- j = meth(j, declstartpos)
- if j < 0:
- return j
- elif c == "%":
- # parameter entity reference
- if (j + 1) == n:
- # end of buffer; incomplete
- return -1
- s, j = self._scan_name(j + 1, declstartpos)
- if j < 0:
- return j
- if rawdata[j] == ";":
- j = j + 1
- elif c == "]":
- j = j + 1
- while j < n and rawdata[j].isspace():
- j = j + 1
- if j < n:
- if rawdata[j] == ">":
- return j
- self.updatepos(declstartpos, j)
- self.error("unexpected char after internal subset")
- else:
- return -1
- elif c.isspace():
- j = j + 1
- else:
- self.updatepos(declstartpos, j)
- self.error("unexpected char %r in internal subset" % c)
- # end of buffer reached
- return -1
-
- # Internal -- scan past <!ELEMENT declarations
- def _parse_doctype_element(self, i, declstartpos):
- name, j = self._scan_name(i, declstartpos)
- if j == -1:
- return -1
- # style content model; just skip until '>'
- rawdata = self.rawdata
- if '>' in rawdata[j:]:
- return rawdata.find(">", j) + 1
- return -1
-
- # Internal -- scan past <!ATTLIST declarations
- def _parse_doctype_attlist(self, i, declstartpos):
- rawdata = self.rawdata
- name, j = self._scan_name(i, declstartpos)
- c = rawdata[j:j+1]
- if c == "":
- return -1
- if c == ">":
- return j + 1
- while 1:
- # scan a series of attribute descriptions; simplified:
- # name type [value] [#constraint]
- name, j = self._scan_name(j, declstartpos)
- if j < 0:
- return j
- c = rawdata[j:j+1]
- if c == "":
- return -1
- if c == "(":
- # an enumerated type; look for ')'
- if ")" in rawdata[j:]:
- j = rawdata.find(")", j) + 1
- else:
- return -1
- while rawdata[j:j+1].isspace():
- j = j + 1
- if not rawdata[j:]:
- # end of buffer, incomplete
- return -1
- else:
- name, j = self._scan_name(j, declstartpos)
- c = rawdata[j:j+1]
- if not c:
- return -1
- if c in "'\"":
- m = _declstringlit_match(rawdata, j)
- if m:
- j = m.end()
- else:
- return -1
- c = rawdata[j:j+1]
- if not c:
- return -1
- if c == "#":
- if rawdata[j:] == "#":
- # end of buffer
- return -1
- name, j = self._scan_name(j + 1, declstartpos)
- if j < 0:
- return j
- c = rawdata[j:j+1]
- if not c:
- return -1
- if c == '>':
- # all done
- return j + 1
-
- # Internal -- scan past <!NOTATION declarations
- def _parse_doctype_notation(self, i, declstartpos):
- name, j = self._scan_name(i, declstartpos)
- if j < 0:
- return j
- rawdata = self.rawdata
- while 1:
- c = rawdata[j:j+1]
- if not c:
- # end of buffer; incomplete
- return -1
- if c == '>':
- return j + 1
- if c in "'\"":
- m = _declstringlit_match(rawdata, j)
- if not m:
- return -1
- j = m.end()
- else:
- name, j = self._scan_name(j, declstartpos)
- if j < 0:
- return j
-
- # Internal -- scan past <!ENTITY declarations
- def _parse_doctype_entity(self, i, declstartpos):
- rawdata = self.rawdata
- if rawdata[i:i+1] == "%":
- j = i + 1
- while 1:
- c = rawdata[j:j+1]
- if not c:
- return -1
- if c.isspace():
- j = j + 1
- else:
- break
- else:
- j = i
- name, j = self._scan_name(j, declstartpos)
- if j < 0:
- return j
- while 1:
- c = self.rawdata[j:j+1]
- if not c:
- return -1
- if c in "'\"":
- m = _declstringlit_match(rawdata, j)
- if m:
- j = m.end()
- else:
- return -1 # incomplete
- elif c == ">":
- return j + 1
- else:
- name, j = self._scan_name(j, declstartpos)
- if j < 0:
- return j
-
- # Internal -- scan a name token and the new position and the token, or
- # return -1 if we've reached the end of the buffer.
- def _scan_name(self, i, declstartpos):
- rawdata = self.rawdata
- n = len(rawdata)
- if i == n:
- return None, -1
- m = _declname_match(rawdata, i)
- if m:
- s = m.group()
- name = s.strip()
- if (i + len(s)) == n:
- return None, -1 # end of buffer
- return name.lower(), m.end()
- else:
- self.updatepos(declstartpos, i)
- self.error("expected name token at %r"
- % rawdata[declstartpos:declstartpos+20])
-
- # To be overridden -- handlers for unknown objects
- def unknown_decl(self, data):
- pass
+"""Shared support for scanning document type declarations in HTML and XHTML.
+
+Backported for python-future from Python 3.3. Reason: ParserBase is an
+old-style class in the Python 2.7 source of markupbase.py, which I suspect
+might be the cause of sporadic unit-test failures on travis-ci.org with
+test_htmlparser.py. The test failures look like this:
+
+ ======================================================================
+
+ERROR: test_attr_entity_replacement (future.tests.test_htmlparser.AttributesStrictTestCase)
+
+----------------------------------------------------------------------
+
+Traceback (most recent call last):
+ File "/home/travis/build/edschofield/python-future/future/tests/test_htmlparser.py", line 661, in test_attr_entity_replacement
+ [("starttag", "a", [("b", "&><\"'")])])
+ File "/home/travis/build/edschofield/python-future/future/tests/test_htmlparser.py", line 93, in _run_check
+ collector = self.get_collector()
+ File "/home/travis/build/edschofield/python-future/future/tests/test_htmlparser.py", line 617, in get_collector
+ return EventCollector(strict=True)
+ File "/home/travis/build/edschofield/python-future/future/tests/test_htmlparser.py", line 27, in __init__
+ html.parser.HTMLParser.__init__(self, *args, **kw)
+ File "/home/travis/build/edschofield/python-future/future/backports/html/parser.py", line 135, in __init__
+ self.reset()
+ File "/home/travis/build/edschofield/python-future/future/backports/html/parser.py", line 143, in reset
+ _markupbase.ParserBase.reset(self)
+
+TypeError: unbound method reset() must be called with ParserBase instance as first argument (got EventCollector instance instead)
+
+This module is used as a foundation for the html.parser module. It has no
+documented public API and should not be used directly.
+
+"""
+
+import re
+
+_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*').match
+_declstringlit_match = re.compile(r'(\'[^\']*\'|"[^"]*")\s*').match
+_commentclose = re.compile(r'--\s*>')
+_markedsectionclose = re.compile(r']\s*]\s*>')
+
+# An analysis of the MS-Word extensions is available at
+# http://www.planetpublish.com/xmlarena/xap/Thursday/WordtoXML.pdf
+
+_msmarkedsectionclose = re.compile(r']\s*>')
+
+del re
+
+
+class ParserBase(object):
+ """Parser base class which provides some common support methods used
+ by the SGML/HTML and XHTML parsers."""
+
+ def __init__(self):
+ if self.__class__ is ParserBase:
+ raise RuntimeError(
+ "_markupbase.ParserBase must be subclassed")
+
+ def error(self, message):
+ raise NotImplementedError(
+ "subclasses of ParserBase must override error()")
+
+ def reset(self):
+ self.lineno = 1
+ self.offset = 0
+
+ def getpos(self):
+ """Return current line number and offset."""
+ return self.lineno, self.offset
+
+ # Internal -- update line number and offset. This should be
+ # called for each piece of data exactly once, in order -- in other
+ # words the concatenation of all the input strings to this
+ # function should be exactly the entire input.
+ def updatepos(self, i, j):
+ if i >= j:
+ return j
+ rawdata = self.rawdata
+ nlines = rawdata.count("\n", i, j)
+ if nlines:
+ self.lineno = self.lineno + nlines
+ pos = rawdata.rindex("\n", i, j) # Should not fail
+ self.offset = j-(pos+1)
+ else:
+ self.offset = self.offset + j-i
+ return j
+
+ _decl_otherchars = ''
+
+ # Internal -- parse declaration (for use by subclasses).
+ def parse_declaration(self, i):
+ # This is some sort of declaration; in "HTML as
+ # deployed," this should only be the document type
+ # declaration ("<!DOCTYPE html...>").
+ # ISO 8879:1986, however, has more complex
+ # declaration syntax for elements in <!...>, including:
+ # --comment--
+ # [marked section]
+ # name in the following list: ENTITY, DOCTYPE, ELEMENT,
+ # ATTLIST, NOTATION, SHORTREF, USEMAP,
+ # LINKTYPE, LINK, IDLINK, USELINK, SYSTEM
+ rawdata = self.rawdata
+ j = i + 2
+ assert rawdata[i:j] == "<!", "unexpected call to parse_declaration"
+ if rawdata[j:j+1] == ">":
+ # the empty comment <!>
+ return j + 1
+ if rawdata[j:j+1] in ("-", ""):
+ # Start of comment followed by buffer boundary,
+ # or just a buffer boundary.
+ return -1
+ # A simple, practical version could look like: ((name|stringlit) S*) + '>'
+ n = len(rawdata)
+ if rawdata[j:j+2] == '--': #comment
+ # Locate --.*-- as the body of the comment
+ return self.parse_comment(i)
+ elif rawdata[j] == '[': #marked section
+ # Locate [statusWord [...arbitrary SGML...]] as the body of the marked section
+ # Where statusWord is one of TEMP, CDATA, IGNORE, INCLUDE, RCDATA
+ # Note that this is extended by Microsoft Office "Save as Web" function
+ # to include [if...] and [endif].
+ return self.parse_marked_section(i)
+ else: #all other declaration elements
+ decltype, j = self._scan_name(j, i)
+ if j < 0:
+ return j
+ if decltype == "doctype":
+ self._decl_otherchars = ''
+ while j < n:
+ c = rawdata[j]
+ if c == ">":
+ # end of declaration syntax
+ data = rawdata[i+2:j]
+ if decltype == "doctype":
+ self.handle_decl(data)
+ else:
+ # According to the HTML5 specs sections "8.2.4.44 Bogus
+ # comment state" and "8.2.4.45 Markup declaration open
+ # state", a comment token should be emitted.
+ # Calling unknown_decl provides more flexibility though.
+ self.unknown_decl(data)
+ return j + 1
+ if c in "\"'":
+ m = _declstringlit_match(rawdata, j)
+ if not m:
+ return -1 # incomplete
+ j = m.end()
+ elif c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
+ name, j = self._scan_name(j, i)
+ elif c in self._decl_otherchars:
+ j = j + 1
+ elif c == "[":
+ # this could be handled in a separate doctype parser
+ if decltype == "doctype":
+ j = self._parse_doctype_subset(j + 1, i)
+ elif decltype in set(["attlist", "linktype", "link", "element"]):
+ # must tolerate []'d groups in a content model in an element declaration
+ # also in data attribute specifications of attlist declaration
+ # also link type declaration subsets in linktype declarations
+ # also link attribute specification lists in link declarations
+ self.error("unsupported '[' char in %s declaration" % decltype)
+ else:
+ self.error("unexpected '[' char in declaration")
+ else:
+ self.error(
+ "unexpected %r char in declaration" % rawdata[j])
+ if j < 0:
+ return j
+ return -1 # incomplete
+
+ # Internal -- parse a marked section
+ # Override this to handle MS-word extension syntax <![if word]>content<![endif]>
+ def parse_marked_section(self, i, report=1):
+ rawdata= self.rawdata
+ assert rawdata[i:i+3] == '<![', "unexpected call to parse_marked_section()"
+ sectName, j = self._scan_name( i+3, i )
+ if j < 0:
+ return j
+ if sectName in set(["temp", "cdata", "ignore", "include", "rcdata"]):
+ # look for standard ]]> ending
+ match= _markedsectionclose.search(rawdata, i+3)
+ elif sectName in set(["if", "else", "endif"]):
+ # look for MS Office ]> ending
+ match= _msmarkedsectionclose.search(rawdata, i+3)
+ else:
+ self.error('unknown status keyword %r in marked section' % rawdata[i+3:j])
+ if not match:
+ return -1
+ if report:
+ j = match.start(0)
+ self.unknown_decl(rawdata[i+3: j])
+ return match.end(0)
+
+ # Internal -- parse comment, return length or -1 if not terminated
+ def parse_comment(self, i, report=1):
+ rawdata = self.rawdata
+ if rawdata[i:i+4] != '<!--':
+ self.error('unexpected call to parse_comment()')
+ match = _commentclose.search(rawdata, i+4)
+ if not match:
+ return -1
+ if report:
+ j = match.start(0)
+ self.handle_comment(rawdata[i+4: j])
+ return match.end(0)
+
+ # Internal -- scan past the internal subset in a <!DOCTYPE declaration,
+ # returning the index just past any whitespace following the trailing ']'.
+ def _parse_doctype_subset(self, i, declstartpos):
+ rawdata = self.rawdata
+ n = len(rawdata)
+ j = i
+ while j < n:
+ c = rawdata[j]
+ if c == "<":
+ s = rawdata[j:j+2]
+ if s == "<":
+ # end of buffer; incomplete
+ return -1
+ if s != "<!":
+ self.updatepos(declstartpos, j + 1)
+ self.error("unexpected char in internal subset (in %r)" % s)
+ if (j + 2) == n:
+ # end of buffer; incomplete
+ return -1
+ if (j + 4) > n:
+ # end of buffer; incomplete
+ return -1
+ if rawdata[j:j+4] == "<!--":
+ j = self.parse_comment(j, report=0)
+ if j < 0:
+ return j
+ continue
+ name, j = self._scan_name(j + 2, declstartpos)
+ if j == -1:
+ return -1
+ if name not in set(["attlist", "element", "entity", "notation"]):
+ self.updatepos(declstartpos, j + 2)
+ self.error(
+ "unknown declaration %r in internal subset" % name)
+ # handle the individual names
+ meth = getattr(self, "_parse_doctype_" + name)
+ j = meth(j, declstartpos)
+ if j < 0:
+ return j
+ elif c == "%":
+ # parameter entity reference
+ if (j + 1) == n:
+ # end of buffer; incomplete
+ return -1
+ s, j = self._scan_name(j + 1, declstartpos)
+ if j < 0:
+ return j
+ if rawdata[j] == ";":
+ j = j + 1
+ elif c == "]":
+ j = j + 1
+ while j < n and rawdata[j].isspace():
+ j = j + 1
+ if j < n:
+ if rawdata[j] == ">":
+ return j
+ self.updatepos(declstartpos, j)
+ self.error("unexpected char after internal subset")
+ else:
+ return -1
+ elif c.isspace():
+ j = j + 1
+ else:
+ self.updatepos(declstartpos, j)
+ self.error("unexpected char %r in internal subset" % c)
+ # end of buffer reached
+ return -1
+
+ # Internal -- scan past <!ELEMENT declarations
+ def _parse_doctype_element(self, i, declstartpos):
+ name, j = self._scan_name(i, declstartpos)
+ if j == -1:
+ return -1
+ # style content model; just skip until '>'
+ rawdata = self.rawdata
+ if '>' in rawdata[j:]:
+ return rawdata.find(">", j) + 1
+ return -1
+
+ # Internal -- scan past <!ATTLIST declarations
+ def _parse_doctype_attlist(self, i, declstartpos):
+ rawdata = self.rawdata
+ name, j = self._scan_name(i, declstartpos)
+ c = rawdata[j:j+1]
+ if c == "":
+ return -1
+ if c == ">":
+ return j + 1
+ while 1:
+ # scan a series of attribute descriptions; simplified:
+ # name type [value] [#constraint]
+ name, j = self._scan_name(j, declstartpos)
+ if j < 0:
+ return j
+ c = rawdata[j:j+1]
+ if c == "":
+ return -1
+ if c == "(":
+ # an enumerated type; look for ')'
+ if ")" in rawdata[j:]:
+ j = rawdata.find(")", j) + 1
+ else:
+ return -1
+ while rawdata[j:j+1].isspace():
+ j = j + 1
+ if not rawdata[j:]:
+ # end of buffer, incomplete
+ return -1
+ else:
+ name, j = self._scan_name(j, declstartpos)
+ c = rawdata[j:j+1]
+ if not c:
+ return -1
+ if c in "'\"":
+ m = _declstringlit_match(rawdata, j)
+ if m:
+ j = m.end()
+ else:
+ return -1
+ c = rawdata[j:j+1]
+ if not c:
+ return -1
+ if c == "#":
+ if rawdata[j:] == "#":
+ # end of buffer
+ return -1
+ name, j = self._scan_name(j + 1, declstartpos)
+ if j < 0:
+ return j
+ c = rawdata[j:j+1]
+ if not c:
+ return -1
+ if c == '>':
+ # all done
+ return j + 1
+
+ # Internal -- scan past <!NOTATION declarations
+ def _parse_doctype_notation(self, i, declstartpos):
+ name, j = self._scan_name(i, declstartpos)
+ if j < 0:
+ return j
+ rawdata = self.rawdata
+ while 1:
+ c = rawdata[j:j+1]
+ if not c:
+ # end of buffer; incomplete
+ return -1
+ if c == '>':
+ return j + 1
+ if c in "'\"":
+ m = _declstringlit_match(rawdata, j)
+ if not m:
+ return -1
+ j = m.end()
+ else:
+ name, j = self._scan_name(j, declstartpos)
+ if j < 0:
+ return j
+
+ # Internal -- scan past <!ENTITY declarations
+ def _parse_doctype_entity(self, i, declstartpos):
+ rawdata = self.rawdata
+ if rawdata[i:i+1] == "%":
+ j = i + 1
+ while 1:
+ c = rawdata[j:j+1]
+ if not c:
+ return -1
+ if c.isspace():
+ j = j + 1
+ else:
+ break
+ else:
+ j = i
+ name, j = self._scan_name(j, declstartpos)
+ if j < 0:
+ return j
+ while 1:
+ c = self.rawdata[j:j+1]
+ if not c:
+ return -1
+ if c in "'\"":
+ m = _declstringlit_match(rawdata, j)
+ if m:
+ j = m.end()
+ else:
+ return -1 # incomplete
+ elif c == ">":
+ return j + 1
+ else:
+ name, j = self._scan_name(j, declstartpos)
+ if j < 0:
+ return j
+
+ # Internal -- scan a name token and the new position and the token, or
+ # return -1 if we've reached the end of the buffer.
+ def _scan_name(self, i, declstartpos):
+ rawdata = self.rawdata
+ n = len(rawdata)
+ if i == n:
+ return None, -1
+ m = _declname_match(rawdata, i)
+ if m:
+ s = m.group()
+ name = s.strip()
+ if (i + len(s)) == n:
+ return None, -1 # end of buffer
+ return name.lower(), m.end()
+ else:
+ self.updatepos(declstartpos, i)
+ self.error("expected name token at %r"
+ % rawdata[declstartpos:declstartpos+20])
+
+ # To be overridden -- handlers for unknown objects
+ def unknown_decl(self, data):
+ pass
diff --git a/contrib/python/future/future/backports/datetime.py b/contrib/python/future/future/backports/datetime.py
index 5b7065c467..3261014e05 100644
--- a/contrib/python/future/future/backports/datetime.py
+++ b/contrib/python/future/future/backports/datetime.py
@@ -1,2152 +1,2152 @@
-"""Concrete date/time and related types.
-
-See http://www.iana.org/time-zones/repository/tz-link.html for
-time zone and DST data sources.
-"""
-from __future__ import division
-from __future__ import unicode_literals
-from __future__ import print_function
-from __future__ import absolute_import
-from future.builtins import str
-from future.builtins import bytes
-from future.builtins import map
-from future.builtins import round
-from future.builtins import int
-from future.builtins import object
-from future.utils import native_str, PY2
-
-import time as _time
-import math as _math
-
-def _cmp(x, y):
- return 0 if x == y else 1 if x > y else -1
-
-MINYEAR = 1
-MAXYEAR = 9999
-_MAXORDINAL = 3652059 # date.max.toordinal()
-
-# Utility functions, adapted from Python's Demo/classes/Dates.py, which
-# also assumes the current Gregorian calendar indefinitely extended in
-# both directions. Difference: Dates.py calls January 1 of year 0 day
-# number 1. The code here calls January 1 of year 1 day number 1. This is
-# to match the definition of the "proleptic Gregorian" calendar in Dershowitz
-# and Reingold's "Calendrical Calculations", where it's the base calendar
-# for all computations. See the book for algorithms for converting between
-# proleptic Gregorian ordinals and many other calendar systems.
-
-_DAYS_IN_MONTH = [None, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
-
-_DAYS_BEFORE_MONTH = [None]
-dbm = 0
-for dim in _DAYS_IN_MONTH[1:]:
- _DAYS_BEFORE_MONTH.append(dbm)
- dbm += dim
-del dbm, dim
-
-def _is_leap(year):
- "year -> 1 if leap year, else 0."
- return year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)
-
-def _days_before_year(year):
- "year -> number of days before January 1st of year."
- y = year - 1
- return y*365 + y//4 - y//100 + y//400
-
-def _days_in_month(year, month):
- "year, month -> number of days in that month in that year."
- assert 1 <= month <= 12, month
- if month == 2 and _is_leap(year):
- return 29
- return _DAYS_IN_MONTH[month]
-
-def _days_before_month(year, month):
- "year, month -> number of days in year preceding first day of month."
- assert 1 <= month <= 12, 'month must be in 1..12'
- return _DAYS_BEFORE_MONTH[month] + (month > 2 and _is_leap(year))
-
-def _ymd2ord(year, month, day):
- "year, month, day -> ordinal, considering 01-Jan-0001 as day 1."
- assert 1 <= month <= 12, 'month must be in 1..12'
- dim = _days_in_month(year, month)
- assert 1 <= day <= dim, ('day must be in 1..%d' % dim)
- return (_days_before_year(year) +
- _days_before_month(year, month) +
- day)
-
-_DI400Y = _days_before_year(401) # number of days in 400 years
-_DI100Y = _days_before_year(101) # " " " " 100 "
-_DI4Y = _days_before_year(5) # " " " " 4 "
-
-# A 4-year cycle has an extra leap day over what we'd get from pasting
-# together 4 single years.
-assert _DI4Y == 4 * 365 + 1
-
-# Similarly, a 400-year cycle has an extra leap day over what we'd get from
-# pasting together 4 100-year cycles.
-assert _DI400Y == 4 * _DI100Y + 1
-
-# OTOH, a 100-year cycle has one fewer leap day than we'd get from
-# pasting together 25 4-year cycles.
-assert _DI100Y == 25 * _DI4Y - 1
-
-def _ord2ymd(n):
- "ordinal -> (year, month, day), considering 01-Jan-0001 as day 1."
-
- # n is a 1-based index, starting at 1-Jan-1. The pattern of leap years
- # repeats exactly every 400 years. The basic strategy is to find the
- # closest 400-year boundary at or before n, then work with the offset
- # from that boundary to n. Life is much clearer if we subtract 1 from
- # n first -- then the values of n at 400-year boundaries are exactly
- # those divisible by _DI400Y:
- #
- # D M Y n n-1
- # -- --- ---- ---------- ----------------
- # 31 Dec -400 -_DI400Y -_DI400Y -1
- # 1 Jan -399 -_DI400Y +1 -_DI400Y 400-year boundary
- # ...
- # 30 Dec 000 -1 -2
- # 31 Dec 000 0 -1
- # 1 Jan 001 1 0 400-year boundary
- # 2 Jan 001 2 1
- # 3 Jan 001 3 2
- # ...
- # 31 Dec 400 _DI400Y _DI400Y -1
- # 1 Jan 401 _DI400Y +1 _DI400Y 400-year boundary
- n -= 1
- n400, n = divmod(n, _DI400Y)
- year = n400 * 400 + 1 # ..., -399, 1, 401, ...
-
- # Now n is the (non-negative) offset, in days, from January 1 of year, to
- # the desired date. Now compute how many 100-year cycles precede n.
- # Note that it's possible for n100 to equal 4! In that case 4 full
- # 100-year cycles precede the desired day, which implies the desired
- # day is December 31 at the end of a 400-year cycle.
- n100, n = divmod(n, _DI100Y)
-
- # Now compute how many 4-year cycles precede it.
- n4, n = divmod(n, _DI4Y)
-
- # And now how many single years. Again n1 can be 4, and again meaning
- # that the desired day is December 31 at the end of the 4-year cycle.
- n1, n = divmod(n, 365)
-
- year += n100 * 100 + n4 * 4 + n1
- if n1 == 4 or n100 == 4:
- assert n == 0
- return year-1, 12, 31
-
- # Now the year is correct, and n is the offset from January 1. We find
- # the month via an estimate that's either exact or one too large.
- leapyear = n1 == 3 and (n4 != 24 or n100 == 3)
- assert leapyear == _is_leap(year)
- month = (n + 50) >> 5
- preceding = _DAYS_BEFORE_MONTH[month] + (month > 2 and leapyear)
- if preceding > n: # estimate is too large
- month -= 1
- preceding -= _DAYS_IN_MONTH[month] + (month == 2 and leapyear)
- n -= preceding
- assert 0 <= n < _days_in_month(year, month)
-
- # Now the year and month are correct, and n is the offset from the
- # start of that month: we're done!
- return year, month, n+1
-
-# Month and day names. For localized versions, see the calendar module.
-_MONTHNAMES = [None, "Jan", "Feb", "Mar", "Apr", "May", "Jun",
- "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
-_DAYNAMES = [None, "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
-
-
-def _build_struct_time(y, m, d, hh, mm, ss, dstflag):
- wday = (_ymd2ord(y, m, d) + 6) % 7
- dnum = _days_before_month(y, m) + d
- return _time.struct_time((y, m, d, hh, mm, ss, wday, dnum, dstflag))
-
-def _format_time(hh, mm, ss, us):
- # Skip trailing microseconds when us==0.
- result = "%02d:%02d:%02d" % (hh, mm, ss)
- if us:
- result += ".%06d" % us
- return result
-
-# Correctly substitute for %z and %Z escapes in strftime formats.
-def _wrap_strftime(object, format, timetuple):
- # Don't call utcoffset() or tzname() unless actually needed.
- freplace = None # the string to use for %f
- zreplace = None # the string to use for %z
- Zreplace = None # the string to use for %Z
-
- # Scan format for %z and %Z escapes, replacing as needed.
- newformat = []
- push = newformat.append
- i, n = 0, len(format)
- while i < n:
- ch = format[i]
- i += 1
- if ch == '%':
- if i < n:
- ch = format[i]
- i += 1
- if ch == 'f':
- if freplace is None:
- freplace = '%06d' % getattr(object,
- 'microsecond', 0)
- newformat.append(freplace)
- elif ch == 'z':
- if zreplace is None:
- zreplace = ""
- if hasattr(object, "utcoffset"):
- offset = object.utcoffset()
- if offset is not None:
- sign = '+'
- if offset.days < 0:
- offset = -offset
- sign = '-'
- h, m = divmod(offset, timedelta(hours=1))
- assert not m % timedelta(minutes=1), "whole minute"
- m //= timedelta(minutes=1)
- zreplace = '%c%02d%02d' % (sign, h, m)
- assert '%' not in zreplace
- newformat.append(zreplace)
- elif ch == 'Z':
- if Zreplace is None:
- Zreplace = ""
- if hasattr(object, "tzname"):
- s = object.tzname()
- if s is not None:
- # strftime is going to have at this: escape %
- Zreplace = s.replace('%', '%%')
- newformat.append(Zreplace)
- else:
- push('%')
- push(ch)
- else:
- push('%')
- else:
- push(ch)
- newformat = "".join(newformat)
- return _time.strftime(newformat, timetuple)
-
-def _call_tzinfo_method(tzinfo, methname, tzinfoarg):
- if tzinfo is None:
- return None
- return getattr(tzinfo, methname)(tzinfoarg)
-
-# Just raise TypeError if the arg isn't None or a string.
-def _check_tzname(name):
- if name is not None and not isinstance(name, str):
- raise TypeError("tzinfo.tzname() must return None or string, "
- "not '%s'" % type(name))
-
-# name is the offset-producing method, "utcoffset" or "dst".
-# offset is what it returned.
-# If offset isn't None or timedelta, raises TypeError.
-# If offset is None, returns None.
-# Else offset is checked for being in range, and a whole # of minutes.
-# If it is, its integer value is returned. Else ValueError is raised.
-def _check_utc_offset(name, offset):
- assert name in ("utcoffset", "dst")
- if offset is None:
- return
- if not isinstance(offset, timedelta):
- raise TypeError("tzinfo.%s() must return None "
- "or timedelta, not '%s'" % (name, type(offset)))
- if offset % timedelta(minutes=1) or offset.microseconds:
- raise ValueError("tzinfo.%s() must return a whole number "
- "of minutes, got %s" % (name, offset))
- if not -timedelta(1) < offset < timedelta(1):
- raise ValueError("%s()=%s, must be must be strictly between"
- " -timedelta(hours=24) and timedelta(hours=24)"
- % (name, offset))
-
-def _check_date_fields(year, month, day):
- if not isinstance(year, int):
- raise TypeError('int expected')
- if not MINYEAR <= year <= MAXYEAR:
- raise ValueError('year must be in %d..%d' % (MINYEAR, MAXYEAR), year)
- if not 1 <= month <= 12:
- raise ValueError('month must be in 1..12', month)
- dim = _days_in_month(year, month)
- if not 1 <= day <= dim:
- raise ValueError('day must be in 1..%d' % dim, day)
-
-def _check_time_fields(hour, minute, second, microsecond):
- if not isinstance(hour, int):
- raise TypeError('int expected')
- if not 0 <= hour <= 23:
- raise ValueError('hour must be in 0..23', hour)
- if not 0 <= minute <= 59:
- raise ValueError('minute must be in 0..59', minute)
- if not 0 <= second <= 59:
- raise ValueError('second must be in 0..59', second)
- if not 0 <= microsecond <= 999999:
- raise ValueError('microsecond must be in 0..999999', microsecond)
-
-def _check_tzinfo_arg(tz):
- if tz is not None and not isinstance(tz, tzinfo):
- raise TypeError("tzinfo argument must be None or of a tzinfo subclass")
-
-def _cmperror(x, y):
- raise TypeError("can't compare '%s' to '%s'" % (
- type(x).__name__, type(y).__name__))
-
-class timedelta(object):
- """Represent the difference between two datetime objects.
-
- Supported operators:
-
- - add, subtract timedelta
- - unary plus, minus, abs
- - compare to timedelta
- - multiply, divide by int
-
- In addition, datetime supports subtraction of two datetime objects
- returning a timedelta, and addition or subtraction of a datetime
- and a timedelta giving a datetime.
-
- Representation: (days, seconds, microseconds). Why? Because I
- felt like it.
- """
- __slots__ = '_days', '_seconds', '_microseconds'
-
- def __new__(cls, days=0, seconds=0, microseconds=0,
- milliseconds=0, minutes=0, hours=0, weeks=0):
- # Doing this efficiently and accurately in C is going to be difficult
- # and error-prone, due to ubiquitous overflow possibilities, and that
- # C double doesn't have enough bits of precision to represent
- # microseconds over 10K years faithfully. The code here tries to make
- # explicit where go-fast assumptions can be relied on, in order to
- # guide the C implementation; it's way more convoluted than speed-
- # ignoring auto-overflow-to-long idiomatic Python could be.
-
- # XXX Check that all inputs are ints or floats.
-
- # Final values, all integer.
- # s and us fit in 32-bit signed ints; d isn't bounded.
- d = s = us = 0
-
- # Normalize everything to days, seconds, microseconds.
- days += weeks*7
- seconds += minutes*60 + hours*3600
- microseconds += milliseconds*1000
-
- # Get rid of all fractions, and normalize s and us.
- # Take a deep breath <wink>.
- if isinstance(days, float):
- dayfrac, days = _math.modf(days)
- daysecondsfrac, daysecondswhole = _math.modf(dayfrac * (24.*3600.))
- assert daysecondswhole == int(daysecondswhole) # can't overflow
- s = int(daysecondswhole)
- assert days == int(days)
- d = int(days)
- else:
- daysecondsfrac = 0.0
- d = days
- assert isinstance(daysecondsfrac, float)
- assert abs(daysecondsfrac) <= 1.0
- assert isinstance(d, int)
- assert abs(s) <= 24 * 3600
- # days isn't referenced again before redefinition
-
- if isinstance(seconds, float):
- secondsfrac, seconds = _math.modf(seconds)
- assert seconds == int(seconds)
- seconds = int(seconds)
- secondsfrac += daysecondsfrac
- assert abs(secondsfrac) <= 2.0
- else:
- secondsfrac = daysecondsfrac
- # daysecondsfrac isn't referenced again
- assert isinstance(secondsfrac, float)
- assert abs(secondsfrac) <= 2.0
-
- assert isinstance(seconds, int)
- days, seconds = divmod(seconds, 24*3600)
- d += days
- s += int(seconds) # can't overflow
- assert isinstance(s, int)
- assert abs(s) <= 2 * 24 * 3600
- # seconds isn't referenced again before redefinition
-
- usdouble = secondsfrac * 1e6
- assert abs(usdouble) < 2.1e6 # exact value not critical
- # secondsfrac isn't referenced again
-
- if isinstance(microseconds, float):
- microseconds += usdouble
- microseconds = round(microseconds, 0)
- seconds, microseconds = divmod(microseconds, 1e6)
- assert microseconds == int(microseconds)
- assert seconds == int(seconds)
- days, seconds = divmod(seconds, 24.*3600.)
- assert days == int(days)
- assert seconds == int(seconds)
- d += int(days)
- s += int(seconds) # can't overflow
- assert isinstance(s, int)
- assert abs(s) <= 3 * 24 * 3600
- else:
- seconds, microseconds = divmod(microseconds, 1000000)
- days, seconds = divmod(seconds, 24*3600)
- d += days
- s += int(seconds) # can't overflow
- assert isinstance(s, int)
- assert abs(s) <= 3 * 24 * 3600
- microseconds = float(microseconds)
- microseconds += usdouble
- microseconds = round(microseconds, 0)
- assert abs(s) <= 3 * 24 * 3600
- assert abs(microseconds) < 3.1e6
-
- # Just a little bit of carrying possible for microseconds and seconds.
- assert isinstance(microseconds, float)
- assert int(microseconds) == microseconds
- us = int(microseconds)
- seconds, us = divmod(us, 1000000)
- s += seconds # cant't overflow
- assert isinstance(s, int)
- days, s = divmod(s, 24*3600)
- d += days
-
- assert isinstance(d, int)
- assert isinstance(s, int) and 0 <= s < 24*3600
- assert isinstance(us, int) and 0 <= us < 1000000
-
- self = object.__new__(cls)
-
- self._days = d
- self._seconds = s
- self._microseconds = us
- if abs(d) > 999999999:
- raise OverflowError("timedelta # of days is too large: %d" % d)
-
- return self
-
- def __repr__(self):
- if self._microseconds:
- return "%s(%d, %d, %d)" % ('datetime.' + self.__class__.__name__,
- self._days,
- self._seconds,
- self._microseconds)
- if self._seconds:
- return "%s(%d, %d)" % ('datetime.' + self.__class__.__name__,
- self._days,
- self._seconds)
- return "%s(%d)" % ('datetime.' + self.__class__.__name__, self._days)
-
- def __str__(self):
- mm, ss = divmod(self._seconds, 60)
- hh, mm = divmod(mm, 60)
- s = "%d:%02d:%02d" % (hh, mm, ss)
- if self._days:
- def plural(n):
- return n, abs(n) != 1 and "s" or ""
- s = ("%d day%s, " % plural(self._days)) + s
- if self._microseconds:
- s = s + ".%06d" % self._microseconds
- return s
-
- def total_seconds(self):
- """Total seconds in the duration."""
- return ((self.days * 86400 + self.seconds)*10**6 +
- self.microseconds) / 10**6
-
- # Read-only field accessors
- @property
- def days(self):
- """days"""
- return self._days
-
- @property
- def seconds(self):
- """seconds"""
- return self._seconds
-
- @property
- def microseconds(self):
- """microseconds"""
- return self._microseconds
-
- def __add__(self, other):
- if isinstance(other, timedelta):
- # for CPython compatibility, we cannot use
- # our __class__ here, but need a real timedelta
- return timedelta(self._days + other._days,
- self._seconds + other._seconds,
- self._microseconds + other._microseconds)
- return NotImplemented
-
- __radd__ = __add__
-
- def __sub__(self, other):
- if isinstance(other, timedelta):
- # for CPython compatibility, we cannot use
- # our __class__ here, but need a real timedelta
- return timedelta(self._days - other._days,
- self._seconds - other._seconds,
- self._microseconds - other._microseconds)
- return NotImplemented
-
- def __rsub__(self, other):
- if isinstance(other, timedelta):
- return -self + other
- return NotImplemented
-
- def __neg__(self):
- # for CPython compatibility, we cannot use
- # our __class__ here, but need a real timedelta
- return timedelta(-self._days,
- -self._seconds,
- -self._microseconds)
-
- def __pos__(self):
- return self
-
- def __abs__(self):
- if self._days < 0:
- return -self
- else:
- return self
-
- def __mul__(self, other):
- if isinstance(other, int):
- # for CPython compatibility, we cannot use
- # our __class__ here, but need a real timedelta
- return timedelta(self._days * other,
- self._seconds * other,
- self._microseconds * other)
- if isinstance(other, float):
- a, b = other.as_integer_ratio()
- return self * a / b
- return NotImplemented
-
- __rmul__ = __mul__
-
- def _to_microseconds(self):
- return ((self._days * (24*3600) + self._seconds) * 1000000 +
- self._microseconds)
-
- def __floordiv__(self, other):
- if not isinstance(other, (int, timedelta)):
- return NotImplemented
- usec = self._to_microseconds()
- if isinstance(other, timedelta):
- return usec // other._to_microseconds()
- if isinstance(other, int):
- return timedelta(0, 0, usec // other)
-
- def __truediv__(self, other):
- if not isinstance(other, (int, float, timedelta)):
- return NotImplemented
- usec = self._to_microseconds()
- if isinstance(other, timedelta):
- return usec / other._to_microseconds()
- if isinstance(other, int):
- return timedelta(0, 0, usec / other)
- if isinstance(other, float):
- a, b = other.as_integer_ratio()
- return timedelta(0, 0, b * usec / a)
-
- def __mod__(self, other):
- if isinstance(other, timedelta):
- r = self._to_microseconds() % other._to_microseconds()
- return timedelta(0, 0, r)
- return NotImplemented
-
- def __divmod__(self, other):
- if isinstance(other, timedelta):
- q, r = divmod(self._to_microseconds(),
- other._to_microseconds())
- return q, timedelta(0, 0, r)
- return NotImplemented
-
- # Comparisons of timedelta objects with other.
-
def __eq__(self, other):
    """Equality; any non-timedelta operand compares unequal."""
    if not isinstance(other, timedelta):
        return False
    return self._cmp(other) == 0
-
def __ne__(self, other):
    """Inequality; any non-timedelta operand compares unequal."""
    if not isinstance(other, timedelta):
        return True
    return self._cmp(other) != 0
-
def __le__(self, other):
    """self <= other; non-timedelta operands are passed to _cmperror()."""
    if isinstance(other, timedelta):
        return self._cmp(other) <= 0
    _cmperror(self, other)
-
def __lt__(self, other):
    """self < other; non-timedelta operands are passed to _cmperror()."""
    if isinstance(other, timedelta):
        return self._cmp(other) < 0
    _cmperror(self, other)
-
def __ge__(self, other):
    """self >= other; non-timedelta operands are passed to _cmperror()."""
    if isinstance(other, timedelta):
        return self._cmp(other) >= 0
    _cmperror(self, other)
-
def __gt__(self, other):
    """self > other; non-timedelta operands are passed to _cmperror()."""
    if isinstance(other, timedelta):
        return self._cmp(other) > 0
    _cmperror(self, other)
-
def _cmp(self, other):
    """Three-way compare two timedeltas via their (days, seconds, us) states."""
    assert isinstance(other, timedelta)
    mine = self._getstate()
    theirs = other._getstate()
    return _cmp(mine, theirs)
-
def __hash__(self):
    """Hash the same state tuple that the comparisons use."""
    state = self._getstate()
    return hash(state)
-
def __bool__(self):
    """A timedelta is false only when all three stored components are zero."""
    all_zero = (self._days == 0 and
                self._seconds == 0 and
                self._microseconds == 0)
    return not all_zero
-
- # Pickle support.
-
def _getstate(self):
    """Return the (days, seconds, microseconds) tuple of stored components."""
    return (self._days, self._seconds, self._microseconds)
-
def __reduce__(self):
    """Pickle as a call of the instance's own class with the state tuple."""
    return (self.__class__, self._getstate())
-
# Class attributes attached after the class body, because they are
# themselves timedelta instances.
timedelta.min = timedelta(-999999999)
timedelta.max = timedelta(days=999999999, hours=23, minutes=59, seconds=59,
                          microseconds=999999)
timedelta.resolution = timedelta(microseconds=1)
-
class date(object):
    """Concrete date type.

    Constructors:

    __new__()
    fromtimestamp()
    today()
    fromordinal()

    Operators:

    __repr__, __str__
    __eq__, __ne__, __le__, __lt__, __ge__, __gt__, __hash__
    __add__, __radd__, __sub__ (add/radd only with timedelta arg)

    Methods:

    timetuple()
    toordinal()
    weekday()
    isoweekday(), isocalendar(), isoformat()
    ctime()
    strftime()

    Properties (readonly):
    year, month, day
    """
    __slots__ = '_year', '_month', '_day'

    def __new__(cls, year, month=None, day=None):
        """Constructor.

        Arguments:

        year, month, day (required, base 1)

        As pickle support, a single 4-byte ``bytes`` object whose third
        byte is a valid month (and no ``month`` argument) is accepted in
        place of ``year`` and decoded as saved state.
        """
        if (isinstance(year, bytes) and len(year) == 4 and
            1 <= year[2] <= 12 and month is None):  # Month is sane
            # Pickle support
            self = object.__new__(cls)
            self.__setstate(year)
            return self
        _check_date_fields(year, month, day)
        self = object.__new__(cls)
        self._year = year
        self._month = month
        self._day = day
        return self

    # Additional constructors

    @classmethod
    def fromtimestamp(cls, t):
        "Construct a date from a POSIX timestamp (like time.time())."
        y, m, d, hh, mm, ss, weekday, jday, dst = _time.localtime(t)
        return cls(y, m, d)

    @classmethod
    def today(cls):
        "Construct a date from time.time()."
        t = _time.time()
        return cls.fromtimestamp(t)

    @classmethod
    def fromordinal(cls, n):
        """Construct a date from a proleptic Gregorian ordinal.

        January 1 of year 1 is day 1.  Only the year, month and day are
        non-zero in the result.
        """
        y, m, d = _ord2ymd(n)
        return cls(y, m, d)

    # Conversions to string

    def __repr__(self):
        """Convert to formal string, for repr().

        >>> d = date(2010, 1, 1)
        >>> repr(d)
        'datetime.date(2010, 1, 1)'
        """
        return "%s(%d, %d, %d)" % ('datetime.' + self.__class__.__name__,
                                   self._year,
                                   self._month,
                                   self._day)
    # XXX These shouldn't depend on time.localtime(), because that
    # clips the usable dates to [1970 .. 2038).  At least ctime() is
    # easily done without using strftime() -- that's better too because
    # strftime("%c", ...) is locale specific.


    def ctime(self):
        "Return ctime() style string."
        # Same 1..7 (Monday..Sunday) index that isoweekday() computes.
        weekday = self.toordinal() % 7 or 7
        return "%s %s %2d 00:00:00 %04d" % (
            _DAYNAMES[weekday],
            _MONTHNAMES[self._month],
            self._day, self._year)

    def strftime(self, fmt):
        "Format using strftime()."
        return _wrap_strftime(self, fmt, self.timetuple())

    def __format__(self, fmt):
        """Format with strftime(fmt), or with str() when fmt is empty."""
        if len(fmt) != 0:
            return self.strftime(fmt)
        return str(self)

    def isoformat(self):
        """Return the date formatted according to ISO.

        This is 'YYYY-MM-DD'.

        References:
        - http://www.w3.org/TR/NOTE-datetime
        - http://www.cl.cam.ac.uk/~mgk25/iso-time.html
        """
        return "%04d-%02d-%02d" % (self._year, self._month, self._day)

    __str__ = isoformat

    # Read-only field accessors
    @property
    def year(self):
        """year (1-9999)"""
        return self._year

    @property
    def month(self):
        """month (1-12)"""
        return self._month

    @property
    def day(self):
        """day (1-31)"""
        return self._day

    # Standard conversions, comparisons, __hash__ (and helpers)

    def timetuple(self):
        "Return local time tuple compatible with time.localtime()."
        # Time-of-day fields are zero and the DST flag is -1.
        return _build_struct_time(self._year, self._month, self._day,
                                  0, 0, 0, -1)

    def toordinal(self):
        """Return proleptic Gregorian ordinal for the year, month and day.

        January 1 of year 1 is day 1.  Only the year, month and day values
        contribute to the result.
        """
        return _ymd2ord(self._year, self._month, self._day)

    def replace(self, year=None, month=None, day=None):
        """Return a new date with new values for the specified fields.

        The result is built with type(self), so an instance of a subclass
        yields an instance of that subclass, consistent with the
        alternate constructors and __reduce__().
        """
        if year is None:
            year = self._year
        if month is None:
            month = self._month
        if day is None:
            day = self._day
        _check_date_fields(year, month, day)
        return type(self)(year, month, day)

    # Comparisons of date objects with other.

    def __eq__(self, other):
        if isinstance(other, date):
            return self._cmp(other) == 0
        return NotImplemented

    def __ne__(self, other):
        if isinstance(other, date):
            return self._cmp(other) != 0
        return NotImplemented

    def __le__(self, other):
        if isinstance(other, date):
            return self._cmp(other) <= 0
        return NotImplemented

    def __lt__(self, other):
        if isinstance(other, date):
            return self._cmp(other) < 0
        return NotImplemented

    def __ge__(self, other):
        if isinstance(other, date):
            return self._cmp(other) >= 0
        return NotImplemented

    def __gt__(self, other):
        if isinstance(other, date):
            return self._cmp(other) > 0
        return NotImplemented

    def _cmp(self, other):
        """Three-way compare two dates by (year, month, day)."""
        assert isinstance(other, date)
        y, m, d = self._year, self._month, self._day
        y2, m2, d2 = other._year, other._month, other._day
        return _cmp((y, m, d), (y2, m2, d2))

    def __hash__(self):
        "Hash."
        return hash(self._getstate())

    # Computations

    def __add__(self, other):
        "Add a date to a timedelta."
        if isinstance(other, timedelta):
            # Only the whole-day part of the timedelta is used.
            o = self.toordinal() + other.days
            if 0 < o <= _MAXORDINAL:
                return date.fromordinal(o)
            raise OverflowError("result out of range")
        return NotImplemented

    __radd__ = __add__

    def __sub__(self, other):
        """Subtract two dates, or a date and a timedelta."""
        if isinstance(other, timedelta):
            return self + timedelta(-other.days)
        if isinstance(other, date):
            days1 = self.toordinal()
            days2 = other.toordinal()
            return timedelta(days1 - days2)
        return NotImplemented

    def weekday(self):
        "Return day of the week, where Monday == 0 ... Sunday == 6."
        return (self.toordinal() + 6) % 7

    # Day-of-the-week and week-of-the-year, according to ISO

    def isoweekday(self):
        "Return day of the week, where Monday == 1 ... Sunday == 7."
        # 1-Jan-0001 is a Monday
        return self.toordinal() % 7 or 7

    def isocalendar(self):
        """Return a 3-tuple containing ISO year, week number, and weekday.

        The first ISO week of the year is the (Mon-Sun) week
        containing the year's first Thursday; everything else derives
        from that.

        The first week is 1; Monday is 1 ... Sunday is 7.

        ISO calendar algorithm taken from
        http://www.phys.uu.nl/~vgent/calendar/isocalendar.htm
        """
        year = self._year
        week1monday = _isoweek1monday(year)
        today = _ymd2ord(self._year, self._month, self._day)
        # Internally, week and day have origin 0
        week, day = divmod(today - week1monday, 7)
        if week < 0:
            # The date falls before this year's week 1, so it belongs to
            # the previous ISO year.
            year -= 1
            week1monday = _isoweek1monday(year)
            week, day = divmod(today - week1monday, 7)
        elif week >= 52:
            # The date may already lie in week 1 of the next ISO year.
            if today >= _isoweek1monday(year+1):
                year += 1
                week = 0
        return year, week+1, day+1

    # Pickle support.

    def _getstate(self):
        """Return a 1-tuple holding the 4-byte state (year hi, year lo, month, day)."""
        yhi, ylo = divmod(self._year, 256)
        return (bytes([yhi, ylo, self._month, self._day]),)

    def __setstate(self, string):
        """Restore the fields from the 4-byte state made by _getstate()."""
        if len(string) != 4 or not (1 <= string[2] <= 12):
            raise TypeError("not enough arguments")
        yhi, ylo, self._month, self._day = string
        self._year = yhi * 256 + ylo

    def __reduce__(self):
        return (self.__class__, self._getstate())
-
_date_class = date  # so functions w/ args named "date" can get at the class

# Class attributes attached after the class body, because min and max are
# themselves date instances.
date.min = date(1, 1, 1)
date.max = date(9999, 12, 31)
date.resolution = timedelta(days=1)
-
class tzinfo(object):
    """Abstract base class for time zone info classes.

    Subclasses must override the tzname(), utcoffset() and dst() methods.
    """
    __slots__ = ()

    def tzname(self, dt):
        "datetime -> string name of time zone."
        raise NotImplementedError("tzinfo subclass must override tzname()")

    def utcoffset(self, dt):
        "datetime -> timedelta offset east of UTC (negative for west of UTC)"
        raise NotImplementedError("tzinfo subclass must override utcoffset()")

    def dst(self, dt):
        """datetime -> DST offset as a timedelta east of UTC.

        Return a zero offset if DST is not in effect.  utcoffset() must
        include the DST offset.
        """
        raise NotImplementedError("tzinfo subclass must override dst()")

    def fromutc(self, dt):
        "datetime in UTC -> datetime in local time."

        if not isinstance(dt, datetime):
            raise TypeError("fromutc() requires a datetime argument")
        if dt.tzinfo is not self:
            raise ValueError("dt.tzinfo is not self")

        total_offset = dt.utcoffset()
        if total_offset is None:
            raise ValueError("fromutc() requires a non-None utcoffset() "
                             "result")

        # See the long comment block at the end of this file for an
        # explanation of this algorithm.
        dst_offset = dt.dst()
        if dst_offset is None:
            raise ValueError("fromutc() requires a non-None dst() result")
        standard_offset = total_offset - dst_offset
        if standard_offset:
            # Shift by the standard offset first, then ask for DST again
            # at the shifted moment.
            dt += standard_offset
            dst_offset = dt.dst()
            if dst_offset is None:
                raise ValueError("fromutc(): dt.dst gave inconsistent "
                                 "results; cannot convert")
        return dt + dst_offset

    # Pickle support.

    def __reduce__(self):
        """Return (class, args) or (class, args, state) for pickling.

        args come from an optional __getinitargs__() hook; state comes
        from __getstate__() when available, else from a non-empty
        instance __dict__.
        """
        initargs_hook = getattr(self, "__getinitargs__", None)
        args = initargs_hook() if initargs_hook else ()

        state_hook = getattr(self, "__getstate__", None)
        if state_hook:
            state = state_hook()
        else:
            state = getattr(self, "__dict__", None) or None

        if state is None:
            return (self.__class__, args)
        return (self.__class__, args, state)
-
# Alias so code whose parameter is named "tzinfo" can still reach the class.
_tzinfo_class = tzinfo
-
class time(object):
    """Time with time zone.

    Constructors:

    __new__()

    Operators:

    __repr__, __str__
    __eq__, __ne__, __le__, __lt__, __ge__, __gt__, __hash__

    Methods:

    strftime()
    isoformat()
    utcoffset()
    tzname()
    dst()

    Properties (readonly):
    hour, minute, second, microsecond, tzinfo
    """

    def __new__(cls, hour=0, minute=0, second=0, microsecond=0, tzinfo=None):
        """Constructor.

        Arguments:

        hour, minute, second, microsecond (default to zero)
        tzinfo (default to None)

        As pickle support, a 6-byte ``bytes`` object is accepted in place
        of ``hour`` and decoded as saved state; ``minute`` then carries
        the optional tzinfo object.
        """
        self = object.__new__(cls)
        if isinstance(hour, bytes) and len(hour) == 6:
            # Pickle support
            self.__setstate(hour, minute or None)
            return self
        _check_tzinfo_arg(tzinfo)
        _check_time_fields(hour, minute, second, microsecond)
        self._hour = hour
        self._minute = minute
        self._second = second
        self._microsecond = microsecond
        self._tzinfo = tzinfo
        return self

    # Read-only field accessors
    @property
    def hour(self):
        """hour (0-23)"""
        return self._hour

    @property
    def minute(self):
        """minute (0-59)"""
        return self._minute

    @property
    def second(self):
        """second (0-59)"""
        return self._second

    @property
    def microsecond(self):
        """microsecond (0-999999)"""
        return self._microsecond

    @property
    def tzinfo(self):
        """timezone info object"""
        return self._tzinfo

    # Standard conversions, __hash__ (and helpers)

    # Comparisons of time objects with other.

    def __eq__(self, other):
        if isinstance(other, time):
            return self._cmp(other, allow_mixed=True) == 0
        else:
            return False

    def __ne__(self, other):
        if isinstance(other, time):
            return self._cmp(other, allow_mixed=True) != 0
        else:
            return True

    def __le__(self, other):
        if isinstance(other, time):
            return self._cmp(other) <= 0
        else:
            _cmperror(self, other)

    def __lt__(self, other):
        if isinstance(other, time):
            return self._cmp(other) < 0
        else:
            _cmperror(self, other)

    def __ge__(self, other):
        if isinstance(other, time):
            return self._cmp(other) >= 0
        else:
            _cmperror(self, other)

    def __gt__(self, other):
        if isinstance(other, time):
            return self._cmp(other) > 0
        else:
            _cmperror(self, other)

    def _cmp(self, other, allow_mixed=False):
        """Three-way compare two times.

        When both share the same tzinfo object or have equal UTC offsets,
        the raw fields are compared.  Otherwise the offsets are
        subtracted first.  If exactly one side has no offset, return a
        non-zero value when allow_mixed is true, else raise TypeError.
        """
        assert isinstance(other, time)
        mytz = self._tzinfo
        ottz = other._tzinfo
        myoff = otoff = None

        if mytz is ottz:
            base_compare = True
        else:
            myoff = self.utcoffset()
            otoff = other.utcoffset()
            base_compare = myoff == otoff

        if base_compare:
            return _cmp((self._hour, self._minute, self._second,
                         self._microsecond),
                        (other._hour, other._minute, other._second,
                         other._microsecond))
        if myoff is None or otoff is None:
            if allow_mixed:
                return 2 # arbitrary non-zero value
            else:
                raise TypeError("cannot compare naive and aware times")
        myhhmm = self._hour * 60 + self._minute - myoff//timedelta(minutes=1)
        othhmm = other._hour * 60 + other._minute - otoff//timedelta(minutes=1)
        return _cmp((myhhmm, self._second, self._microsecond),
                    (othhmm, other._second, other._microsecond))

    def __hash__(self):
        """Hash."""
        tzoff = self.utcoffset()
        if not tzoff: # zero or None
            return hash(self._getstate()[0])
        # Hash the offset-adjusted time.
        h, m = divmod(timedelta(hours=self.hour, minutes=self.minute) - tzoff,
                      timedelta(hours=1))
        assert not m % timedelta(minutes=1), "whole minute"
        m //= timedelta(minutes=1)
        if 0 <= h < 24:
            return hash(time(h, m, self.second, self.microsecond))
        return hash((h, m, self.second, self.microsecond))

    # Conversion to string

    def _tzstr(self, sep=":"):
        """Return formatted timezone offset (+xx:xx) or None."""
        off = self.utcoffset()
        if off is not None:
            if off.days < 0:
                sign = "-"
                off = -off
            else:
                sign = "+"
            hh, mm = divmod(off, timedelta(hours=1))
            assert not mm % timedelta(minutes=1), "whole minute"
            mm //= timedelta(minutes=1)
            assert 0 <= hh < 24
            off = "%s%02d%s%02d" % (sign, hh, sep, mm)
        return off

    def __repr__(self):
        """Convert to formal string, for repr()."""
        # Trailing zero second/microsecond fields are left out.
        if self._microsecond != 0:
            s = ", %d, %d" % (self._second, self._microsecond)
        elif self._second != 0:
            s = ", %d" % self._second
        else:
            s = ""
        s = "%s(%d, %d%s)" % ('datetime.' + self.__class__.__name__,
                              self._hour, self._minute, s)
        if self._tzinfo is not None:
            assert s[-1:] == ")"
            s = s[:-1] + ", tzinfo=%r" % self._tzinfo + ")"
        return s

    def isoformat(self):
        """Return the time formatted according to ISO.

        This is 'HH:MM:SS.mmmmmm+zz:zz', or 'HH:MM:SS+zz:zz' if
        self.microsecond == 0.
        """
        s = _format_time(self._hour, self._minute, self._second,
                         self._microsecond)
        tz = self._tzstr()
        if tz:
            s += tz
        return s

    __str__ = isoformat

    def strftime(self, fmt):
        """Format using strftime().  The date part of the timestamp passed
        to underlying strftime should not be used.
        """
        # The year must be >= 1000 else Python's strftime implementation
        # can raise a bogus exception.
        timetuple = (1900, 1, 1,
                     self._hour, self._minute, self._second,
                     0, 1, -1)
        return _wrap_strftime(self, fmt, timetuple)

    def __format__(self, fmt):
        """Format with strftime(fmt), or with str() when fmt is empty."""
        if len(fmt) != 0:
            return self.strftime(fmt)
        return str(self)

    # Timezone functions

    def utcoffset(self):
        """Return the tzinfo's UTC offset (positive east of UTC, negative
        west of UTC), or None when there is no tzinfo."""
        if self._tzinfo is None:
            return None
        offset = self._tzinfo.utcoffset(None)
        _check_utc_offset("utcoffset", offset)
        return offset

    def tzname(self):
        """Return the timezone name.

        Note that the name is 100% informational -- there's no requirement that
        it mean anything in particular. For example, "GMT", "UTC", "-500",
        "-5:00", "EDT", "US/Eastern", "America/New York" are all valid replies.
        """
        if self._tzinfo is None:
            return None
        name = self._tzinfo.tzname(None)
        _check_tzname(name)
        return name

    def dst(self):
        """Return the tzinfo's DST offset, or None when there is no tzinfo.

        This is purely informational; the DST offset has already been added to
        the UTC offset returned by utcoffset() if applicable, so there's no
        need to consult dst() unless you're interested in displaying the DST
        info.
        """
        if self._tzinfo is None:
            return None
        offset = self._tzinfo.dst(None)
        _check_utc_offset("dst", offset)
        return offset

    def replace(self, hour=None, minute=None, second=None, microsecond=None,
                tzinfo=True):
        """Return a new time with new values for the specified fields.

        tzinfo defaults to True, meaning "keep the current tzinfo", so
        that None can be passed to strip it.  The result is built with
        type(self), so a subclass instance yields a subclass instance.
        """
        if hour is None:
            hour = self.hour
        if minute is None:
            minute = self.minute
        if second is None:
            second = self.second
        if microsecond is None:
            microsecond = self.microsecond
        if tzinfo is True:
            tzinfo = self.tzinfo
        _check_time_fields(hour, minute, second, microsecond)
        _check_tzinfo_arg(tzinfo)
        return type(self)(hour, minute, second, microsecond, tzinfo)

    def __bool__(self):
        """Return False only when, with zero second and microsecond, the
        hour:minute equals the UTC offset (a missing offset counts as zero)."""
        if self.second or self.microsecond:
            return True
        offset = self.utcoffset() or timedelta(0)
        return timedelta(hours=self.hour, minutes=self.minute) != offset

    # Pickle support.

    def _getstate(self):
        """Return (6-byte state,) or (6-byte state, tzinfo).

        The bytes are hour, minute, second and the microsecond split into
        three bytes, most significant first.
        """
        us2, us3 = divmod(self._microsecond, 256)
        us1, us2 = divmod(us2, 256)
        basestate = bytes([self._hour, self._minute, self._second,
                           us1, us2, us3])
        if self._tzinfo is None:
            return (basestate,)
        else:
            return (basestate, self._tzinfo)

    def __setstate(self, string, tzinfo):
        """Restore the fields from state produced by _getstate()."""
        if len(string) != 6 or string[0] >= 24:
            raise TypeError("an integer is required")
        (self._hour, self._minute, self._second,
         us1, us2, us3) = string
        self._microsecond = (((us1 << 8) | us2) << 8) | us3
        if tzinfo is None or isinstance(tzinfo, _tzinfo_class):
            self._tzinfo = tzinfo
        else:
            raise TypeError("bad tzinfo state arg %r" % tzinfo)

    def __reduce__(self):
        # Use the instance's own class so that subclass instances are
        # recreated as the subclass.
        return (self.__class__, self._getstate())
-
_time_class = time  # so functions w/ args named "time" can get at the class

# Class attributes attached after the class body, because min and max are
# themselves time instances.
time.min = time(0, 0, 0)
time.max = time(23, 59, 59, 999999)
time.resolution = timedelta(microseconds=1)
-
class datetime(date):
    """datetime(year, month, day[, hour[, minute[, second[, microsecond[,tzinfo]]]]])

    The year, month and day arguments are required. tzinfo may be None, or an
    instance of a tzinfo subclass. The remaining arguments may be ints.
    """

    __slots__ = date.__slots__ + (
        '_hour', '_minute', '_second',
        '_microsecond', '_tzinfo')

    def __new__(cls, year, month=None, day=None, hour=0, minute=0, second=0,
                microsecond=0, tzinfo=None):
        """Constructor.

        As pickle support, a 10-byte ``bytes`` object is accepted in
        place of ``year`` and decoded as saved state; ``month`` then
        carries the optional tzinfo object.
        """
        if isinstance(year, bytes) and len(year) == 10:
            # Pickle support
            self = date.__new__(cls, year[:4])
            self.__setstate(year, month)
            return self
        _check_tzinfo_arg(tzinfo)
        _check_time_fields(hour, minute, second, microsecond)
        self = date.__new__(cls, year, month, day)
        self._hour = hour
        self._minute = minute
        self._second = second
        self._microsecond = microsecond
        self._tzinfo = tzinfo
        return self

    # Read-only field accessors
    @property
    def hour(self):
        """hour (0-23)"""
        return self._hour

    @property
    def minute(self):
        """minute (0-59)"""
        return self._minute

    @property
    def second(self):
        """second (0-59)"""
        return self._second

    @property
    def microsecond(self):
        """microsecond (0-999999)"""
        return self._microsecond

    @property
    def tzinfo(self):
        """timezone info object"""
        return self._tzinfo

    @classmethod
    def fromtimestamp(cls, t, tz=None):
        """Construct a datetime from a POSIX timestamp (like time.time()).

        A timezone info object may be passed in as well.
        """

        _check_tzinfo_arg(tz)

        # Without tz the platform's local time is used; with tz the UTC
        # fields are built first and converted by tz.fromutc() below.
        converter = _time.localtime if tz is None else _time.gmtime

        t, frac = divmod(t, 1.0)
        us = int(frac * 1e6)

        # If timestamp is less than one microsecond smaller than a
        # full second, us can be rounded up to 1000000.  In this case,
        # roll over to seconds, otherwise, ValueError is raised
        # by the constructor.
        if us == 1000000:
            t += 1
            us = 0
        y, m, d, hh, mm, ss, weekday, jday, dst = converter(t)
        ss = min(ss, 59)    # clamp out leap seconds if the platform has them
        result = cls(y, m, d, hh, mm, ss, us, tz)
        if tz is not None:
            result = tz.fromutc(result)
        return result

    @classmethod
    def utcfromtimestamp(cls, t):
        "Construct a UTC datetime from a POSIX timestamp (like time.time())."
        t, frac = divmod(t, 1.0)
        us = int(frac * 1e6)

        # If timestamp is less than one microsecond smaller than a
        # full second, us can be rounded up to 1000000.  In this case,
        # roll over to seconds, otherwise, ValueError is raised
        # by the constructor.
        if us == 1000000:
            t += 1
            us = 0
        y, m, d, hh, mm, ss, weekday, jday, dst = _time.gmtime(t)
        ss = min(ss, 59)    # clamp out leap seconds if the platform has them
        return cls(y, m, d, hh, mm, ss, us)

    # XXX This is supposed to do better than we *can* do by using time.time(),
    # XXX if the platform supports a more accurate way.  The C implementation
    # XXX uses gettimeofday on platforms that have it, but that isn't
    # XXX available from Python.  So now() may return different results
    # XXX across the implementations.
    @classmethod
    def now(cls, tz=None):
        "Construct a datetime from time.time() and optional time zone info."
        t = _time.time()
        return cls.fromtimestamp(t, tz)

    @classmethod
    def utcnow(cls):
        "Construct a UTC datetime from time.time()."
        t = _time.time()
        return cls.utcfromtimestamp(t)

    @classmethod
    def combine(cls, date, time):
        "Construct a datetime from a given date and a given time."
        # The parameters shadow the classes, hence the _date_class and
        # _time_class aliases.
        if not isinstance(date, _date_class):
            raise TypeError("date argument must be a date instance")
        if not isinstance(time, _time_class):
            raise TypeError("time argument must be a time instance")
        return cls(date.year, date.month, date.day,
                   time.hour, time.minute, time.second, time.microsecond,
                   time.tzinfo)

    def timetuple(self):
        "Return local time tuple compatible with time.localtime()."
        # Map dst() onto the struct_time flag: -1 unknown, 1 on, 0 off.
        dst = self.dst()
        if dst is None:
            dst = -1
        elif dst:
            dst = 1
        else:
            dst = 0
        return _build_struct_time(self.year, self.month, self.day,
                                  self.hour, self.minute, self.second,
                                  dst)

    def timestamp(self):
        "Return POSIX timestamp as float"
        if self._tzinfo is None:
            # Naive values are interpreted by time.mktime().
            return _time.mktime((self.year, self.month, self.day,
                                 self.hour, self.minute, self.second,
                                 -1, -1, -1)) + self.microsecond / 1e6
        else:
            return (self - _EPOCH).total_seconds()

    def utctimetuple(self):
        "Return UTC time tuple compatible with time.gmtime()."
        offset = self.utcoffset()
        if offset:
            # Rebinds the local name only; the instance is not modified.
            self -= offset
        y, m, d = self.year, self.month, self.day
        hh, mm, ss = self.hour, self.minute, self.second
        return _build_struct_time(y, m, d, hh, mm, ss, 0)

    def date(self):
        "Return the date part."
        return date(self._year, self._month, self._day)

    def time(self):
        "Return the time part, with tzinfo None."
        return time(self.hour, self.minute, self.second, self.microsecond)

    def timetz(self):
        "Return the time part, with same tzinfo."
        return time(self.hour, self.minute, self.second, self.microsecond,
                    self._tzinfo)

    def replace(self, year=None, month=None, day=None, hour=None,
                minute=None, second=None, microsecond=None, tzinfo=True):
        """Return a new datetime with new values for the specified fields.

        tzinfo defaults to True, meaning "keep the current tzinfo", so
        that None can be passed to strip it.  The result is built with
        type(self), so a subclass instance yields a subclass instance.
        """
        if year is None:
            year = self.year
        if month is None:
            month = self.month
        if day is None:
            day = self.day
        if hour is None:
            hour = self.hour
        if minute is None:
            minute = self.minute
        if second is None:
            second = self.second
        if microsecond is None:
            microsecond = self.microsecond
        if tzinfo is True:
            tzinfo = self.tzinfo
        _check_date_fields(year, month, day)
        _check_time_fields(hour, minute, second, microsecond)
        _check_tzinfo_arg(tzinfo)
        return type(self)(year, month, day, hour, minute, second,
                          microsecond, tzinfo)

    def astimezone(self, tz=None):
        """Return this moment expressed in time zone tz.

        With tz omitted, a fixed-offset timezone is derived from the
        platform's local time at this moment.  Raises ValueError for a
        naive datetime and TypeError when tz is not a tzinfo instance.
        """
        if tz is None:
            if self.tzinfo is None:
                raise ValueError("astimezone() requires an aware datetime")
            ts = (self - _EPOCH) // timedelta(seconds=1)
            localtm = _time.localtime(ts)
            local = datetime(*localtm[:6])
            try:
                # Extract TZ data if available
                gmtoff = localtm.tm_gmtoff
                zone = localtm.tm_zone
            except AttributeError:
                # Compute UTC offset and compare with the value implied
                # by tm_isdst.  If the values match, use the zone name
                # implied by tm_isdst.
                delta = local - datetime(*_time.gmtime(ts)[:6])
                dst = _time.daylight and localtm.tm_isdst > 0
                gmtoff = -(_time.altzone if dst else _time.timezone)
                if delta == timedelta(seconds=gmtoff):
                    tz = timezone(delta, _time.tzname[dst])
                else:
                    tz = timezone(delta)
            else:
                tz = timezone(timedelta(seconds=gmtoff), zone)

        elif not isinstance(tz, tzinfo):
            raise TypeError("tz argument must be an instance of tzinfo")

        mytz = self.tzinfo
        if mytz is None:
            raise ValueError("astimezone() requires an aware datetime")

        if tz is mytz:
            return self

        # Convert self to UTC, and attach the new time zone object.
        myoffset = self.utcoffset()
        if myoffset is None:
            raise ValueError("astimezone() requires an aware datetime")
        utc = (self - myoffset).replace(tzinfo=tz)

        # Convert from UTC to tz's local time.
        return tz.fromutc(utc)

    # Ways to produce a string.

    def ctime(self):
        "Return ctime() style string."
        weekday = self.toordinal() % 7 or 7
        return "%s %s %2d %02d:%02d:%02d %04d" % (
            _DAYNAMES[weekday],
            _MONTHNAMES[self._month],
            self._day,
            self._hour, self._minute, self._second,
            self._year)

    def isoformat(self, sep='T'):
        """Return the time formatted according to ISO.

        This is 'YYYY-MM-DD HH:MM:SS.mmmmmm', or 'YYYY-MM-DD HH:MM:SS' if
        self.microsecond == 0.

        If self.tzinfo is not None, the UTC offset is also attached, giving
        'YYYY-MM-DD HH:MM:SS.mmmmmm+HH:MM' or 'YYYY-MM-DD HH:MM:SS+HH:MM'.

        Optional argument sep specifies the separator between date and
        time, default 'T'.
        """
        s = ("%04d-%02d-%02d%c" % (self._year, self._month, self._day,
                                   sep) +
             _format_time(self._hour, self._minute, self._second,
                          self._microsecond))
        off = self.utcoffset()
        if off is not None:
            if off.days < 0:
                sign = "-"
                off = -off
            else:
                sign = "+"
            hh, mm = divmod(off, timedelta(hours=1))
            assert not mm % timedelta(minutes=1), "whole minute"
            mm //= timedelta(minutes=1)
            s += "%s%02d:%02d" % (sign, hh, mm)
        return s

    def __repr__(self):
        """Convert to formal string, for repr()."""
        L = [self._year, self._month, self._day, # These are never zero
             self._hour, self._minute, self._second, self._microsecond]
        # Drop a trailing zero microsecond, then a trailing zero second.
        if L[-1] == 0:
            del L[-1]
        if L[-1] == 0:
            del L[-1]
        s = ", ".join(map(str, L))
        s = "%s(%s)" % ('datetime.' + self.__class__.__name__, s)
        if self._tzinfo is not None:
            assert s[-1:] == ")"
            s = s[:-1] + ", tzinfo=%r" % self._tzinfo + ")"
        return s

    def __str__(self):
        "Convert to string, for str()."
        return self.isoformat(sep=' ')

    @classmethod
    def strptime(cls, date_string, format):
        'string, format -> new datetime parsed from a string (like time.strptime()).'
        import _strptime
        return _strptime._strptime_datetime(cls, date_string, format)

    def utcoffset(self):
        """Return the tzinfo's UTC offset as a timedelta (positive east of
        UTC, negative west of UTC), or None when there is no tzinfo."""
        if self._tzinfo is None:
            return None
        offset = self._tzinfo.utcoffset(self)
        _check_utc_offset("utcoffset", offset)
        return offset

    def tzname(self):
        """Return the timezone name.

        Note that the name is 100% informational -- there's no requirement that
        it mean anything in particular. For example, "GMT", "UTC", "-500",
        "-5:00", "EDT", "US/Eastern", "America/New York" are all valid replies.
        """
        name = _call_tzinfo_method(self._tzinfo, "tzname", self)
        _check_tzname(name)
        return name

    def dst(self):
        """Return the tzinfo's DST offset, or None when there is no tzinfo.

        This is purely informational; the DST offset has already been added to
        the UTC offset returned by utcoffset() if applicable, so there's no
        need to consult dst() unless you're interested in displaying the DST
        info.
        """
        if self._tzinfo is None:
            return None
        offset = self._tzinfo.dst(self)
        _check_utc_offset("dst", offset)
        return offset

    # Comparisons of datetime objects with other.

    def __eq__(self, other):
        if isinstance(other, datetime):
            return self._cmp(other, allow_mixed=True) == 0
        elif not isinstance(other, date):
            return NotImplemented
        else:
            return False

    def __ne__(self, other):
        if isinstance(other, datetime):
            return self._cmp(other, allow_mixed=True) != 0
        elif not isinstance(other, date):
            return NotImplemented
        else:
            return True

    def __le__(self, other):
        if isinstance(other, datetime):
            return self._cmp(other) <= 0
        elif not isinstance(other, date):
            return NotImplemented
        else:
            _cmperror(self, other)

    def __lt__(self, other):
        if isinstance(other, datetime):
            return self._cmp(other) < 0
        elif not isinstance(other, date):
            return NotImplemented
        else:
            _cmperror(self, other)

    def __ge__(self, other):
        if isinstance(other, datetime):
            return self._cmp(other) >= 0
        elif not isinstance(other, date):
            return NotImplemented
        else:
            _cmperror(self, other)

    def __gt__(self, other):
        if isinstance(other, datetime):
            return self._cmp(other) > 0
        elif not isinstance(other, date):
            return NotImplemented
        else:
            _cmperror(self, other)

    def _cmp(self, other, allow_mixed=False):
        """Three-way compare two datetimes.

        When both share the same tzinfo object or have equal UTC offsets,
        the raw fields are compared.  Otherwise the sign of the
        offset-aware difference decides.  If exactly one side has no
        offset, return a non-zero value when allow_mixed is true, else
        raise TypeError.
        """
        assert isinstance(other, datetime)
        mytz = self._tzinfo
        ottz = other._tzinfo
        myoff = otoff = None

        if mytz is ottz:
            base_compare = True
        else:
            myoff = self.utcoffset()
            otoff = other.utcoffset()
            base_compare = myoff == otoff

        if base_compare:
            return _cmp((self._year, self._month, self._day,
                         self._hour, self._minute, self._second,
                         self._microsecond),
                        (other._year, other._month, other._day,
                         other._hour, other._minute, other._second,
                         other._microsecond))
        if myoff is None or otoff is None:
            if allow_mixed:
                return 2 # arbitrary non-zero value
            else:
                raise TypeError("cannot compare naive and aware datetimes")
        # XXX What follows could be done more efficiently...
        diff = self - other     # this will take offsets into account
        if diff.days < 0:
            return -1
        return 1 if diff else 0

    def __add__(self, other):
        "Add a datetime and a timedelta."
        if not isinstance(other, timedelta):
            return NotImplemented
        # Express self as a timedelta since ordinal 0, add, then split
        # the sum back into date and time fields.
        delta = timedelta(self.toordinal(),
                          hours=self._hour,
                          minutes=self._minute,
                          seconds=self._second,
                          microseconds=self._microsecond)
        delta += other
        hour, rem = divmod(delta.seconds, 3600)
        minute, second = divmod(rem, 60)
        if 0 < delta.days <= _MAXORDINAL:
            return datetime.combine(date.fromordinal(delta.days),
                                    time(hour, minute, second,
                                         delta.microseconds,
                                         tzinfo=self._tzinfo))
        raise OverflowError("result out of range")

    __radd__ = __add__

    def __sub__(self, other):
        "Subtract two datetimes, or a datetime and a timedelta."
        if not isinstance(other, datetime):
            if isinstance(other, timedelta):
                return self + -other
            return NotImplemented

        days1 = self.toordinal()
        days2 = other.toordinal()
        secs1 = self._second + self._minute * 60 + self._hour * 3600
        secs2 = other._second + other._minute * 60 + other._hour * 3600
        base = timedelta(days1 - days2,
                         secs1 - secs2,
                         self._microsecond - other._microsecond)
        if self._tzinfo is other._tzinfo:
            return base
        myoff = self.utcoffset()
        otoff = other.utcoffset()
        if myoff == otoff:
            return base
        if myoff is None or otoff is None:
            raise TypeError("cannot mix naive and timezone-aware time")
        return base + otoff - myoff

    def __hash__(self):
        tzoff = self.utcoffset()
        if tzoff is None:
            return hash(self._getstate()[0])
        # Aware values hash by their offset-adjusted moment.
        days = _ymd2ord(self.year, self.month, self.day)
        seconds = self.hour * 3600 + self.minute * 60 + self.second
        return hash(timedelta(days, seconds, self.microsecond) - tzoff)

    # Pickle support.

    def _getstate(self):
        """Return (10-byte state,) or (10-byte state, tzinfo).

        The bytes are year hi, year lo, month, day, hour, minute, second
        and the microsecond split into three bytes, most significant first.
        """
        yhi, ylo = divmod(self._year, 256)
        us2, us3 = divmod(self._microsecond, 256)
        us1, us2 = divmod(us2, 256)
        basestate = bytes([yhi, ylo, self._month, self._day,
                           self._hour, self._minute, self._second,
                           us1, us2, us3])
        if self._tzinfo is None:
            return (basestate,)
        else:
            return (basestate, self._tzinfo)

    def __setstate(self, string, tzinfo):
        """Restore the fields from state produced by _getstate()."""
        (yhi, ylo, self._month, self._day, self._hour,
         self._minute, self._second, us1, us2, us3) = string
        self._year = yhi * 256 + ylo
        self._microsecond = (((us1 << 8) | us2) << 8) | us3
        if tzinfo is None or isinstance(tzinfo, _tzinfo_class):
            self._tzinfo = tzinfo
        else:
            raise TypeError("bad tzinfo state arg %r" % tzinfo)

    def __reduce__(self):
        return (self.__class__, self._getstate())
-
-
# Class-level limits: the earliest and latest representable datetimes and
# the smallest difference between two distinct datetimes.
datetime.min = datetime(MINYEAR, 1, 1)
datetime.max = datetime(MAXYEAR, 12, 31, 23, 59, 59, 999999)
datetime.resolution = timedelta(microseconds=1)
-
-
def _isoweek1monday(year):
    """Return the ordinal of the Monday that starts ISO week 1 of *year*.

    Week 1 is the week containing the year's first Thursday, so when
    January 1st falls after Thursday the following Monday is used.
    """
    # XXX This could be done more efficiently
    thursday = 3
    jan1 = _ymd2ord(year, 1, 1)
    jan1_weekday = (jan1 + 6) % 7  # Monday == 0, same rule as weekday()
    monday = jan1 - jan1_weekday
    return monday + 7 if jan1_weekday > thursday else monday
-
class timezone(tzinfo):
    """A tzinfo with a fixed offset from UTC and an optional name.

    The offset must be a timedelta representing a whole number of minutes
    between -23:59 and +23:59 inclusive.  Two timezones compare equal when
    their offsets are equal; the name does not take part in comparison or
    hashing.
    """
    __slots__ = '_offset', '_name'

    # Sentinel value to disallow None
    _Omitted = object()

    def __new__(cls, offset, name=_Omitted):
        """Create a timezone.

        offset -- timedelta, whole minutes, within [-23:59, +23:59].
        name   -- optional string; when omitted and offset is zero the
                  shared ``cls.utc`` instance is returned.

        Raises TypeError for a non-timedelta offset or a non-string name,
        ValueError for an out-of-range or sub-minute offset.
        """
        if not isinstance(offset, timedelta):
            raise TypeError("offset must be a timedelta")
        if name is cls._Omitted:
            if not offset:
                return cls.utc
            name = None
        elif not isinstance(name, str):
            ###
            # For Python-Future:
            # on Python 2 a native byte string is accepted and decoded.
            if PY2 and isinstance(name, native_str):
                name = name.decode()
            else:
                raise TypeError("name must be a string")
            ###
        if not cls._minoffset <= offset <= cls._maxoffset:
            raise ValueError("offset must be a timedelta"
                             " strictly between -timedelta(hours=24) and"
                             " timedelta(hours=24).")
        if (offset.microseconds != 0 or
                offset.seconds % 60 != 0):
            raise ValueError("offset must be a timedelta"
                             " representing a whole number of minutes")
        return cls._create(offset, name)

    @classmethod
    def _create(cls, offset, name=None):
        """Build an instance without validating *offset* or *name*."""
        self = tzinfo.__new__(cls)
        self._offset = offset
        self._name = name
        return self

    def __getinitargs__(self):
        """pickle support"""
        if self._name is None:
            return (self._offset,)
        return (self._offset, self._name)

    def __eq__(self, other):
        """Timezones are equal when their offsets are equal.

        NotImplemented is returned for foreign types so that the other
        operand gets a chance to answer; the interpreter's fallback still
        yields False for ``==``.
        """
        if isinstance(other, timezone):
            return self._offset == other._offset
        return NotImplemented

    def __ne__(self, other):
        """Inverse of __eq__.

        Defined explicitly because Python 2 does not derive ``!=`` from
        ``__eq__``; without it two equal timezones would also compare
        unequal there.
        """
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        return hash(self._offset)

    def __repr__(self):
        """Convert to formal string, for repr().

        >>> tz = timezone.utc
        >>> repr(tz)
        'datetime.timezone.utc'
        >>> tz = timezone(timedelta(hours=-5), 'EST')
        >>> repr(tz)
        "datetime.timezone(datetime.timedelta(-1, 68400), 'EST')"
        """
        if self is self.utc:
            return 'datetime.timezone.utc'
        if self._name is None:
            return "%s(%r)" % ('datetime.' + self.__class__.__name__,
                               self._offset)
        return "%s(%r, %r)" % ('datetime.' + self.__class__.__name__,
                               self._offset, self._name)

    def __str__(self):
        return self.tzname(None)

    def utcoffset(self, dt):
        """Return the fixed offset; *dt* must be a datetime or None."""
        if isinstance(dt, datetime) or dt is None:
            return self._offset
        raise TypeError("utcoffset() argument must be a datetime instance"
                        " or None")

    def tzname(self, dt):
        """Return the explicit name, or one derived from the offset
        ('UTC+HH:MM' / 'UTC-HH:MM') when no name was given."""
        if isinstance(dt, datetime) or dt is None:
            if self._name is None:
                return self._name_from_offset(self._offset)
            return self._name
        raise TypeError("tzname() argument must be a datetime instance"
                        " or None")

    def dst(self, dt):
        """Always None: a fixed-offset zone carries no DST information."""
        if isinstance(dt, datetime) or dt is None:
            return None
        raise TypeError("dst() argument must be a datetime instance"
                        " or None")

    def fromutc(self, dt):
        """Shift *dt* by the fixed offset.

        *dt* must be a datetime whose tzinfo is this very object;
        otherwise ValueError (wrong tzinfo) or TypeError (not a datetime)
        is raised.
        """
        if isinstance(dt, datetime):
            if dt.tzinfo is not self:
                raise ValueError("fromutc: dt.tzinfo "
                                 "is not self")
            return dt + self._offset
        raise TypeError("fromutc() argument must be a datetime instance"
                        " or None")

    # Inclusive bounds enforced by __new__.
    _maxoffset = timedelta(hours=23, minutes=59)
    _minoffset = -_maxoffset

    @staticmethod
    def _name_from_offset(delta):
        """Format *delta* as 'UTC+HH:MM' or 'UTC-HH:MM'."""
        if delta < timedelta(0):
            sign = '-'
            delta = -delta
        else:
            sign = '+'
        hours, rest = divmod(delta, timedelta(hours=1))
        minutes = rest // timedelta(minutes=1)
        return 'UTC{}{:02d}:{:02d}'.format(sign, hours, minutes)
-
# Shared instances.  _create() is used because timezone.__new__ returns
# cls.utc for a zero offset, which does not exist yet at this point.
timezone.utc = timezone._create(timedelta(0))
timezone.min = timezone._create(timezone._minoffset)
timezone.max = timezone._create(timezone._maxoffset)
# Aware datetime for 1970-01-01T00:00:00 UTC.
_EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc)
-"""
-Some time zone algebra. For a datetime x, let
- x.n = x stripped of its timezone -- its naive time.
- x.o = x.utcoffset(), and assuming that doesn't raise an exception or
- return None
- x.d = x.dst(), and assuming that doesn't raise an exception or
- return None
- x.s = x's standard offset, x.o - x.d
-
-Now some derived rules, where k is a duration (timedelta).
-
-1. x.o = x.s + x.d
- This follows from the definition of x.s.
-
-2. If x and y have the same tzinfo member, x.s = y.s.
- This is actually a requirement, an assumption we need to make about
- sane tzinfo classes.
-
-3. The naive UTC time corresponding to x is x.n - x.o.
- This is again a requirement for a sane tzinfo class.
-
-4. (x+k).s = x.s
- This follows from #2, and that datetime+timedelta preserves tzinfo.
-
-5. (x+k).n = x.n + k
- Again follows from how arithmetic is defined.
-
-Now we can explain tz.fromutc(x). Let's assume it's an interesting case
-(meaning that the various tzinfo methods exist, and don't blow up or return
-None when called).
-
-The function wants to return a datetime y with timezone tz, equivalent to x.
-x is already in UTC.
-
-By #3, we want
-
- y.n - y.o = x.n [1]
-
-The algorithm starts by attaching tz to x.n, and calling that y. So
-x.n = y.n at the start. Then it wants to add a duration k to y, so that [1]
-becomes true; in effect, we want to solve [2] for k:
-
- (y+k).n - (y+k).o = x.n [2]
-
-By #1, this is the same as
-
- (y+k).n - ((y+k).s + (y+k).d) = x.n [3]
-
-By #5, (y+k).n = y.n + k, which equals x.n + k because x.n=y.n at the start.
-Substituting that into [3],
-
- x.n + k - (y+k).s - (y+k).d = x.n; the x.n terms cancel, leaving
- k - (y+k).s - (y+k).d = 0; rearranging,
- k = (y+k).s + (y+k).d; by #4, (y+k).s == y.s, so
- k = y.s + (y+k).d
-
-On the RHS, (y+k).d can't be computed directly, but y.s can be, and we
-approximate k by ignoring the (y+k).d term at first. Note that k can't be
-very large, since all offset-returning methods return a duration of magnitude
-less than 24 hours. For that reason, if y is firmly in std time, (y+k).d must
-be 0, so ignoring it has no consequence then.
-
-In any case, the new value is
-
- z = y + y.s [4]
-
-It's helpful to step back and look at [4] from a higher level: it's simply
-mapping from UTC to tz's standard time.
-
-At this point, if
-
- z.n - z.o = x.n [5]
-
-we have an equivalent time, and are almost done. The insecurity here is
-at the start of daylight time. Picture US Eastern for concreteness. The wall
-time jumps from 1:59 to 3:00, and wall hours of the form 2:MM don't make good
-sense then. The docs ask that an Eastern tzinfo class consider such a time to
-be EDT (because it's "after 2"), which is a redundant spelling of 1:MM EST
-on the day DST starts. We want to return the 1:MM EST spelling because that's
-the only spelling that makes sense on the local wall clock.
-
-In fact, if [5] holds at this point, we do have the standard-time spelling,
-but that takes a bit of proof. We first prove a stronger result. What's the
-difference between the LHS and RHS of [5]? Let
-
- diff = x.n - (z.n - z.o) [6]
-
-Now
- z.n = by [4]
- (y + y.s).n = by #5
- y.n + y.s = since y.n = x.n
- x.n + y.s = since z and y have the same tzinfo member,
- y.s = z.s by #2
- x.n + z.s
-
-Plugging that back into [6] gives
-
- diff =
- x.n - ((x.n + z.s) - z.o) = expanding
- x.n - x.n - z.s + z.o = cancelling
- - z.s + z.o = by #1
- z.d
-
-So diff = z.d.
-
-If [5] is true now, diff = 0, so z.d = 0 too, and we have the standard-time
-spelling we wanted in the endcase described above. We're done. Contrarily,
-if z.d = 0, then we have a UTC equivalent, and are also done.
-
-If [5] is not true now, diff = z.d != 0, and z.d is the offset we need to
-add to z (in effect, z is in tz's standard time, and we need to shift the
-local clock into tz's daylight time).
-
-Let
-
- z' = z + z.d = z + diff [7]
-
-and we can again ask whether
-
- z'.n - z'.o = x.n [8]
-
-If so, we're done. If not, the tzinfo class is insane, according to the
-assumptions we've made. This also requires a bit of proof. As before, let's
-compute the difference between the LHS and RHS of [8] (and skipping some of
-the justifications for the kinds of substitutions we've done several times
-already):
-
- diff' = x.n - (z'.n - z'.o) = replacing z'.n via [7]
- x.n - (z.n + diff - z'.o) = replacing diff via [6]
- x.n - (z.n + x.n - (z.n - z.o) - z'.o) =
- x.n - z.n - x.n + z.n - z.o + z'.o = cancel x.n
- - z.n + z.n - z.o + z'.o = cancel z.n
- - z.o + z'.o = #1 twice
- -z.s - z.d + z'.s + z'.d = z and z' have same tzinfo
- z'.d - z.d
-
-So z' is UTC-equivalent to x iff z'.d = z.d at this point. If they are equal,
-we've found the UTC-equivalent so are done. In fact, we stop with [7] and
-return z', not bothering to compute z'.d.
-
-How could z.d and z'.d differ? z' = z + z.d [7], so merely moving z' by
-a dst() offset, and starting *from* a time already in DST (we know z.d != 0),
-would have to change the result dst() returns: we start in DST, and moving
-a little further into it takes us out of DST.
-
-There isn't a sane case where this can happen. The closest it gets is at
-the end of DST, where there's an hour in UTC with no spelling in a hybrid
-tzinfo class. In US Eastern, that's 5:MM UTC = 0:MM EST = 1:MM EDT. During
-that hour, on an Eastern clock 1:MM is taken as being in standard time (6:MM
-UTC) because the docs insist on that, but 0:MM is taken as being in daylight
-time (4:MM UTC). There is no local time mapping to 5:MM UTC. The local
-clock jumps from 1:59 back to 1:00 again, and repeats the 1:MM hour in
-standard time. Since that's what the local clock *does*, we want to map both
-UTC hours 5:MM and 6:MM to 1:MM Eastern. The result is ambiguous
-in local time, but so it goes -- it's the way the local clock works.
-
-When x = 5:MM UTC is the input to this algorithm, x.o=0, y.o=-5 and y.d=0,
-so z=0:MM. z.d=60 (minutes) then, so [5] doesn't hold and we keep going.
-z' = z + z.d = 1:MM then, and z'.d=0, and z'.d - z.d = -60 != 0 so [8]
-(correctly) concludes that z' is not UTC-equivalent to x.
-
-Because we know z.d said z was in daylight time (else [5] would have held and
-we would have stopped then), and we know z.d != z'.d (else [8] would have held
-and we would have stopped then), and there are only 2 possible values dst() can
-return in Eastern, it follows that z'.d must be 0 (which it is in the example,
-but the reasoning doesn't depend on the example -- it depends on there being
-two possible dst() outcomes, one zero and the other non-zero). Therefore
-z' must be in standard time, and is the spelling we want in this case.
-
-Note again that z' is not UTC-equivalent as far as the hybrid tzinfo class is
-concerned (because it takes z' as being in standard time rather than the
-daylight time we intend here), but returning it gives the real-life "local
-clock repeats an hour" behavior when mapping the "unspellable" UTC hour into
-tz.
-
-When the input is 6:MM, z=1:MM and z.d=0, and we stop at once, again with
-the 1:MM standard time spelling we want.
-
-So how can this break? One of the assumptions must be violated. Two
-possibilities:
-
-1) #2 effectively says that y.s is invariant across all y belonging to a given
- time zone. This isn't true if, for political reasons or continental drift,
- a region decides to change its base offset from UTC.
-
-2) There may be versions of "double daylight" time where the tail end of
- the analysis gives up a step too early. I haven't thought about that
- enough to say.
-
-In any case, it's clear that the default fromutc() is strong enough to handle
-"almost all" time zones: so long as the standard offset is invariant, it
-doesn't matter if daylight time transition points change from year to year, or
-if daylight time is skipped in some years; it doesn't matter how large or
-small dst() may get within its bounds; and it doesn't even matter if some
-perverse time zone returns a negative dst(). So a breaking case must be
-pretty bizarre, and a tzinfo subclass can override fromutc() if it is.
-"""
-try:
- from _datetime import *
-except ImportError:
- pass
-else:
- # Clean up unused names
- del (_DAYNAMES, _DAYS_BEFORE_MONTH, _DAYS_IN_MONTH,
- _DI100Y, _DI400Y, _DI4Y, _MAXORDINAL, _MONTHNAMES,
- _build_struct_time, _call_tzinfo_method, _check_date_fields,
- _check_time_fields, _check_tzinfo_arg, _check_tzname,
- _check_utc_offset, _cmp, _cmperror, _date_class, _days_before_month,
- _days_before_year, _days_in_month, _format_time, _is_leap,
- _isoweek1monday, _math, _ord2ymd, _time, _time_class, _tzinfo_class,
- _wrap_strftime, _ymd2ord)
- # XXX Since import * above excludes names that start with _,
- # docstring does not get overwritten. In the future, it may be
- # appropriate to maintain a single module level docstring and
- # remove the following line.
- from _datetime import __doc__
+"""Concrete date/time and related types.
+
+See http://www.iana.org/time-zones/repository/tz-link.html for
+time zone and DST data sources.
+"""
+from __future__ import division
+from __future__ import unicode_literals
+from __future__ import print_function
+from __future__ import absolute_import
+from future.builtins import str
+from future.builtins import bytes
+from future.builtins import map
+from future.builtins import round
+from future.builtins import int
+from future.builtins import object
+from future.utils import native_str, PY2
+
+import time as _time
+import math as _math
+
def _cmp(x, y):
    """Three-way comparison: 0 if x == y, 1 if x > y, otherwise -1."""
    if x == y:
        return 0
    if x > y:
        return 1
    return -1
+
# Smallest and largest year numbers accepted by date and datetime.
MINYEAR = 1
MAXYEAR = 9999
# Ordinal of the last representable day, i.e. date.max.toordinal().
_MAXORDINAL = 3652059
+
+# Utility functions, adapted from Python's Demo/classes/Dates.py, which
+# also assumes the current Gregorian calendar indefinitely extended in
+# both directions. Difference: Dates.py calls January 1 of year 0 day
+# number 1. The code here calls January 1 of year 1 day number 1. This is
+# to match the definition of the "proleptic Gregorian" calendar in Dershowitz
+# and Reingold's "Calendrical Calculations", where it's the base calendar
+# for all computations. See the book for algorithms for converting between
+# proleptic Gregorian ordinals and many other calendar systems.
+
# Days per month in a non-leap year; index 0 is a placeholder so that the
# tables can be indexed directly by month number (1..12).
_DAYS_IN_MONTH = [None, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

# Days in a non-leap year that precede the first day of each month.
# A generator expression keeps the loop variable out of the module
# namespace on every Python version.
_DAYS_BEFORE_MONTH = [None] + list(
    sum(_DAYS_IN_MONTH[1:month]) for month in range(1, 13))
+
def _is_leap(year):
    "year -> True if leap year, else False."
    if year % 4:
        return False
    # Century years are leap years only when divisible by 400.
    return year % 100 != 0 or year % 400 == 0
+
def _days_before_year(year):
    "year -> number of days before January 1st of year."
    elapsed = year - 1
    leap_days = elapsed // 4 - elapsed // 100 + elapsed // 400
    return elapsed * 365 + leap_days
+
def _days_in_month(year, month):
    "year, month -> number of days in that month in that year."
    assert 1 <= month <= 12, month
    # February is the only month whose length depends on the year.
    leap_february = month == 2 and _is_leap(year)
    return 29 if leap_february else _DAYS_IN_MONTH[month]
+
def _days_before_month(year, month):
    "year, month -> number of days in year preceding first day of month."
    assert 1 <= month <= 12, 'month must be in 1..12'
    # From March onwards a leap year has one extra day (Feb 29) behind it.
    leap_adjust = month > 2 and _is_leap(year)
    return _DAYS_BEFORE_MONTH[month] + leap_adjust
+
def _ymd2ord(year, month, day):
    "year, month, day -> ordinal, considering 01-Jan-0001 as day 1."
    assert 1 <= month <= 12, 'month must be in 1..12'
    month_length = _days_in_month(year, month)
    assert 1 <= day <= month_length, ('day must be in 1..%d' % month_length)
    days_in_prior_years = _days_before_year(year)
    days_in_prior_months = _days_before_month(year, month)
    return days_in_prior_years + days_in_prior_months + day
+
# Lengths, in days, of the three leap-year cycles of the Gregorian calendar.
_DI400Y = _days_before_year(401)    # days in 400 years
_DI100Y = _days_before_year(101)    # days in 100 years
_DI4Y = _days_before_year(5)        # days in   4 years

# Sanity checks on the cycle lengths:
# four single years plus one leap day make a 4-year cycle;
assert _DI4Y == 4 * 365 + 1

# four 100-year cycles plus one leap day make a 400-year cycle;
assert _DI400Y == 4 * _DI100Y + 1

# but a 100-year cycle is one leap day short of 25 4-year cycles.
assert _DI100Y == 25 * _DI4Y - 1
+
def _ord2ymd(n):
    "ordinal -> (year, month, day), considering 01-Jan-0001 as day 1."

    # The leap-year pattern repeats exactly every 400 years, so the ordinal
    # is peeled apart cycle by cycle: 400-year, 100-year, 4-year, 1-year.
    # Working with n-1 makes 400-year boundaries exactly the multiples of
    # _DI400Y:
    #
    #     D  M   Y            n              n-1
    #     -- --- ----        ----------     ----------------
    #     31 Dec -400        -_DI400Y       -_DI400Y -1
    #      1 Jan -399         -_DI400Y +1   -_DI400Y      400-year boundary
    #     ...
    #     31 Dec  000         0             -1
    #      1 Jan  001         1              0            400-year boundary
    #     ...
    #     31 Dec  400         _DI400Y        _DI400Y -1
    #      1 Jan  401         _DI400Y +1     _DI400Y      400-year boundary
    cycles400, offset = divmod(n - 1, _DI400Y)

    # offset is now the non-negative day offset inside the 400-year cycle.
    # cycles100 can come out as 4: that means the day is December 31 at the
    # very end of the 400-year cycle.
    cycles100, offset = divmod(offset, _DI100Y)
    cycles4, offset = divmod(offset, _DI4Y)

    # Likewise single_years can be 4, meaning December 31 at the end of a
    # 4-year cycle.
    single_years, offset = divmod(offset, 365)

    year = (cycles400 * 400 + 1 +
            cycles100 * 100 + cycles4 * 4 + single_years)
    if single_years == 4 or cycles100 == 4:
        assert offset == 0
        return year-1, 12, 31

    # The year is settled and offset counts days since its January 1.
    is_leap = single_years == 3 and (cycles4 != 24 or cycles100 == 3)
    assert is_leap == _is_leap(year)
    # This estimate of the month is either exact or one too large.
    month = (offset + 50) >> 5
    days_before = _DAYS_BEFORE_MONTH[month] + (month > 2 and is_leap)
    if days_before > offset:  # estimate is too large
        month -= 1
        days_before -= _DAYS_IN_MONTH[month] + (month == 2 and is_leap)
    day_index = offset - days_before
    assert 0 <= day_index < _days_in_month(year, month)

    # day_index is zero-based within the month.
    return year, month, day_index + 1
+
+# Month and day names. For localized versions, see the calendar module.
# Abbreviated English names, indexed by month number (1..12) and by ISO
# weekday number (Monday == 1); slot 0 is an unused placeholder.
_MONTHNAMES = [None] + "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec".split()
_DAYNAMES = [None] + "Mon Tue Wed Thu Fri Sat Sun".split()
+
+
def _build_struct_time(y, m, d, hh, mm, ss, dstflag):
    """Assemble a time.struct_time from date/time fields.

    The weekday (Monday == 0) and the day-of-year are derived from
    y, m, d; dstflag is passed through as the last field.
    """
    weekday = (_ymd2ord(y, m, d) + 6) % 7
    day_of_year = _days_before_month(y, m) + d
    fields = (y, m, d, hh, mm, ss, weekday, day_of_year, dstflag)
    return _time.struct_time(fields)
+
def _format_time(hh, mm, ss, us):
    """Format as 'HH:MM:SS', appending '.uuuuuu' only when us is non-zero."""
    clock = "%02d:%02d:%02d" % (hh, mm, ss)
    if not us:
        return clock
    return clock + ".%06d" % us
+
+# Correctly substitute for %z and %Z escapes in strftime formats.
def _wrap_strftime(object, format, timetuple):
    """Expand %f, %z and %Z, then delegate to time.strftime().

    object    -- source of 'microsecond', utcoffset() and tzname(); each is
                 consulted only if the matching escape occurs in *format*.
    format    -- the strftime format string.
    timetuple -- passed unchanged to time.strftime().
    """
    # Replacement texts are computed lazily and at most once each.
    f_text = None   # for %f
    z_text = None   # for %z
    Z_text = None   # for %Z

    pieces = []
    emit = pieces.append
    pos, end = 0, len(format)
    while pos < end:
        char = format[pos]
        pos += 1
        if char != '%':
            emit(char)
            continue
        if pos >= end:
            # A lone trailing '%' is passed through untouched.
            emit('%')
            continue
        code = format[pos]
        pos += 1
        if code == 'f':
            if f_text is None:
                f_text = '%06d' % getattr(object,
                                          'microsecond', 0)
            emit(f_text)
        elif code == 'z':
            if z_text is None:
                z_text = ""
                if hasattr(object, "utcoffset"):
                    offset = object.utcoffset()
                    if offset is not None:
                        negative = offset.days < 0
                        if negative:
                            offset = -offset
                        hours, rest = divmod(offset, timedelta(hours=1))
                        assert not rest % timedelta(minutes=1), "whole minute"
                        minutes = rest // timedelta(minutes=1)
                        z_text = '%c%02d%02d' % ('-' if negative else '+',
                                                 hours, minutes)
            assert '%' not in z_text
            emit(z_text)
        elif code == 'Z':
            if Z_text is None:
                Z_text = ""
                if hasattr(object, "tzname"):
                    zone = object.tzname()
                    if zone is not None:
                        # strftime is going to have at this: escape %
                        Z_text = zone.replace('%', '%%')
            emit(Z_text)
        else:
            # Any other escape is left for time.strftime() to interpret.
            emit('%')
            emit(code)
    return _time.strftime("".join(pieces), timetuple)
+
def _call_tzinfo_method(tzinfo, methname, tzinfoarg):
    """Call ``tzinfo.<methname>(tzinfoarg)``; return None when tzinfo is None."""
    if tzinfo is not None:
        method = getattr(tzinfo, methname)
        return method(tzinfoarg)
    return None
+
+# Just raise TypeError if the arg isn't None or a string.
def _check_tzname(name):
    """Accept None or a string; raise TypeError for anything else."""
    if name is None or isinstance(name, str):
        return
    raise TypeError("tzinfo.tzname() must return None or string, "
                    "not '%s'" % type(name))
+
# name is the offset-producing method, "utcoffset" or "dst".
# offset is what it returned.
# If offset is None, nothing is checked.
# If offset isn't None or a timedelta, TypeError is raised.
# Otherwise offset must be a whole number of minutes strictly between
# -24 hours and +24 hours, else ValueError is raised.
# Nothing is returned: the function only validates.
def _check_utc_offset(name, offset):
    assert name in ("utcoffset", "dst")
    if offset is None:
        return
    if not isinstance(offset, timedelta):
        raise TypeError("tzinfo.%s() must return None "
                        "or timedelta, not '%s'" % (name, type(offset)))
    if offset % timedelta(minutes=1) or offset.microseconds:
        raise ValueError("tzinfo.%s() must return a whole number "
                         "of minutes, got %s" % (name, offset))
    if not -timedelta(1) < offset < timedelta(1):
        # Message fixed: the original repeated the words "must be".
        raise ValueError("%s()=%s, must be strictly between"
                         " -timedelta(hours=24) and timedelta(hours=24)"
                         % (name, offset))
+
def _check_date_fields(year, month, day):
    """Validate a (year, month, day) triple.

    Raises TypeError when year is not an int and ValueError (message,
    offending value) when a field is out of range.
    """
    if not isinstance(year, int):
        raise TypeError('int expected')
    if year < MINYEAR or year > MAXYEAR:
        raise ValueError('year must be in %d..%d' % (MINYEAR, MAXYEAR), year)
    if month < 1 or month > 12:
        raise ValueError('month must be in 1..12', month)
    last_day = _days_in_month(year, month)
    if day < 1 or day > last_day:
        raise ValueError('day must be in 1..%d' % last_day, day)
+
def _check_time_fields(hour, minute, second, microsecond):
    """Validate time-of-day fields.

    Raises TypeError when hour is not an int and ValueError (message,
    offending value) for the first field found out of range.
    """
    if not isinstance(hour, int):
        raise TypeError('int expected')
    # (value, inclusive upper bound, error message), checked in order.
    checks = (
        (hour, 23, 'hour must be in 0..23'),
        (minute, 59, 'minute must be in 0..59'),
        (second, 59, 'second must be in 0..59'),
        (microsecond, 999999, 'microsecond must be in 0..999999'),
    )
    for value, upper, message in checks:
        if not 0 <= value <= upper:
            raise ValueError(message, value)
+
def _check_tzinfo_arg(tz):
    """Raise TypeError unless tz is None or a tzinfo instance."""
    if tz is None:
        return
    if not isinstance(tz, tzinfo):
        raise TypeError("tzinfo argument must be None or of a tzinfo subclass")
+
def _cmperror(x, y):
    """Always raise TypeError naming the two operand types."""
    left = type(x).__name__
    right = type(y).__name__
    raise TypeError("can't compare '%s' to '%s'" % (left, right))
+
class timedelta(object):
    """A duration: the difference between two dates or times.

    Supported operations:

    - add and subtract timedelta
    - unary plus, unary minus, abs()
    - comparison with timedelta
    - multiplication by int or float, division by int, float or timedelta

    The value is stored normalized as (days, seconds, microseconds) with
    0 <= seconds < 86400 and 0 <= microseconds < 1000000; only days may be
    negative.
    """
    __slots__ = '_days', '_seconds', '_microseconds'

    def __new__(cls, days=0, seconds=0, microseconds=0,
                milliseconds=0, minutes=0, hours=0, weeks=0):
        """Build a normalized timedelta from any mix of units.

        Every argument may be an int or a float.  Raises OverflowError if
        the normalized number of days exceeds 999999999 in magnitude.
        """
        # The step-by-step structure (and the asserts) spell out where
        # bounded-size assumptions hold, which is what a C implementation
        # would need to rely on.

        # XXX Check that all inputs are ints or floats.

        # Accumulators for the final, all-integer value.
        day_total = sec_total = usec = 0

        # Fold the coarse units into days, seconds and microseconds.
        days += weeks*7
        seconds += minutes*60 + hours*3600
        microseconds += milliseconds*1000

        # --- days: split off the fraction and turn it into seconds ---
        if isinstance(days, float):
            day_fraction, days = _math.modf(days)
            day_sec_fraction, day_sec_whole = _math.modf(
                day_fraction * (24.*3600.))
            assert day_sec_whole == int(day_sec_whole)  # can't overflow
            sec_total = int(day_sec_whole)
            assert days == int(days)
            day_total = int(days)
        else:
            day_sec_fraction = 0.0
            day_total = days
        assert isinstance(day_sec_fraction, float)
        assert abs(day_sec_fraction) <= 1.0
        assert isinstance(day_total, int)
        assert abs(sec_total) <= 24 * 3600

        # --- seconds: split off the fraction, carry whole days ---
        if isinstance(seconds, float):
            sec_fraction, seconds = _math.modf(seconds)
            assert seconds == int(seconds)
            seconds = int(seconds)
            sec_fraction += day_sec_fraction
            assert abs(sec_fraction) <= 2.0
        else:
            sec_fraction = day_sec_fraction
        assert isinstance(sec_fraction, float)
        assert abs(sec_fraction) <= 2.0

        assert isinstance(seconds, int)
        carry_days, seconds = divmod(seconds, 24*3600)
        day_total += carry_days
        sec_total += int(seconds)    # can't overflow
        assert isinstance(sec_total, int)
        assert abs(sec_total) <= 2 * 24 * 3600

        # Fractional seconds collected so far, expressed in microseconds.
        frac_usec = sec_fraction * 1e6
        assert abs(frac_usec) < 2.1e6    # exact value not critical

        # --- microseconds: add the fraction, round, carry upwards ---
        if isinstance(microseconds, float):
            microseconds += frac_usec
            microseconds = round(microseconds, 0)
            seconds, microseconds = divmod(microseconds, 1e6)
            assert microseconds == int(microseconds)
            assert seconds == int(seconds)
            carry_days, seconds = divmod(seconds, 24.*3600.)
            assert carry_days == int(carry_days)
            assert seconds == int(seconds)
            day_total += int(carry_days)
            sec_total += int(seconds)   # can't overflow
            assert isinstance(sec_total, int)
            assert abs(sec_total) <= 3 * 24 * 3600
        else:
            seconds, microseconds = divmod(microseconds, 1000000)
            carry_days, seconds = divmod(seconds, 24*3600)
            day_total += carry_days
            sec_total += int(seconds)    # can't overflow
            assert isinstance(sec_total, int)
            assert abs(sec_total) <= 3 * 24 * 3600
            microseconds = float(microseconds)
            microseconds += frac_usec
            microseconds = round(microseconds, 0)
        assert abs(sec_total) <= 3 * 24 * 3600
        assert abs(microseconds) < 3.1e6

        # --- final carries: microseconds -> seconds -> days ---
        assert isinstance(microseconds, float)
        assert int(microseconds) == microseconds
        usec = int(microseconds)
        carry_seconds, usec = divmod(usec, 1000000)
        sec_total += carry_seconds   # can't overflow
        assert isinstance(sec_total, int)
        carry_days, sec_total = divmod(sec_total, 24*3600)
        day_total += carry_days

        assert isinstance(day_total, int)
        assert isinstance(sec_total, int) and 0 <= sec_total < 24*3600
        assert isinstance(usec, int) and 0 <= usec < 1000000

        self = object.__new__(cls)

        self._days = day_total
        self._seconds = sec_total
        self._microseconds = usec
        if abs(day_total) > 999999999:
            raise OverflowError("timedelta # of days is too large: %d" % day_total)

        return self

    def __repr__(self):
        # Trailing zero fields are omitted: (d), (d, s) or (d, s, us).
        qualified = 'datetime.' + self.__class__.__name__
        if self._microseconds:
            return "%s(%d, %d, %d)" % (qualified,
                                       self._days,
                                       self._seconds,
                                       self._microseconds)
        if self._seconds:
            return "%s(%d, %d)" % (qualified,
                                   self._days,
                                   self._seconds)
        return "%s(%d)" % (qualified, self._days)

    def __str__(self):
        # '[D day[s], ]H:MM:SS[.UUUUUU]'
        hours, rest = divmod(self._seconds, 3600)
        minutes, secs = divmod(rest, 60)
        text = "%d:%02d:%02d" % (hours, minutes, secs)
        if self._days:
            suffix = "s" if abs(self._days) != 1 else ""
            text = ("%d day%s, " % (self._days, suffix)) + text
        if self._microseconds:
            text = text + ".%06d" % self._microseconds
        return text

    def total_seconds(self):
        """Total seconds in the duration."""
        whole_seconds = self.days * 86400 + self.seconds
        return (whole_seconds * 10**6 + self.microseconds) / 10**6

    # Read-only field accessors
    @property
    def days(self):
        """days"""
        return self._days

    @property
    def seconds(self):
        """seconds"""
        return self._seconds

    @property
    def microseconds(self):
        """microseconds"""
        return self._microseconds

    # Arithmetic.  Results are always plain timedelta objects, never
    # instances of a subclass, for compatibility with CPython.

    def __add__(self, other):
        if not isinstance(other, timedelta):
            return NotImplemented
        return timedelta(self._days + other._days,
                         self._seconds + other._seconds,
                         self._microseconds + other._microseconds)

    __radd__ = __add__

    def __sub__(self, other):
        if not isinstance(other, timedelta):
            return NotImplemented
        return timedelta(self._days - other._days,
                         self._seconds - other._seconds,
                         self._microseconds - other._microseconds)

    def __rsub__(self, other):
        if not isinstance(other, timedelta):
            return NotImplemented
        return -self + other

    def __neg__(self):
        return timedelta(-self._days,
                         -self._seconds,
                         -self._microseconds)

    def __pos__(self):
        return self

    def __abs__(self):
        # Normalization guarantees that only the days field carries the sign.
        return -self if self._days < 0 else self

    def __mul__(self, other):
        if isinstance(other, int):
            return timedelta(self._days * other,
                             self._seconds * other,
                             self._microseconds * other)
        if isinstance(other, float):
            # Multiply by the exact ratio the float represents.
            numerator, denominator = other.as_integer_ratio()
            return self * numerator / denominator
        return NotImplemented

    __rmul__ = __mul__

    def _to_microseconds(self):
        """Return the whole duration as an integer number of microseconds."""
        total_seconds = self._days * (24*3600) + self._seconds
        return total_seconds * 1000000 + self._microseconds

    def __floordiv__(self, other):
        # timedelta // timedelta -> int; timedelta // int -> timedelta
        if isinstance(other, timedelta):
            return self._to_microseconds() // other._to_microseconds()
        if isinstance(other, int):
            return timedelta(0, 0, self._to_microseconds() // other)
        return NotImplemented

    def __truediv__(self, other):
        # timedelta / timedelta -> number; timedelta / number -> timedelta
        if isinstance(other, timedelta):
            return self._to_microseconds() / other._to_microseconds()
        if isinstance(other, int):
            return timedelta(0, 0, self._to_microseconds() / other)
        if isinstance(other, float):
            numerator, denominator = other.as_integer_ratio()
            return timedelta(
                0, 0, denominator * self._to_microseconds() / numerator)
        return NotImplemented

    def __mod__(self, other):
        if not isinstance(other, timedelta):
            return NotImplemented
        remainder = self._to_microseconds() % other._to_microseconds()
        return timedelta(0, 0, remainder)

    def __divmod__(self, other):
        if not isinstance(other, timedelta):
            return NotImplemented
        quotient, remainder = divmod(self._to_microseconds(),
                                     other._to_microseconds())
        return quotient, timedelta(0, 0, remainder)

    # Comparisons of timedelta objects with other.
    # Equality against a foreign type is simply False; ordering against a
    # foreign type raises TypeError through _cmperror().

    def __eq__(self, other):
        return isinstance(other, timedelta) and self._cmp(other) == 0

    def __ne__(self, other):
        return not isinstance(other, timedelta) or self._cmp(other) != 0

    def __le__(self, other):
        if not isinstance(other, timedelta):
            _cmperror(self, other)
        return self._cmp(other) <= 0

    def __lt__(self, other):
        if not isinstance(other, timedelta):
            _cmperror(self, other)
        return self._cmp(other) < 0

    def __ge__(self, other):
        if not isinstance(other, timedelta):
            _cmperror(self, other)
        return self._cmp(other) >= 0

    def __gt__(self, other):
        if not isinstance(other, timedelta):
            _cmperror(self, other)
        return self._cmp(other) > 0

    def _cmp(self, other):
        """Three-way comparison of the normalized state tuples."""
        assert isinstance(other, timedelta)
        return _cmp(self._getstate(), other._getstate())

    def __hash__(self):
        return hash(self._getstate())

    def __bool__(self):
        # A timedelta is false only when it is exactly zero.
        return bool(self._days or self._seconds or self._microseconds)

    # Pickle support.

    def _getstate(self):
        return (self._days, self._seconds, self._microseconds)

    def __reduce__(self):
        return (self.__class__, self._getstate())
+
# Class-level bounds and granularity of timedelta.
timedelta.min = timedelta(days=-999999999)
timedelta.max = timedelta(
    days=999999999,
    hours=23,
    minutes=59,
    seconds=59,
    microseconds=999999,
)
timedelta.resolution = timedelta(0, 0, 1)
+
class date(object):
    """Concrete calendar date: a (year, month, day) triple.

    Constructors:

    __new__()
    fromtimestamp()
    today()
    fromordinal()

    Operators:

    __repr__, __str__
    rich comparisons, __hash__
    __add__, __radd__, __sub__ (add/radd only with timedelta arg)

    Methods:

    timetuple()
    toordinal()
    weekday()
    isoweekday(), isocalendar(), isoformat()
    ctime()
    strftime()
    replace()

    Properties (readonly):
    year, month, day
    """
    __slots__ = '_year', '_month', '_day'

    def __new__(cls, year, month=None, day=None):
        """Build a date from year, month, day (all required, base 1).

        A 4-byte ``bytes`` value passed as ``year`` (with ``month`` left
        unset and a plausible month byte) is treated as pickled state.
        """
        from_pickle = (isinstance(year, bytes) and len(year) == 4 and
                       1 <= year[2] <= 12 and month is None)  # Month is sane
        if not from_pickle:
            _check_date_fields(year, month, day)
        self = object.__new__(cls)
        if from_pickle:
            self.__setstate(year)
        else:
            self._year = year
            self._month = month
            self._day = day
        return self

    # Additional constructors

    @classmethod
    def fromtimestamp(cls, t):
        "Construct a date from a POSIX timestamp (like time.time())."
        year, month, day = _time.localtime(t)[:3]
        return cls(year, month, day)

    @classmethod
    def today(cls):
        "Construct a date from time.time()."
        return cls.fromtimestamp(_time.time())

    @classmethod
    def fromordinal(cls, n):
        """Construct a date from a proleptic Gregorian ordinal.

        January 1 of year 1 is day 1.  Only the year, month and day are
        non-zero in the result.
        """
        year, month, day = _ord2ymd(n)
        return cls(year, month, day)

    # Conversions to string

    def __repr__(self):
        """Convert to formal string, for repr().

        >>> d = date(2010, 1, 1)
        >>> repr(d)
        'datetime.date(2010, 1, 1)'
        """
        qualified_name = 'datetime.' + self.__class__.__name__
        return "%s(%d, %d, %d)" % (qualified_name,
                                   self._year, self._month, self._day)

    # XXX These shouldn't depend on time.localtime(), because that
    # clips the usable dates to [1970 .. 2038).  At least ctime() is
    # easily done without using strftime() -- that's better too because
    # strftime("%c", ...) is locale specific.

    def ctime(self):
        "Return ctime() style string."
        iso_weekday = self.toordinal() % 7 or 7
        day_name = _DAYNAMES[iso_weekday]
        month_name = _MONTHNAMES[self._month]
        return "%s %s %2d 00:00:00 %04d" % (
            day_name, month_name, self._day, self._year)

    def strftime(self, fmt):
        "Format using strftime()."
        return _wrap_strftime(self, fmt, self.timetuple())

    def __format__(self, fmt):
        """format() support: strftime() for a non-empty spec, else str()."""
        return self.strftime(fmt) if len(fmt) != 0 else str(self)

    def isoformat(self):
        """Return the date formatted according to ISO.

        This is 'YYYY-MM-DD'.

        References:
        - http://www.w3.org/TR/NOTE-datetime
        - http://www.cl.cam.ac.uk/~mgk25/iso-time.html
        """
        fields = (self._year, self._month, self._day)
        return "%04d-%02d-%02d" % fields

    __str__ = isoformat

    # Read-only field accessors
    @property
    def year(self):
        """year (1-9999)"""
        return self._year

    @property
    def month(self):
        """month (1-12)"""
        return self._month

    @property
    def day(self):
        """day (1-31)"""
        return self._day

    # Standard conversions, comparisons, __hash__ (and helpers)

    def timetuple(self):
        "Return local time tuple compatible with time.localtime()."
        # Time-of-day fields are zero and the DST flag is -1.
        return _build_struct_time(self._year, self._month, self._day,
                                  0, 0, 0, -1)

    def toordinal(self):
        """Return proleptic Gregorian ordinal for the year, month and day.

        January 1 of year 1 is day 1.  Only the year, month and day values
        contribute to the result.
        """
        return _ymd2ord(self._year, self._month, self._day)

    def replace(self, year=None, month=None, day=None):
        """Return a new date with new values for the specified fields."""
        year = self._year if year is None else year
        month = self._month if month is None else month
        day = self._day if day is None else day
        _check_date_fields(year, month, day)
        return date(year, month, day)

    # Comparisons of date objects with other.

    def __eq__(self, other):
        if not isinstance(other, date):
            return NotImplemented
        return self._cmp(other) == 0

    def __ne__(self, other):
        if not isinstance(other, date):
            return NotImplemented
        return self._cmp(other) != 0

    def __le__(self, other):
        if not isinstance(other, date):
            return NotImplemented
        return self._cmp(other) <= 0

    def __lt__(self, other):
        if not isinstance(other, date):
            return NotImplemented
        return self._cmp(other) < 0

    def __ge__(self, other):
        if not isinstance(other, date):
            return NotImplemented
        return self._cmp(other) >= 0

    def __gt__(self, other):
        if not isinstance(other, date):
            return NotImplemented
        return self._cmp(other) > 0

    def _cmp(self, other):
        """Compare (year, month, day) triples via the module's _cmp()."""
        assert isinstance(other, date)
        mine = (self._year, self._month, self._day)
        theirs = (other._year, other._month, other._day)
        return _cmp(mine, theirs)

    def __hash__(self):
        "Hash."
        return hash(self._getstate())

    # Computations

    def __add__(self, other):
        "Add a date to a timedelta."
        if not isinstance(other, timedelta):
            return NotImplemented
        # Only the whole-day part of the timedelta is used.
        ordinal = self.toordinal() + other.days
        if 0 < ordinal <= _MAXORDINAL:
            return date.fromordinal(ordinal)
        raise OverflowError("result out of range")

    __radd__ = __add__

    def __sub__(self, other):
        """Subtract two dates, or a date and a timedelta."""
        if isinstance(other, timedelta):
            return self + timedelta(-other.days)
        if isinstance(other, date):
            return timedelta(self.toordinal() - other.toordinal())
        return NotImplemented

    def weekday(self):
        "Return day of the week, where Monday == 0 ... Sunday == 6."
        return (self.toordinal() + 6) % 7

    # Day-of-the-week and week-of-the-year, according to ISO

    def isoweekday(self):
        "Return day of the week, where Monday == 1 ... Sunday == 7."
        # 1-Jan-0001 is a Monday
        return self.toordinal() % 7 or 7

    def isocalendar(self):
        """Return a 3-tuple containing ISO year, week number, and weekday.

        The first ISO week of the year is the (Mon-Sun) week
        containing the year's first Thursday; everything else derives
        from that.

        The first week is 1; Monday is 1 ... Sunday is 7.

        ISO calendar algorithm taken from
        http://www.phys.uu.nl/~vgent/calendar/isocalendar.htm
        """
        iso_year = self._year
        today = _ymd2ord(self._year, self._month, self._day)
        # Internally, week and day have origin 0
        week, day = divmod(today - _isoweek1monday(iso_year), 7)
        if week < 0:
            # Falls in the last ISO week of the previous year.
            iso_year -= 1
            week, day = divmod(today - _isoweek1monday(iso_year), 7)
        elif week >= 52 and today >= _isoweek1monday(iso_year+1):
            # Already inside week 1 of the following ISO year.
            iso_year += 1
            week = 0
        return iso_year, week+1, day+1

    # Pickle support.

    def _getstate(self):
        """Return a 1-tuple holding the 4-byte state (year split in two)."""
        year_hi, year_lo = divmod(self._year, 256)
        packed = bytes([year_hi, year_lo, self._month, self._day])
        return (packed,)

    def __setstate(self, string):
        """Restore the fields from the 4-byte state made by _getstate()."""
        if len(string) != 4 or not (1 <= string[2] <= 12):
            raise TypeError("not enough arguments")
        year_hi, year_lo, self._month, self._day = string
        self._year = year_hi * 256 + year_lo

    def __reduce__(self):
        cls = self.__class__
        return (cls, self._getstate())
+
# Alias so that functions with a parameter named "date" can still reach
# the class.
_date_class = date

date.min = date(year=1, month=1, day=1)
date.max = date(year=9999, month=12, day=31)
date.resolution = timedelta(1)
+
class tzinfo(object):
    """Abstract base class for time zone info classes.

    Subclasses must override the tzname(), utcoffset() and dst() methods;
    the versions here only raise NotImplementedError.
    """
    __slots__ = ()

    def tzname(self, dt):
        "datetime -> string name of time zone."
        raise NotImplementedError("tzinfo subclass must override tzname()")

    def utcoffset(self, dt):
        "datetime -> offset east of UTC (negative for west of UTC)"
        raise NotImplementedError("tzinfo subclass must override utcoffset()")

    def dst(self, dt):
        """datetime -> DST offset east of UTC.

        Return a zero offset if DST is not in effect.  utcoffset() must
        include the DST offset.
        """
        raise NotImplementedError("tzinfo subclass must override dst()")

    def fromutc(self, dt):
        "datetime in UTC -> datetime in local time."

        if not isinstance(dt, datetime):
            raise TypeError("fromutc() requires a datetime argument")
        if dt.tzinfo is not self:
            raise ValueError("dt.tzinfo is not self")

        total_offset = dt.utcoffset()
        if total_offset is None:
            raise ValueError("fromutc() requires a non-None utcoffset() "
                             "result")

        # See the long comment block at the end of this file for an
        # explanation of this algorithm.
        dst_offset = dt.dst()
        if dst_offset is None:
            raise ValueError("fromutc() requires a non-None dst() result")

        # total offset minus the DST part
        standard_offset = total_offset - dst_offset
        if standard_offset:
            dt += standard_offset
            # Ask again: dst() is re-evaluated for the shifted datetime.
            dst_offset = dt.dst()
            if dst_offset is None:
                raise ValueError("fromutc(): dt.dst gave inconsistent "
                                 "results; cannot convert")
        return dt + dst_offset

    # Pickle support.

    def __reduce__(self):
        """Return (class, args[, state]) for pickling.

        args come from an optional __getinitargs__(); state comes from
        __getstate__() when available, otherwise from a non-empty
        __dict__.  The state element is omitted when it is None.
        """
        cls = self.__class__

        getinitargs = getattr(self, "__getinitargs__", None)
        args = getinitargs() if getinitargs else ()

        getstate = getattr(self, "__getstate__", None)
        if getstate:
            state = getstate()
        else:
            state = getattr(self, "__dict__", None) or None

        if state is None:
            return (cls, args)
        return (cls, args, state)


# Alias that stays usable where a parameter or attribute is named "tzinfo".
_tzinfo_class = tzinfo
+
class time(object):
    """Time of day, optionally carrying a tzinfo.

    Constructors:

    __new__()

    Operators:

    __repr__, __str__
    rich comparisons, __hash__

    Methods:

    strftime()
    isoformat()
    utcoffset()
    tzname()
    dst()
    replace()

    Properties (readonly):
    hour, minute, second, microsecond, tzinfo
    """

    def __new__(cls, hour=0, minute=0, second=0, microsecond=0, tzinfo=None):
        """Constructor.

        Arguments:

        hour, minute, second, microsecond (default to zero)
        tzinfo (default to None)

        A 6-byte ``bytes`` value passed as ``hour`` is treated as pickled
        state, with ``minute`` carrying the optional tzinfo.
        """
        self = object.__new__(cls)
        if isinstance(hour, bytes) and len(hour) == 6:
            # Pickle support
            self.__setstate(hour, minute or None)
            return self
        _check_tzinfo_arg(tzinfo)
        _check_time_fields(hour, minute, second, microsecond)
        self._hour, self._minute = hour, minute
        self._second, self._microsecond = second, microsecond
        self._tzinfo = tzinfo
        return self

    # Read-only field accessors
    @property
    def hour(self):
        """hour (0-23)"""
        return self._hour

    @property
    def minute(self):
        """minute (0-59)"""
        return self._minute

    @property
    def second(self):
        """second (0-59)"""
        return self._second

    @property
    def microsecond(self):
        """microsecond (0-999999)"""
        return self._microsecond

    @property
    def tzinfo(self):
        """timezone info object"""
        return self._tzinfo

    # Standard conversions, __hash__ (and helpers)

    # Comparisons of time objects with other.

    def __eq__(self, other):
        return (isinstance(other, time) and
                self._cmp(other, allow_mixed=True) == 0)

    def __ne__(self, other):
        return (not isinstance(other, time) or
                self._cmp(other, allow_mixed=True) != 0)

    def __le__(self, other):
        if not isinstance(other, time):
            # _cmperror() is presumably a raising helper; None otherwise.
            _cmperror(self, other)
            return None
        return self._cmp(other) <= 0

    def __lt__(self, other):
        if not isinstance(other, time):
            # _cmperror() is presumably a raising helper; None otherwise.
            _cmperror(self, other)
            return None
        return self._cmp(other) < 0

    def __ge__(self, other):
        if not isinstance(other, time):
            # _cmperror() is presumably a raising helper; None otherwise.
            _cmperror(self, other)
            return None
        return self._cmp(other) >= 0

    def __gt__(self, other):
        if not isinstance(other, time):
            # _cmperror() is presumably a raising helper; None otherwise.
            _cmperror(self, other)
            return None
        return self._cmp(other) > 0

    def _cmp(self, other, allow_mixed=False):
        """Three-way comparison helper.

        Fields are compared directly when both sides share the same tzinfo
        object or report equal UTC offsets.  Otherwise both sides are
        shifted by their offsets first.  If exactly one offset is None the
        result is a non-zero value when allow_mixed is true, else
        TypeError is raised.
        """
        assert isinstance(other, time)
        my_fields = (self._hour, self._minute, self._second,
                     self._microsecond)
        other_fields = (other._hour, other._minute, other._second,
                        other._microsecond)

        if self._tzinfo is other._tzinfo:
            return _cmp(my_fields, other_fields)

        myoff = self.utcoffset()
        otoff = other.utcoffset()
        if myoff == otoff:
            return _cmp(my_fields, other_fields)

        if myoff is None or otoff is None:
            if allow_mixed:
                return 2  # arbitrary non-zero value
            raise TypeError("cannot compare naive and aware times")

        one_minute = timedelta(minutes=1)
        myhhmm = self._hour * 60 + self._minute - myoff//one_minute
        othhmm = other._hour * 60 + other._minute - otoff//one_minute
        return _cmp((myhhmm, self._second, self._microsecond),
                    (othhmm, other._second, other._microsecond))

    def __hash__(self):
        """Hash."""
        tzoff = self.utcoffset()
        if not tzoff:  # zero or None
            return hash(self._getstate()[0])
        # Hash the offset-adjusted wall time.
        shifted = timedelta(hours=self.hour, minutes=self.minute) - tzoff
        h, m = divmod(shifted, timedelta(hours=1))
        assert not m % timedelta(minutes=1), "whole minute"
        m //= timedelta(minutes=1)
        if 0 <= h < 24:
            return hash(time(h, m, self.second, self.microsecond))
        return hash((h, m, self.second, self.microsecond))

    # Conversion to string

    def _tzstr(self, sep=":"):
        """Return formatted timezone offset (+xx:xx) or None."""
        off = self.utcoffset()
        if off is None:
            return off
        if off.days < 0:
            sign = "-"
            off = -off
        else:
            sign = "+"
        hh, mm = divmod(off, timedelta(hours=1))
        assert not mm % timedelta(minutes=1), "whole minute"
        mm //= timedelta(minutes=1)
        assert 0 <= hh < 24
        return "%s%02d%s%02d" % (sign, hh, sep, mm)

    def __repr__(self):
        """Convert to formal string, for repr()."""
        # Trailing zero fields (microsecond, then second) are left out.
        if self._microsecond != 0:
            tail = ", %d, %d" % (self._second, self._microsecond)
        elif self._second != 0:
            tail = ", %d" % self._second
        else:
            tail = ""
        text = "%s(%d, %d%s)" % ('datetime.' + self.__class__.__name__,
                                 self._hour, self._minute, tail)
        if self._tzinfo is not None:
            assert text[-1:] == ")"
            text = text[:-1] + ", tzinfo=%r" % self._tzinfo + ")"
        return text

    def isoformat(self):
        """Return the time formatted according to ISO.

        This is 'HH:MM:SS.mmmmmm+zz:zz', or 'HH:MM:SS+zz:zz' if
        self.microsecond == 0.
        """
        text = _format_time(self._hour, self._minute, self._second,
                            self._microsecond)
        suffix = self._tzstr()
        return text + suffix if suffix else text

    __str__ = isoformat

    def strftime(self, fmt):
        """Format using strftime().  The date part of the timestamp passed
        to underlying strftime should not be used.
        """
        # The year must be >= 1000 else Python's strftime implementation
        # can raise a bogus exception.
        placeholder_date = (1900, 1, 1)
        clock = (self._hour, self._minute, self._second)
        return _wrap_strftime(self, fmt, placeholder_date + clock + (0, 1, -1))

    def __format__(self, fmt):
        """format() support: strftime() for a non-empty spec, else str()."""
        return self.strftime(fmt) if len(fmt) != 0 else str(self)

    # Timezone functions

    def utcoffset(self):
        """Return the tzinfo's UTC offset (positive east of UTC), or None
        when there is no tzinfo."""
        tz = self._tzinfo
        if tz is None:
            return None
        offset = tz.utcoffset(None)
        _check_utc_offset("utcoffset", offset)
        return offset

    def tzname(self):
        """Return the timezone name.

        Note that the name is 100% informational -- there's no requirement that
        it mean anything in particular. For example, "GMT", "UTC", "-500",
        "-5:00", "EDT", "US/Eastern", "America/New York" are all valid replies.
        """
        tz = self._tzinfo
        if tz is None:
            return None
        name = tz.tzname(None)
        _check_tzname(name)
        return name

    def dst(self):
        """Return the tzinfo's DST offset, or None when there is no tzinfo.

        This is purely informational; the DST offset has already been added to
        the UTC offset returned by utcoffset() if applicable, so there's no
        need to consult dst() unless you're interested in displaying the DST
        info.
        """
        tz = self._tzinfo
        if tz is None:
            return None
        offset = tz.dst(None)
        _check_utc_offset("dst", offset)
        return offset

    def replace(self, hour=None, minute=None, second=None, microsecond=None,
                tzinfo=True):
        """Return a new time with new values for the specified fields."""
        hour = self.hour if hour is None else hour
        minute = self.minute if minute is None else minute
        second = self.second if second is None else second
        microsecond = self.microsecond if microsecond is None else microsecond
        # True is the "not given" marker, because None is a valid tzinfo.
        if tzinfo is True:
            tzinfo = self.tzinfo
        _check_time_fields(hour, minute, second, microsecond)
        _check_tzinfo_arg(tzinfo)
        return time(hour, minute, second, microsecond, tzinfo)

    def __bool__(self):
        """False only when second and microsecond are zero and hour:minute
        equals the UTC offset (a missing offset counts as zero)."""
        if self.second or self.microsecond:
            return True
        offset = self.utcoffset() or timedelta(0)
        return timedelta(hours=self.hour, minutes=self.minute) != offset

    # Pickle support.

    def _getstate(self):
        """Return (6-byte state,) or (6-byte state, tzinfo)."""
        # Microseconds are stored big-endian in three bytes.
        upper, us3 = divmod(self._microsecond, 256)
        us1, us2 = divmod(upper, 256)
        basestate = bytes([self._hour, self._minute, self._second,
                           us1, us2, us3])
        if self._tzinfo is None:
            return (basestate,)
        return (basestate, self._tzinfo)

    def __setstate(self, string, tzinfo):
        """Restore the fields from the state produced by _getstate()."""
        if len(string) != 6 or string[0] >= 24:
            raise TypeError("an integer is required")
        (self._hour, self._minute, self._second,
         us1, us2, us3) = string
        self._microsecond = (((us1 << 8) | us2) << 8) | us3
        if not (tzinfo is None or isinstance(tzinfo, _tzinfo_class)):
            raise TypeError("bad tzinfo state arg %r" % tzinfo)
        self._tzinfo = tzinfo

    def __reduce__(self):
        return (time, self._getstate())
+
# Alias so that functions with a parameter named "time" can still reach
# the class.
_time_class = time

time.min = time(hour=0, minute=0, second=0)
time.max = time(hour=23, minute=59, second=59, microsecond=999999)
time.resolution = timedelta(0, 0, 1)
+
class datetime(date):
    """datetime(year, month, day[, hour[, minute[, second[, microsecond[,tzinfo]]]]])

    A date combined with a time of day.  The year, month and day arguments
    are required.  tzinfo may be None, or an instance of a tzinfo subclass.
    The remaining arguments may be ints.
    """

    __slots__ = date.__slots__ + (
        '_hour', '_minute', '_second',
        '_microsecond', '_tzinfo')

    def __new__(cls, year, month=None, day=None, hour=0, minute=0, second=0,
                microsecond=0, tzinfo=None):
        """Build a datetime from its fields.

        A 10-byte ``bytes`` value passed as ``year`` is treated as pickled
        state, with ``month`` carrying the optional tzinfo.
        """
        if isinstance(year, bytes) and len(year) == 10:
            # Pickle support: the first four bytes are the date part.
            self = date.__new__(cls, year[:4])
            self.__setstate(year, month)
            return self
        _check_tzinfo_arg(tzinfo)
        _check_time_fields(hour, minute, second, microsecond)
        self = date.__new__(cls, year, month, day)
        self._hour, self._minute = hour, minute
        self._second, self._microsecond = second, microsecond
        self._tzinfo = tzinfo
        return self

    # Read-only field accessors
    @property
    def hour(self):
        """hour (0-23)"""
        return self._hour

    @property
    def minute(self):
        """minute (0-59)"""
        return self._minute

    @property
    def second(self):
        """second (0-59)"""
        return self._second

    @property
    def microsecond(self):
        """microsecond (0-999999)"""
        return self._microsecond

    @property
    def tzinfo(self):
        """timezone info object"""
        return self._tzinfo

    @classmethod
    def fromtimestamp(cls, t, tz=None):
        """Construct a datetime from a POSIX timestamp (like time.time()).

        A timezone info object may be passed in as well.
        """

        _check_tzinfo_arg(tz)

        # Without a tz the platform's local time is used; with one, the
        # UTC fields are built and then converted by tz.fromutc().
        if tz is None:
            to_struct = _time.localtime
        else:
            to_struct = _time.gmtime

        whole, frac = divmod(t, 1.0)
        micros = int(frac * 1e6)

        # If timestamp is less than one microsecond smaller than a
        # full second, micros can be rounded up to 1000000.  In this case,
        # roll over to seconds, otherwise, ValueError is raised
        # by the constructor.
        if micros == 1000000:
            whole += 1
            micros = 0
        y, m, d, hh, mm, ss = to_struct(whole)[:6]
        ss = min(ss, 59)    # clamp out leap seconds if the platform has them
        built = cls(y, m, d, hh, mm, ss, micros, tz)
        return built if tz is None else tz.fromutc(built)

    @classmethod
    def utcfromtimestamp(cls, t):
        "Construct a UTC datetime from a POSIX timestamp (like time.time())."
        whole, frac = divmod(t, 1.0)
        micros = int(frac * 1e6)

        # If timestamp is less than one microsecond smaller than a
        # full second, micros can be rounded up to 1000000.  In this case,
        # roll over to seconds, otherwise, ValueError is raised
        # by the constructor.
        if micros == 1000000:
            whole += 1
            micros = 0
        y, m, d, hh, mm, ss = _time.gmtime(whole)[:6]
        ss = min(ss, 59)    # clamp out leap seconds if the platform has them
        return cls(y, m, d, hh, mm, ss, micros)

    # XXX This is supposed to do better than we *can* do by using time.time(),
    # XXX if the platform supports a more accurate way.  The C implementation
    # XXX uses gettimeofday on platforms that have it, but that isn't
    # XXX available from Python.  So now() may return different results
    # XXX across the implementations.
    @classmethod
    def now(cls, tz=None):
        "Construct a datetime from time.time() and optional time zone info."
        return cls.fromtimestamp(_time.time(), tz)

    @classmethod
    def utcnow(cls):
        "Construct a UTC datetime from time.time()."
        return cls.utcfromtimestamp(_time.time())

    @classmethod
    def combine(cls, date, time):
        "Construct a datetime from a given date and a given time."
        # The parameters shadow the classes, hence the module aliases.
        if not isinstance(date, _date_class):
            raise TypeError("date argument must be a date instance")
        if not isinstance(time, _time_class):
            raise TypeError("time argument must be a time instance")
        date_part = (date.year, date.month, date.day)
        time_part = (time.hour, time.minute, time.second, time.microsecond,
                     time.tzinfo)
        return cls(*(date_part + time_part))

    def timetuple(self):
        "Return local time tuple compatible with time.localtime()."
        dst = self.dst()
        # DST flag: -1 when dst() is None, 1 when it is truthy, else 0.
        if dst is None:
            dst_flag = -1
        else:
            dst_flag = 1 if dst else 0
        return _build_struct_time(self.year, self.month, self.day,
                                  self.hour, self.minute, self.second,
                                  dst_flag)

    def timestamp(self):
        "Return POSIX timestamp as float"
        if self._tzinfo is not None:
            return (self - _EPOCH).total_seconds()
        # No tzinfo: let the platform's mktime() interpret the fields.
        fields = (self.year, self.month, self.day,
                  self.hour, self.minute, self.second,
                  -1, -1, -1)
        return _time.mktime(fields) + self.microsecond / 1e6

    def utctimetuple(self):
        "Return UTC time tuple compatible with time.gmtime()."
        offset = self.utcoffset()
        shifted = self - offset if offset else self
        return _build_struct_time(shifted.year, shifted.month, shifted.day,
                                  shifted.hour, shifted.minute,
                                  shifted.second, 0)

    def date(self):
        "Return the date part."
        return date(self._year, self._month, self._day)

    def time(self):
        "Return the time part, with tzinfo None."
        return time(self.hour, self.minute, self.second, self.microsecond)

    def timetz(self):
        "Return the time part, with same tzinfo."
        return time(self.hour, self.minute, self.second, self.microsecond,
                    self._tzinfo)

    def replace(self, year=None, month=None, day=None, hour=None,
                minute=None, second=None, microsecond=None, tzinfo=True):
        """Return a new datetime with new values for the specified fields."""
        year = self.year if year is None else year
        month = self.month if month is None else month
        day = self.day if day is None else day
        hour = self.hour if hour is None else hour
        minute = self.minute if minute is None else minute
        second = self.second if second is None else second
        microsecond = self.microsecond if microsecond is None else microsecond
        # True is the "not given" marker, because None is a valid tzinfo.
        if tzinfo is True:
            tzinfo = self.tzinfo
        _check_date_fields(year, month, day)
        _check_time_fields(hour, minute, second, microsecond)
        _check_tzinfo_arg(tzinfo)
        return datetime(year, month, day, hour, minute, second,
                        microsecond, tzinfo)

    def astimezone(self, tz=None):
        """Return the same instant expressed in time zone tz.

        With tz omitted, a fixed-offset timezone is derived from the
        platform's local time.  Raises ValueError when self carries no
        usable offset and TypeError when tz is not a tzinfo instance.
        """
        if tz is None:
            if self.tzinfo is None:
                raise ValueError("astimezone() requires an aware datetime")
            ts = (self - _EPOCH) // timedelta(seconds=1)
            localtm = _time.localtime(ts)
            local = datetime(*localtm[:6])
            try:
                # Extract TZ data if available
                gmtoff = localtm.tm_gmtoff
                zone = localtm.tm_zone
            except AttributeError:
                # Compute UTC offset and compare with the value implied
                # by tm_isdst.  If the values match, use the zone name
                # implied by tm_isdst.
                delta = local - datetime(*_time.gmtime(ts)[:6])
                dst = _time.daylight and localtm.tm_isdst > 0
                gmtoff = -(_time.altzone if dst else _time.timezone)
                if delta == timedelta(seconds=gmtoff):
                    tz = timezone(delta, _time.tzname[dst])
                else:
                    tz = timezone(delta)
            else:
                tz = timezone(timedelta(seconds=gmtoff), zone)

        elif not isinstance(tz, tzinfo):
            raise TypeError("tz argument must be an instance of tzinfo")

        mytz = self.tzinfo
        if mytz is None:
            raise ValueError("astimezone() requires an aware datetime")

        if tz is mytz:
            return self

        # Convert self to UTC, and attach the new time zone object.
        myoffset = self.utcoffset()
        if myoffset is None:
            raise ValueError("astimezone() requires an aware datetime")
        as_utc = (self - myoffset).replace(tzinfo=tz)

        # Convert from UTC to tz's local time.
        return tz.fromutc(as_utc)

    # Ways to produce a string.

    def ctime(self):
        "Return ctime() style string."
        iso_weekday = self.toordinal() % 7 or 7
        day_name = _DAYNAMES[iso_weekday]
        month_name = _MONTHNAMES[self._month]
        return "%s %s %2d %02d:%02d:%02d %04d" % (
            day_name, month_name, self._day,
            self._hour, self._minute, self._second,
            self._year)

    def isoformat(self, sep='T'):
        """Return the time formatted according to ISO.

        This is 'YYYY-MM-DD HH:MM:SS.mmmmmm', or 'YYYY-MM-DD HH:MM:SS' if
        self.microsecond == 0.

        If self.tzinfo is not None, the UTC offset is also attached, giving
        'YYYY-MM-DD HH:MM:SS.mmmmmm+HH:MM' or 'YYYY-MM-DD HH:MM:SS+HH:MM'.

        Optional argument sep specifies the separator between date and
        time, default 'T'.
        """
        date_text = "%04d-%02d-%02d%c" % (self._year, self._month,
                                          self._day, sep)
        time_text = _format_time(self._hour, self._minute, self._second,
                                 self._microsecond)
        text = date_text + time_text

        off = self.utcoffset()
        if off is None:
            return text
        if off.days < 0:
            sign = "-"
            off = -off
        else:
            sign = "+"
        hh, mm = divmod(off, timedelta(hours=1))
        assert not mm % timedelta(minutes=1), "whole minute"
        mm //= timedelta(minutes=1)
        return text + "%s%02d:%02d" % (sign, hh, mm)

    def __repr__(self):
        """Convert to formal string, for repr()."""
        fields = [self._year, self._month, self._day,  # These are never zero
                  self._hour, self._minute, self._second, self._microsecond]
        # Drop a zero microsecond, and then a zero second as well.
        for _ in range(2):
            if fields[-1] == 0:
                del fields[-1]
        text = "%s(%s)" % ('datetime.' + self.__class__.__name__,
                           ", ".join(map(str, fields)))
        if self._tzinfo is not None:
            assert text[-1:] == ")"
            text = text[:-1] + ", tzinfo=%r" % self._tzinfo + ")"
        return text

    def __str__(self):
        "Convert to string, for str()."
        return self.isoformat(sep=' ')

    @classmethod
    def strptime(cls, date_string, format):
        'string, format -> new datetime parsed from a string (like time.strptime()).'
        import _strptime
        return _strptime._strptime_datetime(cls, date_string, format)

    def utcoffset(self):
        """Return the tzinfo's UTC offset for this datetime (positive east
        of UTC), or None when there is no tzinfo."""
        tz = self._tzinfo
        if tz is None:
            return None
        offset = tz.utcoffset(self)
        _check_utc_offset("utcoffset", offset)
        return offset

    def tzname(self):
        """Return the timezone name.

        Note that the name is 100% informational -- there's no requirement that
        it mean anything in particular. For example, "GMT", "UTC", "-500",
        "-5:00", "EDT", "US/Eastern", "America/New York" are all valid replies.
        """
        name = _call_tzinfo_method(self._tzinfo, "tzname", self)
        _check_tzname(name)
        return name

    def dst(self):
        """Return the tzinfo's DST offset for this datetime, or None when
        there is no tzinfo.

        This is purely informational; the DST offset has already been added to
        the UTC offset returned by utcoffset() if applicable, so there's no
        need to consult dst() unless you're interested in displaying the DST
        info.
        """
        tz = self._tzinfo
        if tz is None:
            return None
        offset = tz.dst(self)
        _check_utc_offset("dst", offset)
        return offset

    # Comparisons of datetime objects with other.

    def __eq__(self, other):
        if isinstance(other, datetime):
            return self._cmp(other, allow_mixed=True) == 0
        if isinstance(other, date):
            # A plain date is never equal to a datetime.
            return False
        return NotImplemented

    def __ne__(self, other):
        if isinstance(other, datetime):
            return self._cmp(other, allow_mixed=True) != 0
        if isinstance(other, date):
            return True
        return NotImplemented

    def __le__(self, other):
        if isinstance(other, datetime):
            return self._cmp(other) <= 0
        if not isinstance(other, date):
            return NotImplemented
        # Ordering against a plain date goes to _cmperror().
        _cmperror(self, other)

    def __lt__(self, other):
        if isinstance(other, datetime):
            return self._cmp(other) < 0
        if not isinstance(other, date):
            return NotImplemented
        # Ordering against a plain date goes to _cmperror().
        _cmperror(self, other)

    def __ge__(self, other):
        if isinstance(other, datetime):
            return self._cmp(other) >= 0
        if not isinstance(other, date):
            return NotImplemented
        # Ordering against a plain date goes to _cmperror().
        _cmperror(self, other)

    def __gt__(self, other):
        if isinstance(other, datetime):
            return self._cmp(other) > 0
        if not isinstance(other, date):
            return NotImplemented
        # Ordering against a plain date goes to _cmperror().
        _cmperror(self, other)

    def _cmp(self, other, allow_mixed=False):
        """Three-way comparison helper.

        Fields are compared directly when both sides share the same tzinfo
        object or report equal UTC offsets.  Otherwise the sign of
        ``self - other`` decides.  If exactly one offset is None the result
        is a non-zero value when allow_mixed is true, else TypeError.
        """
        assert isinstance(other, datetime)
        my_fields = (self._year, self._month, self._day,
                     self._hour, self._minute, self._second,
                     self._microsecond)
        other_fields = (other._year, other._month, other._day,
                        other._hour, other._minute, other._second,
                        other._microsecond)

        if self._tzinfo is other._tzinfo:
            return _cmp(my_fields, other_fields)

        myoff = self.utcoffset()
        otoff = other.utcoffset()
        if myoff == otoff:
            return _cmp(my_fields, other_fields)

        if myoff is None or otoff is None:
            if allow_mixed:
                return 2  # arbitrary non-zero value
            raise TypeError("cannot compare naive and aware datetimes")
        # XXX What follows could be done more efficiently...
        diff = self - other     # this will take offsets into account
        if diff.days < 0:
            return -1
        return 1 if diff else 0

    def __add__(self, other):
        "Add a datetime and a timedelta."
        if not isinstance(other, timedelta):
            return NotImplemented
        # Express self as a duration since ordinal 0, add, then split the
        # sum back into an ordinal day and a time of day.
        total = timedelta(self.toordinal(),
                          hours=self._hour,
                          minutes=self._minute,
                          seconds=self._second,
                          microseconds=self._microsecond)
        total += other
        hour, rem = divmod(total.seconds, 3600)
        minute, second = divmod(rem, 60)
        if not 0 < total.days <= _MAXORDINAL:
            raise OverflowError("result out of range")
        return datetime.combine(date.fromordinal(total.days),
                                time(hour, minute, second,
                                     total.microseconds,
                                     tzinfo=self._tzinfo))

    __radd__ = __add__

    def __sub__(self, other):
        "Subtract two datetimes, or a datetime and a timedelta."
        if not isinstance(other, datetime):
            if isinstance(other, timedelta):
                return self + -other
            return NotImplemented

        day_diff = self.toordinal() - other.toordinal()
        secs1 = self._second + self._minute * 60 + self._hour * 3600
        secs2 = other._second + other._minute * 60 + other._hour * 3600
        base = timedelta(day_diff,
                         secs1 - secs2,
                         self._microsecond - other._microsecond)
        if self._tzinfo is other._tzinfo:
            return base
        myoff = self.utcoffset()
        otoff = other.utcoffset()
        if myoff == otoff:
            return base
        if myoff is None or otoff is None:
            raise TypeError("cannot mix naive and timezone-aware time")
        return base + otoff - myoff

    def __hash__(self):
        """Hash the packed state when utcoffset() is None, otherwise the
        offset-adjusted duration since ordinal 0."""
        tzoff = self.utcoffset()
        if tzoff is None:
            return hash(self._getstate()[0])
        days = _ymd2ord(self.year, self.month, self.day)
        seconds = self.hour * 3600 + self.minute * 60 + self.second
        return hash(timedelta(days, seconds, self.microsecond) - tzoff)

    # Pickle support.

    def _getstate(self):
        """Return (10-byte state,) or (10-byte state, tzinfo)."""
        year_hi, year_lo = divmod(self._year, 256)
        # Microseconds are stored big-endian in three bytes.
        upper, us3 = divmod(self._microsecond, 256)
        us1, us2 = divmod(upper, 256)
        basestate = bytes([year_hi, year_lo, self._month, self._day,
                           self._hour, self._minute, self._second,
                           us1, us2, us3])
        if self._tzinfo is None:
            return (basestate,)
        return (basestate, self._tzinfo)

    def __setstate(self, string, tzinfo):
        """Restore the fields from the state produced by _getstate()."""
        (year_hi, year_lo, self._month, self._day, self._hour,
         self._minute, self._second, us1, us2, us3) = string
        self._year = year_hi * 256 + year_lo
        self._microsecond = (((us1 << 8) | us2) << 8) | us3
        if not (tzinfo is None or isinstance(tzinfo, _tzinfo_class)):
            raise TypeError("bad tzinfo state arg %r" % tzinfo)
        self._tzinfo = tzinfo

    def __reduce__(self):
        cls = self.__class__
        return (cls, self._getstate())
+
+
+datetime.min = datetime(1, 1, 1)
+datetime.max = datetime(9999, 12, 31, 23, 59, 59, 999999)
+datetime.resolution = timedelta(microseconds=1)
+
+
+def _isoweek1monday(year):
+ # Helper to calculate the day number of the Monday starting week 1
+ # XXX This could be done more efficiently
+ THURSDAY = 3
+ firstday = _ymd2ord(year, 1, 1)
+ firstweekday = (firstday + 6) % 7 # See weekday() above
+ week1monday = firstday - firstweekday
+ if firstweekday > THURSDAY:
+ week1monday += 7
+ return week1monday
+
+class timezone(tzinfo):
+ __slots__ = '_offset', '_name'
+
+ # Sentinel value to disallow None
+ _Omitted = object()
+ def __new__(cls, offset, name=_Omitted):
+ if not isinstance(offset, timedelta):
+ raise TypeError("offset must be a timedelta")
+ if name is cls._Omitted:
+ if not offset:
+ return cls.utc
+ name = None
+ elif not isinstance(name, str):
+ ###
+ # For Python-Future:
+ if PY2 and isinstance(name, native_str):
+ name = name.decode()
+ else:
+ raise TypeError("name must be a string")
+ ###
+ if not cls._minoffset <= offset <= cls._maxoffset:
+ raise ValueError("offset must be a timedelta"
+ " strictly between -timedelta(hours=24) and"
+ " timedelta(hours=24).")
+ if (offset.microseconds != 0 or
+ offset.seconds % 60 != 0):
+ raise ValueError("offset must be a timedelta"
+ " representing a whole number of minutes")
+ return cls._create(offset, name)
+
+ @classmethod
+ def _create(cls, offset, name=None):
+ self = tzinfo.__new__(cls)
+ self._offset = offset
+ self._name = name
+ return self
+
+ def __getinitargs__(self):
+ """pickle support"""
+ if self._name is None:
+ return (self._offset,)
+ return (self._offset, self._name)
+
+ def __eq__(self, other):
+ if type(other) != timezone:
+ return False
+ return self._offset == other._offset
+
+ def __hash__(self):
+ return hash(self._offset)
+
+ def __repr__(self):
+ """Convert to formal string, for repr().
+
+ >>> tz = timezone.utc
+ >>> repr(tz)
+ 'datetime.timezone.utc'
+ >>> tz = timezone(timedelta(hours=-5), 'EST')
+ >>> repr(tz)
+ "datetime.timezone(datetime.timedelta(-1, 68400), 'EST')"
+ """
+ if self is self.utc:
+ return 'datetime.timezone.utc'
+ if self._name is None:
+ return "%s(%r)" % ('datetime.' + self.__class__.__name__,
+ self._offset)
+ return "%s(%r, %r)" % ('datetime.' + self.__class__.__name__,
+ self._offset, self._name)
+
+ def __str__(self):
+ return self.tzname(None)
+
+ def utcoffset(self, dt):
+ if isinstance(dt, datetime) or dt is None:
+ return self._offset
+ raise TypeError("utcoffset() argument must be a datetime instance"
+ " or None")
+
+ def tzname(self, dt):
+ if isinstance(dt, datetime) or dt is None:
+ if self._name is None:
+ return self._name_from_offset(self._offset)
+ return self._name
+ raise TypeError("tzname() argument must be a datetime instance"
+ " or None")
+
+ def dst(self, dt):
+ if isinstance(dt, datetime) or dt is None:
+ return None
+ raise TypeError("dst() argument must be a datetime instance"
+ " or None")
+
+ def fromutc(self, dt):
+ if isinstance(dt, datetime):
+ if dt.tzinfo is not self:
+ raise ValueError("fromutc: dt.tzinfo "
+ "is not self")
+ return dt + self._offset
+ raise TypeError("fromutc() argument must be a datetime instance"
+ " or None")
+
+ _maxoffset = timedelta(hours=23, minutes=59)
+ _minoffset = -_maxoffset
+
+ @staticmethod
+ def _name_from_offset(delta):
+ if delta < timedelta(0):
+ sign = '-'
+ delta = -delta
+ else:
+ sign = '+'
+ hours, rest = divmod(delta, timedelta(hours=1))
+ minutes = rest // timedelta(minutes=1)
+ return 'UTC{}{:02d}:{:02d}'.format(sign, hours, minutes)
+
+timezone.utc = timezone._create(timedelta(0))
+timezone.min = timezone._create(timezone._minoffset)
+timezone.max = timezone._create(timezone._maxoffset)
+_EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc)
+"""
+Some time zone algebra. For a datetime x, let
+ x.n = x stripped of its timezone -- its naive time.
+ x.o = x.utcoffset(), and assuming that doesn't raise an exception or
+ return None
+ x.d = x.dst(), and assuming that doesn't raise an exception or
+ return None
+ x.s = x's standard offset, x.o - x.d
+
+Now some derived rules, where k is a duration (timedelta).
+
+1. x.o = x.s + x.d
+ This follows from the definition of x.s.
+
+2. If x and y have the same tzinfo member, x.s = y.s.
+ This is actually a requirement, an assumption we need to make about
+ sane tzinfo classes.
+
+3. The naive UTC time corresponding to x is x.n - x.o.
+ This is again a requirement for a sane tzinfo class.
+
+4. (x+k).s = x.s
+ This follows from #2, and that datetime+timedelta preserves tzinfo.
+
+5. (x+k).n = x.n + k
+ Again follows from how arithmetic is defined.
+
+Now we can explain tz.fromutc(x). Let's assume it's an interesting case
+(meaning that the various tzinfo methods exist, and don't blow up or return
+None when called).
+
+The function wants to return a datetime y with timezone tz, equivalent to x.
+x is already in UTC.
+
+By #3, we want
+
+ y.n - y.o = x.n [1]
+
+The algorithm starts by attaching tz to x.n, and calling that y. So
+x.n = y.n at the start. Then it wants to add a duration k to y, so that [1]
+becomes true; in effect, we want to solve [2] for k:
+
+ (y+k).n - (y+k).o = x.n [2]
+
+By #1, this is the same as
+
+ (y+k).n - ((y+k).s + (y+k).d) = x.n [3]
+
+By #5, (y+k).n = y.n + k, which equals x.n + k because x.n=y.n at the start.
+Substituting that into [3],
+
+ x.n + k - (y+k).s - (y+k).d = x.n; the x.n terms cancel, leaving
+ k - (y+k).s - (y+k).d = 0; rearranging,
+ k = (y+k).s + (y+k).d; by #4, (y+k).s == y.s, so
+ k = y.s + (y+k).d
+
+On the RHS, (y+k).d can't be computed directly, but y.s can be, and we
+approximate k by ignoring the (y+k).d term at first. Note that k can't be
+very large, since all offset-returning methods return a duration of magnitude
+less than 24 hours. For that reason, if y is firmly in std time, (y+k).d must
+be 0, so ignoring it has no consequence then.
+
+In any case, the new value is
+
+ z = y + y.s [4]
+
+It's helpful to step back and look at [4] from a higher level: it's simply
+mapping from UTC to tz's standard time.
+
+At this point, if
+
+ z.n - z.o = x.n [5]
+
+we have an equivalent time, and are almost done. The insecurity here is
+at the start of daylight time. Picture US Eastern for concreteness. The wall
+time jumps from 1:59 to 3:00, and wall hours of the form 2:MM don't make good
+sense then. The docs ask that an Eastern tzinfo class consider such a time to
+be EDT (because it's "after 2"), which is a redundant spelling of 1:MM EST
+on the day DST starts. We want to return the 1:MM EST spelling because that's
+the only spelling that makes sense on the local wall clock.
+
+In fact, if [5] holds at this point, we do have the standard-time spelling,
+but that takes a bit of proof. We first prove a stronger result. What's the
+difference between the LHS and RHS of [5]? Let
+
+ diff = x.n - (z.n - z.o) [6]
+
+Now
+ z.n = by [4]
+ (y + y.s).n = by #5
+ y.n + y.s = since y.n = x.n
+ x.n + y.s = since z and y have the same tzinfo member,
+ y.s = z.s by #2
+ x.n + z.s
+
+Plugging that back into [6] gives
+
+ diff =
+ x.n - ((x.n + z.s) - z.o) = expanding
+ x.n - x.n - z.s + z.o = cancelling
+ - z.s + z.o = by #1
+ z.d
+
+So diff = z.d.
+
+If [5] is true now, diff = 0, so z.d = 0 too, and we have the standard-time
+spelling we wanted in the endcase described above. We're done. Contrarily,
+if z.d = 0, then we have a UTC equivalent, and are also done.
+
+If [5] is not true now, diff = z.d != 0, and z.d is the offset we need to
+add to z (in effect, z is in tz's standard time, and we need to shift the
+local clock into tz's daylight time).
+
+Let
+
+ z' = z + z.d = z + diff [7]
+
+and we can again ask whether
+
+ z'.n - z'.o = x.n [8]
+
+If so, we're done. If not, the tzinfo class is insane, according to the
+assumptions we've made. This also requires a bit of proof. As before, let's
+compute the difference between the LHS and RHS of [8] (and skipping some of
+the justifications for the kinds of substitutions we've done several times
+already):
+
+ diff' = x.n - (z'.n - z'.o) = replacing z'.n via [7]
+ x.n - (z.n + diff - z'.o) = replacing diff via [6]
+ x.n - (z.n + x.n - (z.n - z.o) - z'.o) =
+ x.n - z.n - x.n + z.n - z.o + z'.o = cancel x.n
+ - z.n + z.n - z.o + z'.o = cancel z.n
+ - z.o + z'.o = #1 twice
+ -z.s - z.d + z'.s + z'.d = z and z' have same tzinfo
+ z'.d - z.d
+
+So z' is UTC-equivalent to x iff z'.d = z.d at this point. If they are equal,
+we've found the UTC-equivalent so are done. In fact, we stop with [7] and
+return z', not bothering to compute z'.d.
+
+How could z.d and z'.d differ? z' = z + z.d [7], so merely moving z' by
+a dst() offset, and starting *from* a time already in DST (we know z.d != 0),
+would have to change the result dst() returns: we start in DST, and moving
+a little further into it takes us out of DST.
+
+There isn't a sane case where this can happen. The closest it gets is at
+the end of DST, where there's an hour in UTC with no spelling in a hybrid
+tzinfo class. In US Eastern, that's 5:MM UTC = 0:MM EST = 1:MM EDT. During
+that hour, on an Eastern clock 1:MM is taken as being in standard time (6:MM
+UTC) because the docs insist on that, but 0:MM is taken as being in daylight
+time (4:MM UTC). There is no local time mapping to 5:MM UTC. The local
+clock jumps from 1:59 back to 1:00 again, and repeats the 1:MM hour in
+standard time. Since that's what the local clock *does*, we want to map both
+UTC hours 5:MM and 6:MM to 1:MM Eastern. The result is ambiguous
+in local time, but so it goes -- it's the way the local clock works.
+
+When x = 5:MM UTC is the input to this algorithm, x.o=0, y.o=-5 and y.d=0,
+so z=0:MM. z.d=60 (minutes) then, so [5] doesn't hold and we keep going.
+z' = z + z.d = 1:MM then, and z'.d=0, and z'.d - z.d = -60 != 0 so [8]
+(correctly) concludes that z' is not UTC-equivalent to x.
+
+Because we know z.d said z was in daylight time (else [5] would have held and
+we would have stopped then), and we know z.d != z'.d (else [8] would have held
+and we would have stopped then), and there are only 2 possible values dst() can
+return in Eastern, it follows that z'.d must be 0 (which it is in the example,
+but the reasoning doesn't depend on the example -- it depends on there being
+two possible dst() outcomes, one zero and the other non-zero). Therefore
+z' must be in standard time, and is the spelling we want in this case.
+
+Note again that z' is not UTC-equivalent as far as the hybrid tzinfo class is
+concerned (because it takes z' as being in standard time rather than the
+daylight time we intend here), but returning it gives the real-life "local
+clock repeats an hour" behavior when mapping the "unspellable" UTC hour into
+tz.
+
+When the input is 6:MM, z=1:MM and z.d=0, and we stop at once, again with
+the 1:MM standard time spelling we want.
+
+So how can this break? One of the assumptions must be violated. Two
+possibilities:
+
+1) #2 effectively says that y.s is invariant across all y belonging to a given
+ time zone. This isn't true if, for political reasons or continental drift,
+ a region decides to change its base offset from UTC.
+
+2) There may be versions of "double daylight" time where the tail end of
+ the analysis gives up a step too early. I haven't thought about that
+ enough to say.
+
+In any case, it's clear that the default fromutc() is strong enough to handle
+"almost all" time zones: so long as the standard offset is invariant, it
+doesn't matter if daylight time transition points change from year to year, or
+if daylight time is skipped in some years; it doesn't matter how large or
+small dst() may get within its bounds; and it doesn't even matter if some
+perverse time zone returns a negative dst(). So a breaking case must be
+pretty bizarre, and a tzinfo subclass can override fromutc() if it is.
+"""
+try:
+ from _datetime import *
+except ImportError:
+ pass
+else:
+ # Clean up unused names
+ del (_DAYNAMES, _DAYS_BEFORE_MONTH, _DAYS_IN_MONTH,
+ _DI100Y, _DI400Y, _DI4Y, _MAXORDINAL, _MONTHNAMES,
+ _build_struct_time, _call_tzinfo_method, _check_date_fields,
+ _check_time_fields, _check_tzinfo_arg, _check_tzname,
+ _check_utc_offset, _cmp, _cmperror, _date_class, _days_before_month,
+ _days_before_year, _days_in_month, _format_time, _is_leap,
+ _isoweek1monday, _math, _ord2ymd, _time, _time_class, _tzinfo_class,
+ _wrap_strftime, _ymd2ord)
+ # XXX Since import * above excludes names that start with _,
+ # docstring does not get overwritten. In the future, it may be
+ # appropriate to maintain a single module level docstring and
+ # remove the following line.
+ from _datetime import __doc__
diff --git a/contrib/python/future/future/backports/email/__init__.py b/contrib/python/future/future/backports/email/__init__.py
index d7ec3c05ec..f9523bc10b 100644
--- a/contrib/python/future/future/backports/email/__init__.py
+++ b/contrib/python/future/future/backports/email/__init__.py
@@ -1,78 +1,78 @@
-# Copyright (C) 2001-2007 Python Software Foundation
-# Author: Barry Warsaw
-# Contact: email-sig@python.org
-
-"""
-Backport of the Python 3.3 email package for Python-Future.
-
-A package for parsing, handling, and generating email messages.
-"""
-from __future__ import unicode_literals
-from __future__ import division
-from __future__ import absolute_import
-
-# Install the surrogate escape handler here because this is used by many
-# modules in the email package.
-from future.utils import surrogateescape
-surrogateescape.register_surrogateescape()
-# (Should this be done globally by ``future``?)
-
-
-__version__ = '5.1.0'
-
-__all__ = [
- 'base64mime',
- 'charset',
- 'encoders',
- 'errors',
- 'feedparser',
- 'generator',
- 'header',
- 'iterators',
- 'message',
- 'message_from_file',
- 'message_from_binary_file',
- 'message_from_string',
- 'message_from_bytes',
- 'mime',
- 'parser',
- 'quoprimime',
- 'utils',
- ]
-
-
-
-# Some convenience routines. Don't import Parser and Message as side-effects
-# of importing email since those cascadingly import most of the rest of the
-# email package.
-def message_from_string(s, *args, **kws):
- """Parse a string into a Message object model.
-
- Optional _class and strict are passed to the Parser constructor.
- """
- from future.backports.email.parser import Parser
- return Parser(*args, **kws).parsestr(s)
-
-def message_from_bytes(s, *args, **kws):
- """Parse a bytes string into a Message object model.
-
- Optional _class and strict are passed to the Parser constructor.
- """
- from future.backports.email.parser import BytesParser
- return BytesParser(*args, **kws).parsebytes(s)
-
-def message_from_file(fp, *args, **kws):
- """Read a file and parse its contents into a Message object model.
-
- Optional _class and strict are passed to the Parser constructor.
- """
- from future.backports.email.parser import Parser
- return Parser(*args, **kws).parse(fp)
-
-def message_from_binary_file(fp, *args, **kws):
- """Read a binary file and parse its contents into a Message object model.
-
- Optional _class and strict are passed to the Parser constructor.
- """
- from future.backports.email.parser import BytesParser
- return BytesParser(*args, **kws).parse(fp)
+# Copyright (C) 2001-2007 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""
+Backport of the Python 3.3 email package for Python-Future.
+
+A package for parsing, handling, and generating email messages.
+"""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+
+# Install the surrogate escape handler here because this is used by many
+# modules in the email package.
+from future.utils import surrogateescape
+surrogateescape.register_surrogateescape()
+# (Should this be done globally by ``future``?)
+
+
+__version__ = '5.1.0'
+
+__all__ = [
+ 'base64mime',
+ 'charset',
+ 'encoders',
+ 'errors',
+ 'feedparser',
+ 'generator',
+ 'header',
+ 'iterators',
+ 'message',
+ 'message_from_file',
+ 'message_from_binary_file',
+ 'message_from_string',
+ 'message_from_bytes',
+ 'mime',
+ 'parser',
+ 'quoprimime',
+ 'utils',
+ ]
+
+
+
+# Some convenience routines. Don't import Parser and Message as side-effects
+# of importing email since those cascadingly import most of the rest of the
+# email package.
+def message_from_string(s, *args, **kws):
+ """Parse a string into a Message object model.
+
+ Optional _class and strict are passed to the Parser constructor.
+ """
+ from future.backports.email.parser import Parser
+ return Parser(*args, **kws).parsestr(s)
+
+def message_from_bytes(s, *args, **kws):
+ """Parse a bytes string into a Message object model.
+
+ Optional _class and strict are passed to the Parser constructor.
+ """
+ from future.backports.email.parser import BytesParser
+ return BytesParser(*args, **kws).parsebytes(s)
+
+def message_from_file(fp, *args, **kws):
+ """Read a file and parse its contents into a Message object model.
+
+ Optional _class and strict are passed to the Parser constructor.
+ """
+ from future.backports.email.parser import Parser
+ return Parser(*args, **kws).parse(fp)
+
+def message_from_binary_file(fp, *args, **kws):
+ """Read a binary file and parse its contents into a Message object model.
+
+ Optional _class and strict are passed to the Parser constructor.
+ """
+ from future.backports.email.parser import BytesParser
+ return BytesParser(*args, **kws).parse(fp)
diff --git a/contrib/python/future/future/backports/email/_encoded_words.py b/contrib/python/future/future/backports/email/_encoded_words.py
index d375aa9d26..7c4a529146 100644
--- a/contrib/python/future/future/backports/email/_encoded_words.py
+++ b/contrib/python/future/future/backports/email/_encoded_words.py
@@ -1,232 +1,232 @@
-""" Routines for manipulating RFC2047 encoded words.
-
-This is currently a package-private API, but will be considered for promotion
-to a public API if there is demand.
-
-"""
-from __future__ import unicode_literals
-from __future__ import division
-from __future__ import absolute_import
-from future.builtins import bytes
-from future.builtins import chr
-from future.builtins import int
-from future.builtins import str
-
-# An ecoded word looks like this:
-#
-# =?charset[*lang]?cte?encoded_string?=
-#
-# for more information about charset see the charset module. Here it is one
-# of the preferred MIME charset names (hopefully; you never know when parsing).
-# cte (Content Transfer Encoding) is either 'q' or 'b' (ignoring case). In
-# theory other letters could be used for other encodings, but in practice this
-# (almost?) never happens. There could be a public API for adding entries
-# to the CTE tables, but YAGNI for now. 'q' is Quoted Printable, 'b' is
-# Base64. The meaning of encoded_string should be obvious. 'lang' is optional
-# as indicated by the brackets (they are not part of the syntax) but is almost
-# never encountered in practice.
-#
-# The general interface for a CTE decoder is that it takes the encoded_string
-# as its argument, and returns a tuple (cte_decoded_string, defects). The
-# cte_decoded_string is the original binary that was encoded using the
-# specified cte. 'defects' is a list of MessageDefect instances indicating any
-# problems encountered during conversion. 'charset' and 'lang' are the
-# corresponding strings extracted from the EW, case preserved.
-#
-# The general interface for a CTE encoder is that it takes a binary sequence
-# as input and returns the cte_encoded_string, which is an ascii-only string.
-#
-# Each decoder must also supply a length function that takes the binary
-# sequence as its argument and returns the length of the resulting encoded
-# string.
-#
-# The main API functions for the module are decode, which calls the decoder
-# referenced by the cte specifier, and encode, which adds the appropriate
-# RFC 2047 "chrome" to the encoded string, and can optionally automatically
-# select the shortest possible encoding. See their docstrings below for
-# details.
-
-import re
-import base64
-import binascii
-import functools
-from string import ascii_letters, digits
-from future.backports.email import errors
-
-__all__ = ['decode_q',
- 'encode_q',
- 'decode_b',
- 'encode_b',
- 'len_q',
- 'len_b',
- 'decode',
- 'encode',
- ]
-
-#
-# Quoted Printable
-#
-
-# regex based decoder.
-_q_byte_subber = functools.partial(re.compile(br'=([a-fA-F0-9]{2})').sub,
- lambda m: bytes([int(m.group(1), 16)]))
-
-def decode_q(encoded):
- encoded = bytes(encoded.replace(b'_', b' '))
- return _q_byte_subber(encoded), []
-
-
-# dict mapping bytes to their encoded form
-class _QByteMap(dict):
-
- safe = bytes(b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii'))
-
- def __missing__(self, key):
- if key in self.safe:
- self[key] = chr(key)
- else:
- self[key] = "={:02X}".format(key)
- return self[key]
-
-_q_byte_map = _QByteMap()
-
-# In headers spaces are mapped to '_'.
-_q_byte_map[ord(' ')] = '_'
-
-def encode_q(bstring):
- return str(''.join(_q_byte_map[x] for x in bytes(bstring)))
-
-def len_q(bstring):
- return sum(len(_q_byte_map[x]) for x in bytes(bstring))
-
-
-#
-# Base64
-#
-
-def decode_b(encoded):
- defects = []
- pad_err = len(encoded) % 4
- if pad_err:
- defects.append(errors.InvalidBase64PaddingDefect())
- padded_encoded = encoded + b'==='[:4-pad_err]
- else:
- padded_encoded = encoded
- try:
- # The validate kwarg to b64decode is not supported in Py2.x
- if not re.match(b'^[A-Za-z0-9+/]*={0,2}$', padded_encoded):
- raise binascii.Error('Non-base64 digit found')
- return base64.b64decode(padded_encoded), defects
- except binascii.Error:
- # Since we had correct padding, this must an invalid char error.
- defects = [errors.InvalidBase64CharactersDefect()]
- # The non-alphabet characters are ignored as far as padding
- # goes, but we don't know how many there are. So we'll just
- # try various padding lengths until something works.
- for i in 0, 1, 2, 3:
- try:
- return base64.b64decode(encoded+b'='*i), defects
- except (binascii.Error, TypeError): # Py2 raises a TypeError
- if i==0:
- defects.append(errors.InvalidBase64PaddingDefect())
- else:
- # This should never happen.
- raise AssertionError("unexpected binascii.Error")
-
-def encode_b(bstring):
- return base64.b64encode(bstring).decode('ascii')
-
-def len_b(bstring):
- groups_of_3, leftover = divmod(len(bstring), 3)
- # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
- return groups_of_3 * 4 + (4 if leftover else 0)
-
-
-_cte_decoders = {
- 'q': decode_q,
- 'b': decode_b,
- }
-
-def decode(ew):
- """Decode encoded word and return (string, charset, lang, defects) tuple.
-
- An RFC 2047/2243 encoded word has the form:
-
- =?charset*lang?cte?encoded_string?=
-
- where '*lang' may be omitted but the other parts may not be.
-
- This function expects exactly such a string (that is, it does not check the
- syntax and may raise errors if the string is not well formed), and returns
- the encoded_string decoded first from its Content Transfer Encoding and
- then from the resulting bytes into unicode using the specified charset. If
- the cte-decoded string does not successfully decode using the specified
- character set, a defect is added to the defects list and the unknown octets
- are replaced by the unicode 'unknown' character \uFDFF.
-
- The specified charset and language are returned. The default for language,
- which is rarely if ever encountered, is the empty string.
-
- """
- _, charset, cte, cte_string, _ = str(ew).split('?')
- charset, _, lang = charset.partition('*')
- cte = cte.lower()
- # Recover the original bytes and do CTE decoding.
- bstring = cte_string.encode('ascii', 'surrogateescape')
- bstring, defects = _cte_decoders[cte](bstring)
- # Turn the CTE decoded bytes into unicode.
- try:
- string = bstring.decode(charset)
- except UnicodeError:
- defects.append(errors.UndecodableBytesDefect("Encoded word "
- "contains bytes not decodable using {} charset".format(charset)))
- string = bstring.decode(charset, 'surrogateescape')
- except LookupError:
- string = bstring.decode('ascii', 'surrogateescape')
- if charset.lower() != 'unknown-8bit':
- defects.append(errors.CharsetError("Unknown charset {} "
- "in encoded word; decoded as unknown bytes".format(charset)))
- return string, charset, lang, defects
-
-
-_cte_encoders = {
- 'q': encode_q,
- 'b': encode_b,
- }
-
-_cte_encode_length = {
- 'q': len_q,
- 'b': len_b,
- }
-
-def encode(string, charset='utf-8', encoding=None, lang=''):
- """Encode string using the CTE encoding that produces the shorter result.
-
- Produces an RFC 2047/2243 encoded word of the form:
-
- =?charset*lang?cte?encoded_string?=
-
- where '*lang' is omitted unless the 'lang' parameter is given a value.
- Optional argument charset (defaults to utf-8) specifies the charset to use
- to encode the string to binary before CTE encoding it. Optional argument
- 'encoding' is the cte specifier for the encoding that should be used ('q'
- or 'b'); if it is None (the default) the encoding which produces the
- shortest encoded sequence is used, except that 'q' is preferred if it is up
- to five characters longer. Optional argument 'lang' (default '') gives the
- RFC 2243 language string to specify in the encoded word.
-
- """
- string = str(string)
- if charset == 'unknown-8bit':
- bstring = string.encode('ascii', 'surrogateescape')
- else:
- bstring = string.encode(charset)
- if encoding is None:
- qlen = _cte_encode_length['q'](bstring)
- blen = _cte_encode_length['b'](bstring)
- # Bias toward q. 5 is arbitrary.
- encoding = 'q' if qlen - blen < 5 else 'b'
- encoded = _cte_encoders[encoding](bstring)
- if lang:
- lang = '*' + lang
- return "=?{0}{1}?{2}?{3}?=".format(charset, lang, encoding, encoded)
+""" Routines for manipulating RFC2047 encoded words.
+
+This is currently a package-private API, but will be considered for promotion
+to a public API if there is demand.
+
+"""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import bytes
+from future.builtins import chr
+from future.builtins import int
+from future.builtins import str
+
+# An encoded word looks like this:
+#
+# =?charset[*lang]?cte?encoded_string?=
+#
+# for more information about charset see the charset module. Here it is one
+# of the preferred MIME charset names (hopefully; you never know when parsing).
+# cte (Content Transfer Encoding) is either 'q' or 'b' (ignoring case). In
+# theory other letters could be used for other encodings, but in practice this
+# (almost?) never happens. There could be a public API for adding entries
+# to the CTE tables, but YAGNI for now. 'q' is Quoted Printable, 'b' is
+# Base64. The meaning of encoded_string should be obvious. 'lang' is optional
+# as indicated by the brackets (they are not part of the syntax) but is almost
+# never encountered in practice.
+#
+# The general interface for a CTE decoder is that it takes the encoded_string
+# as its argument, and returns a tuple (cte_decoded_string, defects). The
+# cte_decoded_string is the original binary that was encoded using the
+# specified cte. 'defects' is a list of MessageDefect instances indicating any
+# problems encountered during conversion. 'charset' and 'lang' are the
+# corresponding strings extracted from the EW, case preserved.
+#
+# The general interface for a CTE encoder is that it takes a binary sequence
+# as input and returns the cte_encoded_string, which is an ascii-only string.
+#
+# Each encoder must also supply a length function that takes the binary
+# sequence as its argument and returns the length of the resulting encoded
+# string.
+#
+# The main API functions for the module are decode, which calls the decoder
+# referenced by the cte specifier, and encode, which adds the appropriate
+# RFC 2047 "chrome" to the encoded string, and can optionally automatically
+# select the shortest possible encoding. See their docstrings below for
+# details.
+
+import re
+import base64
+import binascii
+import functools
+from string import ascii_letters, digits
+from future.backports.email import errors
+
+__all__ = ['decode_q',
+ 'encode_q',
+ 'decode_b',
+ 'encode_b',
+ 'len_q',
+ 'len_b',
+ 'decode',
+ 'encode',
+ ]
+
+#
+# Quoted Printable
+#
+
+# regex based decoder.
+_q_byte_subber = functools.partial(re.compile(br'=([a-fA-F0-9]{2})').sub,
+ lambda m: bytes([int(m.group(1), 16)]))
+
+def decode_q(encoded):
+ encoded = bytes(encoded.replace(b'_', b' '))
+ return _q_byte_subber(encoded), []
+
+
+# dict mapping bytes to their encoded form
+class _QByteMap(dict):
+
+ safe = bytes(b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii'))
+
+ def __missing__(self, key):
+ if key in self.safe:
+ self[key] = chr(key)
+ else:
+ self[key] = "={:02X}".format(key)
+ return self[key]
+
+_q_byte_map = _QByteMap()
+
+# In headers spaces are mapped to '_'.
+_q_byte_map[ord(' ')] = '_'
+
+def encode_q(bstring):
+ return str(''.join(_q_byte_map[x] for x in bytes(bstring)))
+
+def len_q(bstring):
+ return sum(len(_q_byte_map[x]) for x in bytes(bstring))
+
+
+#
+# Base64
+#
+
+def decode_b(encoded):
+ defects = []
+ pad_err = len(encoded) % 4
+ if pad_err:
+ defects.append(errors.InvalidBase64PaddingDefect())
+ padded_encoded = encoded + b'==='[:4-pad_err]
+ else:
+ padded_encoded = encoded
+ try:
+ # The validate kwarg to b64decode is not supported in Py2.x
+ if not re.match(b'^[A-Za-z0-9+/]*={0,2}$', padded_encoded):
+ raise binascii.Error('Non-base64 digit found')
+ return base64.b64decode(padded_encoded), defects
+ except binascii.Error:
+ # Since we had correct padding, this must an invalid char error.
+ defects = [errors.InvalidBase64CharactersDefect()]
+ # The non-alphabet characters are ignored as far as padding
+ # goes, but we don't know how many there are. So we'll just
+ # try various padding lengths until something works.
+ for i in 0, 1, 2, 3:
+ try:
+ return base64.b64decode(encoded+b'='*i), defects
+ except (binascii.Error, TypeError): # Py2 raises a TypeError
+ if i==0:
+ defects.append(errors.InvalidBase64PaddingDefect())
+ else:
+ # This should never happen.
+ raise AssertionError("unexpected binascii.Error")
+
+def encode_b(bstring):
+ return base64.b64encode(bstring).decode('ascii')
+
+def len_b(bstring):
+ groups_of_3, leftover = divmod(len(bstring), 3)
+ # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
+ return groups_of_3 * 4 + (4 if leftover else 0)
+
+
+_cte_decoders = {
+ 'q': decode_q,
+ 'b': decode_b,
+ }
+
+def decode(ew):
+ """Decode encoded word and return (string, charset, lang, defects) tuple.
+
+ An RFC 2047/2243 encoded word has the form:
+
+ =?charset*lang?cte?encoded_string?=
+
+ where '*lang' may be omitted but the other parts may not be.
+
+ This function expects exactly such a string (that is, it does not check the
+ syntax and may raise errors if the string is not well formed), and returns
+ the encoded_string decoded first from its Content Transfer Encoding and
+ then from the resulting bytes into unicode using the specified charset. If
+ the cte-decoded string does not successfully decode using the specified
+ character set, a defect is added to the defects list and the unknown octets
+ are replaced by the unicode 'unknown' character \uFDFF.
+
+ The specified charset and language are returned. The default for language,
+ which is rarely if ever encountered, is the empty string.
+
+ """
+ _, charset, cte, cte_string, _ = str(ew).split('?')
+ charset, _, lang = charset.partition('*')
+ cte = cte.lower()
+ # Recover the original bytes and do CTE decoding.
+ bstring = cte_string.encode('ascii', 'surrogateescape')
+ bstring, defects = _cte_decoders[cte](bstring)
+ # Turn the CTE decoded bytes into unicode.
+ try:
+ string = bstring.decode(charset)
+ except UnicodeError:
+ defects.append(errors.UndecodableBytesDefect("Encoded word "
+ "contains bytes not decodable using {} charset".format(charset)))
+ string = bstring.decode(charset, 'surrogateescape')
+ except LookupError:
+ string = bstring.decode('ascii', 'surrogateescape')
+ if charset.lower() != 'unknown-8bit':
+ defects.append(errors.CharsetError("Unknown charset {} "
+ "in encoded word; decoded as unknown bytes".format(charset)))
+ return string, charset, lang, defects
+
+
+_cte_encoders = {
+ 'q': encode_q,
+ 'b': encode_b,
+ }
+
+_cte_encode_length = {
+ 'q': len_q,
+ 'b': len_b,
+ }
+
+def encode(string, charset='utf-8', encoding=None, lang=''):
+ """Encode string using the CTE encoding that produces the shorter result.
+
+ Produces an RFC 2047/2243 encoded word of the form:
+
+ =?charset*lang?cte?encoded_string?=
+
+ where '*lang' is omitted unless the 'lang' parameter is given a value.
+ Optional argument charset (defaults to utf-8) specifies the charset to use
+ to encode the string to binary before CTE encoding it. Optional argument
+ 'encoding' is the cte specifier for the encoding that should be used ('q'
+ or 'b'); if it is None (the default) the encoding which produces the
+ shortest encoded sequence is used, except that 'q' is preferred if it is up
+ to five characters longer. Optional argument 'lang' (default '') gives the
+ RFC 2243 language string to specify in the encoded word.
+
+ """
+ string = str(string)
+ if charset == 'unknown-8bit':
+ bstring = string.encode('ascii', 'surrogateescape')
+ else:
+ bstring = string.encode(charset)
+ if encoding is None:
+ qlen = _cte_encode_length['q'](bstring)
+ blen = _cte_encode_length['b'](bstring)
+ # Bias toward q. 5 is arbitrary.
+ encoding = 'q' if qlen - blen < 5 else 'b'
+ encoded = _cte_encoders[encoding](bstring)
+ if lang:
+ lang = '*' + lang
+ return "=?{0}{1}?{2}?{3}?=".format(charset, lang, encoding, encoded)
diff --git a/contrib/python/future/future/backports/email/_header_value_parser.py b/contrib/python/future/future/backports/email/_header_value_parser.py
index e57d05c34a..43957edc12 100644
--- a/contrib/python/future/future/backports/email/_header_value_parser.py
+++ b/contrib/python/future/future/backports/email/_header_value_parser.py
@@ -1,2965 +1,2965 @@
-"""Header value parser implementing various email-related RFC parsing rules.
-
-The parsing methods defined in this module implement various email related
-parsing rules. Principal among them is RFC 5322, which is the followon
-to RFC 2822 and primarily a clarification of the former. It also implements
-RFC 2047 encoded word decoding.
-
-RFC 5322 goes to considerable trouble to maintain backward compatibility with
-RFC 822 in the parse phase, while cleaning up the structure on the generation
-phase. This parser supports correct RFC 5322 generation by tagging white space
-as folding white space only when folding is allowed in the non-obsolete rule
-sets. Actually, the parser is even more generous when accepting input than RFC
-5322 mandates, following the spirit of Postel's Law, which RFC 5322 encourages.
-Where possible deviations from the standard are annotated on the 'defects'
-attribute of tokens that deviate.
-
-The general structure of the parser follows RFC 5322, and uses its terminology
-where there is a direct correspondence. Where the implementation requires a
-somewhat different structure than that used by the formal grammar, new terms
-that mimic the closest existing terms are used. Thus, it really helps to have
-a copy of RFC 5322 handy when studying this code.
-
-Input to the parser is a string that has already been unfolded according to
-RFC 5322 rules. According to the RFC this unfolding is the very first step, and
-this parser leaves the unfolding step to a higher level message parser, which
-will have already detected the line breaks that need unfolding while
-determining the beginning and end of each header.
-
-The output of the parser is a TokenList object, which is a list subclass. A
-TokenList is a recursive data structure. The terminal nodes of the structure
-are Terminal objects, which are subclasses of str. These do not correspond
-directly to terminal objects in the formal grammar, but are instead more
-practical higher level combinations of true terminals.
-
-All TokenList and Terminal objects have a 'value' attribute, which produces the
-semantically meaningful value of that part of the parse subtree. The value of
-all whitespace tokens (no matter how many sub-tokens they may contain) is a
-single space, as per the RFC rules. This includes 'CFWS', which is herein
-included in the general class of whitespace tokens. There is one exception to
-the rule that whitespace tokens are collapsed into single spaces in values: in
-the value of a 'bare-quoted-string' (a quoted-string with no leading or
-trailing whitespace), any whitespace that appeared between the quotation marks
-is preserved in the returned value. Note that in all Terminal strings quoted
-pairs are turned into their unquoted values.
-
-All TokenList and Terminal objects also have a string value, which attempts to
-be a "canonical" representation of the RFC-compliant form of the substring that
-produced the parsed subtree, including minimal use of quoted pair quoting.
-Whitespace runs are not collapsed.
-
-Comment tokens also have a 'content' attribute providing the string found
-between the parens (including any nested comments) with whitespace preserved.
-
-All TokenList and Terminal objects have a 'defects' attribute which is a
-possibly empty list of all the defects found while creating the token. Defects
-may appear on any token in the tree, and a composite list of all defects in the
-subtree is available through the 'all_defects' attribute of any node. (For
-Terminal nodes x.defects == x.all_defects.)
-
-Each object in a parse tree is called a 'token', and each has a 'token_type'
-attribute that gives the name from the RFC 5322 grammar that it represents.
-Not all RFC 5322 nodes are produced, and there is one non-RFC 5322 node that
-may be produced: 'ptext'. A 'ptext' is a string of printable ascii characters.
-It is returned in place of lists of (ctext/quoted-pair) and
-(qtext/quoted-pair).
-
-XXX: provide complete list of token types.
-"""
-from __future__ import print_function
-from __future__ import unicode_literals
-from __future__ import division
-from __future__ import absolute_import
-from future.builtins import int, range, str, super, list
-
-import re
-from collections import namedtuple, OrderedDict
-
-from future.backports.urllib.parse import (unquote, unquote_to_bytes)
-from future.backports.email import _encoded_words as _ew
-from future.backports.email import errors
-from future.backports.email import utils
-
-#
-# Useful constants and functions
-#
-
# Character classes used to decide where a syntactic element ends.
WSP = set(' \t')
CFWS_LEADER = WSP.union('(')
SPECIALS = set(r'()<>@,:;.\"[]')
ATOM_ENDS = SPECIALS.union(WSP)
DOT_ATOM_ENDS = ATOM_ENDS.difference('.')
# '.', '"', and '(' do not end phrases in order to support obs-phrase
PHRASE_ENDS = SPECIALS.difference('."(')
TSPECIALS = SPECIALS.union('/?=').difference('.')
TOKEN_ENDS = TSPECIALS.union(WSP)
ASPECIALS = TSPECIALS.union("*'%")
ATTRIBUTE_ENDS = ASPECIALS.union(WSP)
EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS.difference('%')
-
def quote_string(value):
    """Return str(value) wrapped in double quotes with quoted-pair escaping.

    Backslashes are doubled first, then double quotes are backslash-escaped,
    so that the escapes added for quotes are not themselves doubled.
    """
    escaped = str(value).replace('\\', '\\\\')
    escaped = escaped.replace('"', r'\"')
    return '"' + escaped + '"'
-
-#
-# Accumulator for header folding
-#
-
class _Folded(object):
    """Accumulator for the output produced while folding a header.

    Finished fragments are collected in ``done``; fragments of the line
    currently being built are kept in ``current`` with ``lastlen`` tracking
    their length.  ``stickyspace`` is whitespace that must be emitted in
    front of the next token (None when nothing is pending), and
    ``firstline`` stays True until a token has been placed through the
    sticky-space path.
    """

    def __init__(self, maxlen, policy):
        self.maxlen = maxlen
        self.policy = policy
        self.done = []
        self.current = list()    # uses l.clear()
        self.lastlen = 0
        self.stickyspace = None
        self.firstline = True

    def newline(self):
        """Move the current line plus policy.linesep to done and reset."""
        self.done.extend(self.current)
        self.done.append(self.policy.linesep)
        self.current.clear()
        self.lastlen = 0

    def finalize(self):
        """Flush a partially built line, if there is one."""
        if self.current:
            self.newline()

    def __str__(self):
        return ''.join(self.done)

    def append(self, stoken):
        """Add stoken to the current line without any length accounting."""
        self.current.append(stoken)

    def append_if_fits(self, token, stoken=None):
        """Try to place token on the current or a new line.

        stoken is the string form to emit; it defaults to str(token).
        Returns True when the token was placed (or folded recursively) and
        False when it is too long and the caller has to deal with it.
        """
        text = str(token) if stoken is None else stoken
        text_len = len(text)
        if self.stickyspace is not None:
            ws_len = len(self.stickyspace)
            if self.lastlen + ws_len + text_len <= self.maxlen:
                # Whitespace and token both fit on the current line.
                self.current.append(self.stickyspace)
                self.lastlen += ws_len
                self.current.append(text)
                self.lastlen += text_len
                self.stickyspace = None
                self.firstline = False
                return True
            if token.has_fws:
                # The token can be split further: let it fold itself,
                # carrying its own leading whitespace along as sticky.
                ws = token.pop_leading_fws()
                if ws is not None:
                    self.stickyspace += str(ws)
                    ws_len += len(ws)
                token._fold(self)
                return True
            if ws_len and text_len + 1 <= self.maxlen:
                margin = self.maxlen - text_len
                if 0 < margin < ws_len:
                    # Leave part of the whitespace at the end of the old
                    # line so the rest plus the token fits on the new one.
                    trim = ws_len - margin
                    self.current.append(self.stickyspace[:trim])
                    self.stickyspace = self.stickyspace[trim:]
                    ws_len = trim
                self.newline()
                self.current.append(self.stickyspace)
                self.current.append(text)
                self.lastlen = text_len + ws_len
                self.stickyspace = None
                self.firstline = False
                return True
            if not self.firstline:
                self.newline()
            self.current.append(self.stickyspace)
            self.current.append(text)
            self.stickyspace = None
            self.firstline = False
            return True
        if self.lastlen + text_len <= self.maxlen:
            self.current.append(text)
            self.lastlen += text_len
            return True
        if text_len < self.maxlen:
            self.newline()
            self.current.append(text)
            self.lastlen = text_len
            return True
        return False
-
-#
-# TokenList and its subclasses
-#
-
class TokenList(list):
    """A list of tokens (Terminals and nested TokenLists) in a parse tree.

    Subclasses set ``token_type``.  Each instance carries its own
    ``defects`` list.
    """

    token_type = None

    def __init__(self, *args, **kw):
        super(TokenList, self).__init__(*args, **kw)
        self.defects = []

    def __str__(self):
        return ''.join(str(x) for x in self)

    def __repr__(self):
        return '{}({})'.format(self.__class__.__name__,
                               super(TokenList, self).__repr__())

    @property
    def value(self):
        """Concatenation of the non-empty values of the contained tokens."""
        return ''.join(x.value for x in self if x.value)

    @property
    def all_defects(self):
        """A new list: own defects followed by those of every sub-token."""
        # Always build a fresh list so callers can never end up mutating
        # self.defects through the returned value (which happened for an
        # empty token list).
        defects = list(self.defects)
        for token in self:
            defects.extend(token.all_defects)
        return defects

    #
    # Folding API
    #
    # parts():
    #
    # return a list of objects that constitute the "higher level syntactic
    # objects" specified by the RFC as the best places to fold a header line.
    # The returned objects must include leading folding white space, even if
    # this means mutating the underlying parse tree of the object.  Each object
    # is only responsible for returning *its* parts, and should not drill down
    # to any lower level except as required to meet the leading folding white
    # space constraint.
    #
    # _fold(folded):
    #
    #   folded: the result accumulator.  This is an instance of _Folded.
    #       (XXX: I haven't finished factoring this out yet, the folding code
    #       pretty much uses this as a state object.) When the folded.current
    #       contains as much text as will fit, the _fold method should call
    #       folded.newline.
    #  folded.lastlen: the current length of the text stored in folded.current.
    #  folded.maxlen: The maximum number of characters that may appear on a
    #       folded line.  Differs from the policy setting in that "no limit" is
    #       represented by +inf, which means it can be used in the trivially
    #       logical fashion in comparisons.
    #
    # Currently no subclasses implement parts, and I think this will remain
    # true.  A subclass only needs to implement _fold when the generic version
    # isn't sufficient.  _fold will need to be implemented primarily when it is
    # possible for encoded words to appear in the specialized token-list, since
    # there is no generic algorithm that can know where exactly the encoded
    # words are allowed.  A _fold implementation is responsible for filling
    # lines in the same general way that the top level _fold does. It may, and
    # should, call the _fold method of sub-objects in a similar fashion to that
    # of the top level _fold.
    #
    # XXX: I'm hoping it will be possible to factor the existing code further
    # to reduce redundancy and make the logic clearer.

    @property
    def parts(self):
        klass = self.__class__
        this = list()
        for token in self:
            if token.startswith_fws():
                if this:
                    yield this[0] if len(this)==1 else klass(this)
                    # 'this' may be a plain list literal (see below), which
                    # has no clear() method on Python 2; slice deletion
                    # works for every list type.
                    del this[:]
            end_ws = token.pop_trailing_ws()
            this.append(token)
            if end_ws:
                yield klass(this)
                this = [end_ws]
        if this:
            yield this[0] if len(this)==1 else klass(this)

    def startswith_fws(self):
        return self[0].startswith_fws()

    def pop_leading_fws(self):
        """Remove and return the leading 'fws' token, searching downward."""
        if self[0].token_type == 'fws':
            return self.pop(0)
        return self[0].pop_leading_fws()

    def pop_trailing_ws(self):
        """Remove and return the trailing 'cfws' token, searching downward."""
        if self[-1].token_type == 'cfws':
            return self.pop(-1)
        return self[-1].pop_trailing_ws()

    @property
    def has_fws(self):
        return any(part.has_fws for part in self)

    def has_leading_comment(self):
        return self[0].has_leading_comment()

    @property
    def comments(self):
        return [comment for token in self for comment in token.comments]

    def fold(self, **_3to2kwargs):
        """Return the folded string form; requires the keyword 'policy'."""
        # max_line_length 0/None means no limit, ie: infinitely long.
        policy = _3to2kwargs.pop('policy')
        maxlen = policy.max_line_length or float("+inf")
        folded = _Folded(maxlen, policy)
        self._fold(folded)
        folded.finalize()
        return str(folded)

    def as_encoded_word(self, charset):
        # This works only for things returned by 'parts', which include
        # the leading fws, if any, that should be used.
        res = []
        ws = self.pop_leading_fws()
        if ws:
            res.append(ws)
        trailer = self.pop(-1) if self[-1].token_type=='fws' else ''
        res.append(_ew.encode(str(self), charset))
        res.append(trailer)
        return ''.join(res)

    def cte_encode(self, charset, policy):
        return ''.join(part.cte_encode(charset, policy) for part in self)

    def _fold(self, folded):
        for part in self.parts:
            tstr = str(part)
            try:
                tstr.encode('us-ascii')
            except UnicodeEncodeError:
                if any(isinstance(x, errors.UndecodableBytesDefect)
                       for x in part.all_defects):
                    charset = 'unknown-8bit'
                else:
                    # XXX: this should be a policy setting
                    charset = 'utf-8'
                tstr = part.cte_encode(charset, folded.policy)
            if folded.append_if_fits(part, tstr):
                continue
            # Peel off the leading whitespace if any and make it sticky, to
            # avoid infinite recursion.
            ws = part.pop_leading_fws()
            if ws is not None:
                # pop_leading_fws() has already removed the whitespace from
                # the part; popping again here would throw away a real token.
                folded.stickyspace = str(ws)
                if folded.append_if_fits(part):
                    continue
            if part.has_fws:
                part._fold(folded)
                continue
            # There are no fold points in this one; it is too long for a single
            # line and can't be split...we just have to put it on its own line.
            folded.append(tstr)
            folded.newline()

    def pprint(self, indent=''):
        """Print the parse tree, each line prefixed with indent."""
        print('\n'.join(self._pp(indent=indent)))

    def ppstr(self, indent=''):
        """Return the parse tree dump as a string, prefixed with indent."""
        return '\n'.join(self._pp(indent=indent))

    def _pp(self, indent=''):
        yield '{}{}/{}('.format(
            indent,
            self.__class__.__name__,
            self.token_type)
        for token in self:
            if not hasattr(token, '_pp'):
                yield (indent + ' !! invalid element in token '
                       'list: {!r}'.format(token))
            else:
                for line in token._pp(indent+' '):
                    yield line
        if self.defects:
            extra = ' Defects: {}'.format(self.defects)
        else:
            extra = ''
        yield '{}){}'.format(indent, extra)
-
-
class WhiteSpaceTokenList(TokenList):
    """Token list whose semantic value is always a single space."""

    @property
    def value(self):
        return ' '

    @property
    def comments(self):
        """Content of each directly contained 'comment' token."""
        found = []
        for token in self:
            if token.token_type == 'comment':
                found.append(token.content)
        return found
-
-
class UnstructuredTokenList(TokenList):
    """Token list for unstructured header text."""

    token_type = 'unstructured'

    def _fold(self, folded):
        # NOTE(review): _fold_encoded and _fold_as_ew are not defined on this
        # class or on TokenList in the code visible here -- confirm they
        # exist before relying on these paths.
        if any(x.token_type=='encoded-word' for x in self):
            return self._fold_encoded(folded)
        # Here we can have either a pure ASCII string that may or may not
        # have surrogateescape encoded bytes, or a unicode string.
        last_ew = None
        for part in self.parts:
            tstr = str(part)
            is_ew = False
            try:
                str(part).encode('us-ascii')
            except UnicodeEncodeError:
                if any(isinstance(x, errors.UndecodableBytesDefect)
                       for x in part.all_defects):
                    charset = 'unknown-8bit'
                else:
                    charset = 'utf-8'
                if last_ew is not None:
                    # We've already done an EW, combine this one with it
                    # if there's room.
                    chunk = get_unstructured(
                        ''.join(folded.current[last_ew:]+[tstr])).as_encoded_word(charset)
                    oldlastlen = sum(len(x) for x in folded.current[:last_ew])
                    schunk = str(chunk)
                    lchunk = len(schunk)
                    if oldlastlen + lchunk <= folded.maxlen:
                        del folded.current[last_ew:]
                        folded.append(schunk)
                        folded.lastlen = oldlastlen + lchunk
                        continue
                tstr = part.as_encoded_word(charset)
                is_ew = True
            if folded.append_if_fits(part, tstr):
                if is_ew:
                    last_ew = len(folded.current) - 1
                continue
            if is_ew or last_ew:
                # It's too big to fit on the line, but since we've
                # got encoded words we can use encoded word folding.
                part._fold_as_ew(folded)
                continue
            # Peel off the leading whitespace if any and make it sticky, to
            # avoid infinite recursion.
            ws = part.pop_leading_fws()
            if ws is not None:
                folded.stickyspace = str(ws)
                if folded.append_if_fits(part):
                    continue
            if part.has_fws:
                # The public fold() accepts only the keyword 'policy'; the
                # accumulator has to be handed to _fold().
                part._fold(folded)
                continue
            # It can't be split...we just have to put it on its own line.
            folded.append(tstr)
            folded.newline()
            last_ew = None

    def cte_encode(self, charset, policy):
        """Return the string form with non-ASCII parts as encoded words.

        Once one encoded word has been produced, a later non-ASCII part is
        merged with the text that follows that word into a single new
        encoded word.
        """
        res = []
        last_ew = None
        for part in self:
            spart = str(part)
            try:
                spart.encode('us-ascii')
                res.append(spart)
            except UnicodeEncodeError:
                if last_ew is None:
                    res.append(part.cte_encode(charset, policy))
                else:
                    tl = get_unstructured(''.join(res[last_ew:] + [spart]))
                    # as_encoded_word() requires the charset.  Replace the
                    # text that was merged in rather than appending after
                    # it, so it is not emitted twice.
                    res[last_ew:] = [tl.as_encoded_word(charset)]
                # Text following the newest encoded word starts here.
                last_ew = len(res)
        return ''.join(res)
-
-
class Phrase(TokenList):
    """Token list for a phrase (words, possibly with comments)."""

    token_type = 'phrase'

    def _fold(self, folded):
        # As with Unstructured, we can have pure ASCII with or without
        # surrogateescape encoded bytes, or we could have unicode.  But this
        # case is more complicated, since we have to deal with the various
        # sub-token types and how they can be composed in the face of
        # unicode-that-needs-CTE-encoding, and the fact that if a token has a
        # comment, that becomes a barrier across which we can't compose
        # encoded words.
        last_ew = None
        for part in self.parts:
            tstr = str(part)
            has_ew = False
            try:
                str(part).encode('us-ascii')
            except UnicodeEncodeError:
                if any(isinstance(x, errors.UndecodableBytesDefect)
                       for x in part.all_defects):
                    charset = 'unknown-8bit'
                else:
                    charset = 'utf-8'
                if last_ew is not None and not part.has_leading_comment():
                    # We've already done an EW, let's see if we can combine
                    # this one with it.  The last_ew logic ensures that all we
                    # have at this point is atoms, no comments or quoted
                    # strings.  So we can treat the text between the last
                    # encoded word and the content of this token as
                    # unstructured text, and things will work correctly.  But
                    # we have to strip off any trailing comment on this token
                    # first, and if it is a quoted string we have to pull out
                    # the content (we're encoding it, so it no longer needs to
                    # be quoted).
                    if part[-1].token_type == 'cfws' and part.comments:
                        # NOTE(review): the popped comment is not re-emitted
                        # anywhere in this method -- confirm that is intended.
                        part.pop(-1)
                    for i, token in enumerate(part):
                        if token.token_type == 'bare-quoted-string':
                            part[i] = UnstructuredTokenList(token[:])
                    chunk = get_unstructured(
                        ''.join(folded.current[last_ew:]+[tstr])).as_encoded_word(charset)
                    schunk = str(chunk)
                    lchunk = len(schunk)
                    # last_ew is an index into folded.current, not a length:
                    # measure the text that stays in front of the merged
                    # encoded word, as UnstructuredTokenList._fold does.
                    oldlastlen = sum(len(x) for x in folded.current[:last_ew])
                    if oldlastlen + lchunk <= folded.maxlen:
                        del folded.current[last_ew:]
                        folded.append(schunk)
                        folded.lastlen = sum(len(x) for x in folded.current)
                        continue
                tstr = part.as_encoded_word(charset)
                has_ew = True
            if folded.append_if_fits(part, tstr):
                if has_ew and not part.comments:
                    last_ew = len(folded.current) - 1
                elif part.comments or part.token_type == 'quoted-string':
                    # If a comment is involved we can't combine EWs.  And if a
                    # quoted string is involved, it's not worth the effort to
                    # try to combine them.
                    last_ew = None
                continue
            part._fold(folded)

    def cte_encode(self, charset, policy):
        """Return the string form with non-ASCII parts as encoded words."""
        res = []
        last_ew = None
        is_ew = False
        for part in self:
            spart = str(part)
            try:
                spart.encode('us-ascii')
                res.append(spart)
            except UnicodeEncodeError:
                is_ew = True
                if last_ew is None:
                    if not part.comments:
                        last_ew = len(res)
                    res.append(part.cte_encode(charset, policy))
                elif not part.has_leading_comment():
                    if part[-1].token_type == 'cfws' and part.comments:
                        part.pop(-1)
                    for i, token in enumerate(part):
                        if token.token_type == 'bare-quoted-string':
                            part[i] = UnstructuredTokenList(token[:])
                    tl = get_unstructured(''.join(res[last_ew:] + [spart]))
                    res[last_ew:] = [tl.as_encoded_word(charset)]
            if part.comments or (not is_ew and part.token_type == 'quoted-string'):
                last_ew = None
        return ''.join(res)
-
class Word(TokenList):
    """Token list tagged with token_type 'word'."""

    token_type = 'word'
-
-
class CFWSList(WhiteSpaceTokenList):
    """Whitespace token list tagged with token_type 'cfws'."""

    token_type = 'cfws'

    def has_leading_comment(self):
        """True when this list contains at least one comment."""
        return True if self.comments else False
-
-
class Atom(TokenList):
    """Token list tagged with token_type 'atom'."""

    token_type = 'atom'


class Token(TokenList):
    """Token list tagged with token_type 'token'."""

    token_type = 'token'
-
-
class EncodedWord(TokenList):
    """Token list tagged with token_type 'encoded-word'.

    ``cte``, ``charset`` and ``lang`` default to None at class level.
    """

    token_type = 'encoded-word'
    cte = None
    charset = None
    lang = None

    @property
    def encoded(self):
        """Return ``cte`` if set, else this token's text encoded afresh."""
        if self.cte is not None:
            return self.cte
        # The fallback result was previously computed and then dropped,
        # so the property returned None.
        return _ew.encode(str(self), self.charset)
-
-
-
class QuotedString(TokenList):
    """Token list tagged with token_type 'quoted-string'."""

    token_type = 'quoted-string'

    def _bare_value(self):
        # Value of the first 'bare-quoted-string' child, or None.
        for token in self:
            if token.token_type == 'bare-quoted-string':
                return token.value
        return None

    @property
    def content(self):
        """Value of the first bare-quoted-string child (None if absent)."""
        return self._bare_value()

    @property
    def quoted_value(self):
        """Join of children: str() for bare quoted strings, value otherwise."""
        return ''.join(
            str(token) if token.token_type == 'bare-quoted-string'
            else token.value
            for token in self)

    @property
    def stripped_value(self):
        """Value of the first bare-quoted-string child (None if absent)."""
        return self._bare_value()
-
-
class BareQuotedString(QuotedString):
    """Quoted string tagged with token_type 'bare-quoted-string'."""

    token_type = 'bare-quoted-string'

    def __str__(self):
        # The string form is the re-quoted content.
        return quote_string(self.value)

    @property
    def value(self):
        """The unquoted content: str() of every child, concatenated."""
        pieces = [str(token) for token in self]
        return ''.join(pieces)
-
-
class Comment(WhiteSpaceTokenList):
    """Whitespace token list tagged with token_type 'comment'."""

    token_type = 'comment'

    def __str__(self):
        return '(' + ''.join(self.quote(x) for x in self) + ')'

    def quote(self, value):
        """Return value's text with backslashes and parentheses escaped.

        Nested comments are returned through their own str() form.
        """
        if value.token_type == 'comment':
            return str(value)
        # Raw strings: '\(' and '\)' are not valid escape sequences and
        # only worked because Python kept the backslash for unknown escapes.
        return str(value).replace('\\', '\\\\').replace(
                                  '(', r'\(').replace(
                                  ')', r'\)')

    @property
    def content(self):
        """The text between the parentheses: str() of every child."""
        return ''.join(str(x) for x in self)

    @property
    def comments(self):
        return [self.content]
-
class AddressList(TokenList):
    """Token list tagged with token_type 'address-list'."""

    token_type = 'address-list'

    @property
    def addresses(self):
        """The 'address' children, in order."""
        return [token for token in self if token.token_type == 'address']

    @property
    def mailboxes(self):
        """Concatenated ``mailboxes`` of every 'address' child."""
        collected = []
        for token in self:
            if token.token_type == 'address':
                collected = collected + token.mailboxes
        return collected

    @property
    def all_mailboxes(self):
        """Concatenated ``all_mailboxes`` of every 'address' child."""
        collected = []
        for token in self:
            if token.token_type == 'address':
                collected = collected + token.all_mailboxes
        return collected
-
-
class Address(TokenList):
    """Token list tagged with token_type 'address'.

    All properties look only at the first child.
    """

    token_type = 'address'

    @property
    def display_name(self):
        """The group's display name; None when the child is not a group."""
        first = self[0]
        if first.token_type != 'group':
            return None
        return first.display_name

    @property
    def mailboxes(self):
        """Valid mailboxes: an invalid-mailbox child yields an empty list."""
        first = self[0]
        kind = first.token_type
        if kind == 'mailbox':
            return [first]
        if kind == 'invalid-mailbox':
            return []
        return first.mailboxes

    @property
    def all_mailboxes(self):
        """Like ``mailboxes`` but an invalid-mailbox child is included."""
        first = self[0]
        if first.token_type in ('mailbox', 'invalid-mailbox'):
            return [first]
        return first.all_mailboxes
-
class MailboxList(TokenList):
    """Token list tagged with token_type 'mailbox-list'."""

    token_type = 'mailbox-list'

    @property
    def mailboxes(self):
        """The 'mailbox' children, in order."""
        return [token for token in self if token.token_type == 'mailbox']

    @property
    def all_mailboxes(self):
        """The 'mailbox' and 'invalid-mailbox' children, in order."""
        wanted = ('mailbox', 'invalid-mailbox')
        return [token for token in self if token.token_type in wanted]
-
-
class GroupList(TokenList):
    """Token list tagged with token_type 'group-list'."""

    token_type = 'group-list'

    def _has_mailbox_list(self):
        # True when the first child exists and is a 'mailbox-list'.
        return bool(self) and self[0].token_type == 'mailbox-list'

    @property
    def mailboxes(self):
        """``mailboxes`` of a leading mailbox-list child, else []."""
        if self._has_mailbox_list():
            return self[0].mailboxes
        return []

    @property
    def all_mailboxes(self):
        """``all_mailboxes`` of a leading mailbox-list child, else []."""
        if self._has_mailbox_list():
            return self[0].all_mailboxes
        return []
-
-
class Group(TokenList):
    """Token list tagged with token_type 'group'.

    The mailbox properties inspect the child at index 2; the display name
    comes from the child at index 0.
    """

    token_type = "group"

    @property
    def mailboxes(self):
        """``mailboxes`` of the group-list at index 2, else []."""
        members = self[2]
        if members.token_type == 'group-list':
            return members.mailboxes
        return []

    @property
    def all_mailboxes(self):
        """``all_mailboxes`` of the group-list at index 2, else []."""
        members = self[2]
        if members.token_type == 'group-list':
            return members.all_mailboxes
        return []

    @property
    def display_name(self):
        return self[0].display_name
-
-
class NameAddr(TokenList):
    """Token list tagged with token_type 'name-addr'.

    Address properties are delegated to the last child.
    """

    token_type = 'name-addr'

    @property
    def display_name(self):
        """Display name of the first child; None with a single child."""
        return None if len(self) == 1 else self[0].display_name

    @property
    def local_part(self):
        last = self[-1]
        return last.local_part

    @property
    def domain(self):
        last = self[-1]
        return last.domain

    @property
    def route(self):
        last = self[-1]
        return last.route

    @property
    def addr_spec(self):
        last = self[-1]
        return last.addr_spec
-
-
class AngleAddr(TokenList):
    """Token list tagged with token_type 'angle-addr'."""

    token_type = 'angle-addr'

    def _first_of_type(self, token_type):
        # First child with the given token_type, or None.
        for token in self:
            if token.token_type == token_type:
                return token
        return None

    @property
    def local_part(self):
        """Local part of the addr-spec child (None if there is none)."""
        spec = self._first_of_type('addr-spec')
        if spec is not None:
            return spec.local_part

    @property
    def domain(self):
        """Domain of the addr-spec child (None if there is none)."""
        spec = self._first_of_type('addr-spec')
        if spec is not None:
            return spec.domain

    @property
    def route(self):
        """Domains of the obs-route child (None if there is none)."""
        obs_route = self._first_of_type('obs-route')
        if obs_route is not None:
            return obs_route.domains

    @property
    def addr_spec(self):
        """The addr-spec child's addr_spec, or '<>' when there is none."""
        spec = self._first_of_type('addr-spec')
        if spec is None:
            return '<>'
        return spec.addr_spec
-
-
class ObsRoute(TokenList):
    """Token list tagged with token_type 'obs-route'."""

    token_type = 'obs-route'

    @property
    def domains(self):
        """The ``domain`` of every 'domain' child, in order."""
        found = []
        for token in self:
            if token.token_type == 'domain':
                found.append(token.domain)
        return found
-
-
class Mailbox(TokenList):
    """Token list tagged with token_type 'mailbox'.

    Every property is delegated to the first child.
    """

    token_type = 'mailbox'

    @property
    def display_name(self):
        """Display name of a name-addr child; None for other children."""
        first = self[0]
        if first.token_type != 'name-addr':
            return None
        return first.display_name

    @property
    def local_part(self):
        first = self[0]
        return first.local_part

    @property
    def domain(self):
        first = self[0]
        return first.domain

    @property
    def route(self):
        """Route of a name-addr child; None for other children."""
        first = self[0]
        if first.token_type != 'name-addr':
            return None
        return first.route

    @property
    def addr_spec(self):
        first = self[0]
        return first.addr_spec
-
-
class InvalidMailbox(TokenList):
    """Token list tagged with token_type 'invalid-mailbox'.

    Every address property is None.
    """

    token_type = 'invalid-mailbox'

    @property
    def display_name(self):
        return None

    # The remaining address properties share the same property object.
    local_part = display_name
    domain = display_name
    route = display_name
    addr_spec = display_name
-
-
class Domain(TokenList):
    """Token list tagged with token_type 'domain'."""

    token_type = 'domain'

    @property
    def domain(self):
        """The token list value with all whitespace removed."""
        pieces = super(Domain, self).value.split()
        return ''.join(pieces)
-
-
class DotAtom(TokenList):
    """Token list tagged with token_type 'dot-atom'."""

    token_type = 'dot-atom'


class DotAtomText(TokenList):
    """Token list tagged with token_type 'dot-atom-text'."""

    token_type = 'dot-atom-text'
-
-
class AddrSpec(TokenList):
    """Token list tagged with token_type 'addr-spec'.

    With fewer than three children there is no domain.
    """

    token_type = 'addr-spec'

    @property
    def local_part(self):
        return self[0].local_part

    @property
    def domain(self):
        """Domain of the last child, or None with fewer than 3 children."""
        return self[-1].domain if len(self) >= 3 else None

    @property
    def value(self):
        if len(self) >= 3:
            local = self[0].value.rstrip()
            domain = self[2].value.lstrip()
            return local + self[1].value + domain
        return self[0].value

    @property
    def addr_spec(self):
        """'local@domain' (or just local), quoting the local part if needed.

        The local part is quoted when it contains any character from
        DOT_ATOM_ENDS.
        """
        lp = self.local_part
        # Same test as comparing the set size before and after removing the
        # DOT_ATOM_ENDS characters.
        if not set(lp).isdisjoint(DOT_ATOM_ENDS):
            lp = quote_string(lp)
        domain = self.domain
        if domain is None:
            return lp
        return lp + '@' + domain
-
-
class ObsLocalPart(TokenList):
    """Token list tagged with token_type 'obs-local-part'."""

    token_type = 'obs-local-part'
-
-
class DisplayName(Phrase):
    """Phrase tagged with token_type 'display-name'."""

    token_type = 'display-name'

    @property
    def display_name(self):
        """The value with leading and trailing cfws stripped.

        Works on a shallow copy; an outer cfws token is dropped, otherwise
        a cfws at the edge of the first/last child is sliced off.
        """
        trimmed = TokenList(self)
        if trimmed[0].token_type == 'cfws':
            trimmed.pop(0)
        elif trimmed[0][0].token_type == 'cfws':
            trimmed[0] = TokenList(trimmed[0][1:])
        if trimmed[-1].token_type == 'cfws':
            trimmed.pop()
        elif trimmed[-1][-1].token_type == 'cfws':
            trimmed[-1] = TokenList(trimmed[-1][:-1])
        return trimmed.value

    @property
    def value(self):
        """The phrase value, re-quoted when defects or quoted strings exist."""
        needs_quotes = bool(self.defects) or any(
            token.token_type == 'quoted-string' for token in self)
        if not needs_quotes:
            return super(DisplayName, self).value
        # Keep a single space where the original had edge whitespace.
        pre = post = ''
        if self[0].token_type=='cfws' or self[0][0].token_type=='cfws':
            pre = ' '
        if self[-1].token_type=='cfws' or self[-1][-1].token_type=='cfws':
            post = ' '
        return pre+quote_string(self.display_name)+post
-
-
class LocalPart(TokenList):
    """Token list tagged with token_type 'local-part'."""

    token_type = 'local-part'

    @property
    def value(self):
        first = self[0]
        if first.token_type == "quoted-string":
            return first.quoted_value
        return first.value

    @property
    def local_part(self):
        """Value of the first child with whitespace around dots removed."""
        # Strip whitespace from front, back, and around dots.  A DOT
        # sentinel is placed at both ends so the edges are handled by the
        # same rule as interior dots; the sentinels are dropped at the end.
        pieces = [DOT]
        prev = DOT
        prev_is_list = False
        for tok in self[0] + [DOT]:
            if tok.token_type == 'cfws':
                continue
            if (prev_is_list and tok.token_type == 'dot' and
                    prev[-1].token_type == 'cfws'):
                pieces[-1] = TokenList(prev[:-1])
            tok_is_list = isinstance(tok, TokenList)
            if (tok_is_list and prev.token_type == 'dot' and
                    tok[0].token_type == 'cfws'):
                pieces.append(TokenList(tok[1:]))
            else:
                pieces.append(tok)
            prev = pieces[-1]
            prev_is_list = tok_is_list
        return TokenList(pieces[1:-1]).value
-
-
class DomainLiteral(TokenList):
    """Token list tagged with token_type 'domain-literal'."""

    token_type = 'domain-literal'

    @property
    def domain(self):
        """The token list value with all whitespace removed."""
        pieces = super(DomainLiteral, self).value.split()
        return ''.join(pieces)

    @property
    def ip(self):
        """Value of the first 'ptext' child (None if there is none)."""
        for token in self:
            if token.token_type == 'ptext':
                return token.value
        return None
-
-
class MIMEVersion(TokenList):
    """Token list tagged with token_type 'mime-version'."""

    token_type = 'mime-version'
    # Version components; both default to None at class level.
    major = None
    minor = None
-
-
class Parameter(TokenList):
    """Token list tagged with token_type 'parameter'."""

    token_type = 'parameter'
    # Class-level defaults.
    sectioned = False
    extended = False
    charset = 'us-ascii'

    @property
    def section_number(self):
        """Number of the section token, or 0 for an unsectioned parameter."""
        # Because the first token, the attribute (name) eats CFWS, the second
        # token is always the section if there is one.
        if self.sectioned:
            return self[1].number
        return 0

    @property
    def param_value(self):
        """Stripped value of the first usable value token, else ''.

        A 'value' child is used directly; inside a 'quoted-string' child
        the 'value' tokens of its bare-quoted-string children are searched.
        """
        # This is part of the "handle quoted extended parameters" hack.
        for child in self:
            if child.token_type == 'value':
                return child.stripped_value
            if child.token_type != 'quoted-string':
                continue
            for quoted in child:
                if quoted.token_type != 'bare-quoted-string':
                    continue
                for inner in quoted:
                    if inner.token_type == 'value':
                        return inner.stripped_value
        return ''
-
-
class InvalidParameter(Parameter):
    """Parameter tagged with token_type 'invalid-parameter'."""

    token_type = 'invalid-parameter'
-
-
class Attribute(TokenList):
    """Token list tagged with token_type 'attribute'."""

    token_type = 'attribute'

    @property
    def stripped_value(self):
        """Value of the first child whose type ends in 'attrtext'."""
        for child in self:
            if child.token_type.endswith('attrtext'):
                return child.value
        return None
-
class Section(TokenList):
    """Token list tagged with token_type 'section'."""

    token_type = 'section'
    # Section number; None at class level.
    number = None
-
-
class Value(TokenList):
    """Token list tagged with token_type 'value'."""

    token_type = 'value'

    @property
    def stripped_value(self):
        """Stripped value of the first non-cfws child, else the plain value.

        The child's own stripped_value is used when its type ends in
        'quoted-string', 'attribute' or 'extended-attribute'.
        """
        first = self[1] if self[0].token_type == 'cfws' else self[0]
        delegating = ('quoted-string', 'attribute', 'extended-attribute')
        if first.token_type.endswith(delegating):
            return first.stripped_value
        return self.value
-
-
class MimeParameters(TokenList):
    """Token list tagged with token_type 'mime-parameters'."""

    token_type = 'mime-parameters'

    @property
    def params(self):
        """Yield (name, value) for each parameter name, joining sections.

        Appends an InvalidHeaderDefect to a parameter whose section number
        does not match its position, and an UndecodableBytesDefect to an
        extended parameter whose decoded value contains surrogates.
        """
        # The RFC specifically states that the ordering of parameters is not
        # guaranteed and may be reordered by the transport layer.  So we have
        # to assume the RFC 2231 pieces can come in any order.  However, we
        # output them in the order that we first see a given name, which gives
        # us a stable __str__.
        params = OrderedDict()
        for token in self:
            if not token.token_type.endswith('parameter'):
                continue
            if token[0].token_type != 'attribute':
                continue
            name = token[0].value.strip()
            if name not in params:
                params[name] = []
            params[name].append((token.section_number, token))
        for name, parts in params.items():
            # Sort on the section number only.  Sorting the whole tuples
            # falls through to comparing the tokens themselves whenever two
            # section numbers are equal, which is arbitrary at best and can
            # raise TypeError.  The sort is stable, so equal numbers keep
            # the order in which they were seen.
            parts = sorted(parts, key=lambda item: item[0])
            # XXX: there might be more recovery we could do here if, for
            # example, this is really a case of a duplicate attribute name.
            value_parts = []
            charset = parts[0][1].charset
            for i, (section_number, param) in enumerate(parts):
                if section_number != i:
                    param.defects.append(errors.InvalidHeaderDefect(
                        "inconsistent multipart parameter numbering"))
                value = param.param_value
                if param.extended:
                    try:
                        value = unquote_to_bytes(value)
                    except UnicodeEncodeError:
                        # source had surrogate escaped bytes.  What we do now
                        # is a bit of an open question.  I'm not sure this is
                        # the best choice, but it is what the old algorithm did
                        value = unquote(value, encoding='latin-1')
                    else:
                        try:
                            value = value.decode(charset, 'surrogateescape')
                        except LookupError:
                            # XXX: there should really be a custom defect for
                            # unknown character set to make it easy to find,
                            # because otherwise unknown charset is a silent
                            # failure.
                            value = value.decode('us-ascii', 'surrogateescape')
                        if utils._has_surrogates(value):
                            param.defects.append(errors.UndecodableBytesDefect())
                value_parts.append(value)
            value = ''.join(value_parts)
            yield name, value

    def __str__(self):
        params = []
        for name, value in self.params:
            if value:
                params.append('{}={}'.format(name, quote_string(value)))
            else:
                params.append(name)
        params = '; '.join(params)
        return ' ' + params if params else ''
-
-
class ParameterizedHeaderValue(TokenList):
    """Token list for a header value that may end in MIME parameters."""

    @property
    def params(self):
        """``params`` of the last mime-parameters child, or {} if none."""
        for child in self[::-1]:
            if child.token_type == 'mime-parameters':
                return child.params
        return {}

    @property
    def parts(self):
        has_trailing_params = (
            bool(self) and self[-1].token_type == 'mime-parameters')
        if not has_trailing_params:
            return TokenList(self).parts
        # We don't want to start a new line if all of the params don't fit
        # after the value, so unwrap the parameter list.
        return TokenList(self[:-1] + self[-1])
-
-
class ContentType(ParameterizedHeaderValue):
    """Header value tagged 'content-type'; defaults to text/plain."""

    token_type = 'content-type'
    maintype = 'text'
    subtype = 'plain'


class ContentDisposition(ParameterizedHeaderValue):
    """Header value tagged 'content-disposition'."""

    token_type = 'content-disposition'
    # No disposition by default.
    content_disposition = None


class ContentTransferEncoding(TokenList):
    """Token list tagged 'content-transfer-encoding'; cte defaults to 7bit."""

    token_type = 'content-transfer-encoding'
    cte = '7bit'


class HeaderLabel(TokenList):
    """Token list tagged with token_type 'header-label'."""

    token_type = 'header-label'
-
-
class Header(TokenList):
    """Token list tagged with token_type 'header'."""

    token_type = 'header'

    def _fold(self, folded):
        """Fold the header into folded, consuming this token list.

        Expects the first child, an optional cfws child and exactly one
        further child; raises ValueError if anything is left over.
        """
        label = self.pop(0)
        folded.append(str(label))
        folded.lastlen = len(folded.current[0])
        # The first line of the header is different from all others: we don't
        # want to start a new object on a new line if it has any fold points in
        # it that would allow part of it to be on the first header line.
        # Further, if the first fold point would fit on the new line, we want
        # to do that, but if it doesn't we want to put it on the first line.
        # Folded supports this via the stickyspace attribute.  If this
        # attribute is not None, it does the special handling.
        if self[0].token_type == 'cfws':
            folded.stickyspace = str(self.pop(0))
        else:
            folded.stickyspace = ''
        rest = self.pop(0)
        if self:
            raise ValueError("Malformed Header token list")
        rest._fold(folded)
-
-
-#
-# Terminal classes and instances
-#
-
class Terminal(str):
    """A leaf of the parse tree: a string tagged with a token type.

    Instances carry ``token_type`` and a per-instance ``defects`` list, and
    implement the leaf-level versions of the methods the token lists call
    on their children.
    """

    def __new__(cls, value, token_type):
        """Create the string *value* and tag it with *token_type*."""
        instance = super(Terminal, cls).__new__(cls, value)
        instance.token_type = token_type
        instance.defects = []
        return instance

    def __repr__(self):
        text_repr = super(Terminal, self).__repr__()
        return "{}({})".format(self.__class__.__name__, text_repr)

    @property
    def all_defects(self):
        """A new list holding this terminal's own defects."""
        return list(self.defects)

    def _pp(self, indent=''):
        """Return a one-element list with the pretty-printed line."""
        defect_text = ' {}'.format(self.defects) if self.defects else ''
        line = "{}{}/{}({}){}".format(
            indent,
            self.__class__.__name__,
            self.token_type,
            super(Terminal, self).__repr__(),
            defect_text,
        )
        return [line]

    def cte_encode(self, charset, policy):
        """Return the text unchanged if it is ASCII, else as an encoded word."""
        text = str(self)
        try:
            text.encode('us-ascii')
        except UnicodeEncodeError:
            return _ew.encode(text, charset)
        return text

    def pop_trailing_ws(self):
        # A terminal has no children, so the recursion stops here.
        return None

    def pop_leading_fws(self):
        # A terminal has no children, so the recursion stops here.
        return None

    @property
    def comments(self):
        """A terminal holds no comments."""
        return []

    def has_leading_comment(self):
        return False

    def __getnewargs__(self):
        # Both constructor arguments are needed to rebuild the instance.
        return (str(self), self.token_type)
-
-
class WhiteSpaceTerminal(Terminal):
    """Terminal whose ``value`` is always a single space."""

    @property
    def value(self):
        # Whatever text the terminal holds, its value is one space.
        return ' '

    def startswith_fws(self):
        return True

    has_fws = True
-
-
class ValueTerminal(Terminal):
    """Terminal whose ``value`` is the terminal itself."""

    @property
    def value(self):
        return self

    def startswith_fws(self):
        return False

    has_fws = False

    def as_encoded_word(self, charset):
        """Return this terminal's text encoded by ``_ew.encode`` with *charset*."""
        return _ew.encode(str(self), charset)
-
-
class EWWhiteSpaceTerminal(WhiteSpaceTerminal):
    """Whitespace terminal whose ``value`` and ``str()`` are both empty.

    get_unstructured substitutes it for the 'fws' token that sits between
    two encoded words.  The original text stays available via ``encoded``.
    """

    @property
    def value(self):
        return ''

    @property
    def encoded(self):
        # Slice of the underlying string, i.e. the original whitespace.
        return self[:]

    def __str__(self):
        return ''

    has_fws = True
-
-
# XXX these need to become classes and used as instances so
# that a program can't change them in a parse tree and screw
# up other parse trees.  Maybe should have tests for that, too.
#
# Shared terminal instances: the parser functions below append these very
# objects to the token lists they build (and get_dot_atom_text compares
# with ``is DOT``), so a change made to one of them, for example to its
# defects list, is visible in every parse tree that contains it.
DOT = ValueTerminal('.', 'dot')
ListSeparator = ValueTerminal(',', 'list-separator')
RouteComponentMarker = ValueTerminal('@', 'route-component-marker')
-
-#
-# Parser
-#
-
-"""Parse strings according to RFC822/2047/2822/5322 rules.
-
-This is a stateless parser. Each get_XXX function accepts a string and
-returns either a Terminal or a TokenList representing the RFC object named
-by the method and a string containing the remaining unparsed characters
-from the input. Thus a parser method consumes the next syntactic construct
-of a given type and returns a token representing the construct plus the
-unparsed remainder of the input string.
-
-For example, if the first element of a structured header is a 'phrase',
-then:
-
- phrase, value = get_phrase(value)
-
-returns the complete phrase from the start of the string value, plus any
-characters left in the string after the phrase is removed.
-
-"""
-
# Compiled helpers shared by the get_XXX functions below.  Each name is bound
# directly to a method of a compiled pattern.
#
# The *_ENDS character sets are placed inside a negated character class, so
# backslash and ']' must be escaped first.  The ']' replacement is written as
# a raw string: '\]' in a normal string literal is an invalid escape sequence
# that Python warns about, while r'\]' yields the same two characters.
_wsp_splitter = re.compile(r'([{}]+)'.format(''.join(WSP))).split
_non_atom_end_matcher = re.compile(r"[^{}]+".format(
    ''.join(ATOM_ENDS).replace('\\', '\\\\').replace(']', r'\]'))).match
_non_printable_finder = re.compile(r"[\x00-\x20\x7F]").findall
_non_token_end_matcher = re.compile(r"[^{}]+".format(
    ''.join(TOKEN_ENDS).replace('\\', '\\\\').replace(']', r'\]'))).match
_non_attribute_end_matcher = re.compile(r"[^{}]+".format(
    ''.join(ATTRIBUTE_ENDS).replace('\\', '\\\\').replace(']', r'\]'))).match
_non_extended_attribute_end_matcher = re.compile(r"[^{}]+".format(
    ''.join(EXTENDED_ATTRIBUTE_ENDS).replace(
        '\\', '\\\\').replace(']', r'\]'))).match
-
def _validate_xtext(xtext):
    """Record defects on *xtext* for suspicious characters.

    A NonPrintableDefect listing the offending characters is added when the
    non-printable finder matches anything, and an UndecodableBytesDefect is
    added when ``utils._has_surrogates`` reports surrogates.
    """
    offenders = _non_printable_finder(xtext)
    if offenders:
        xtext.defects.append(errors.NonPrintableDefect(offenders))
    if not utils._has_surrogates(xtext):
        return
    xtext.defects.append(errors.UndecodableBytesDefect(
        "Non-ASCII characters found in header token"))
-
def _get_ptext_to_endchars(value, endchars):
    """Scan printables/quoted-pairs until endchars and return unquoted ptext.

    Only the text up to the first run of whitespace in *value* is scanned.
    A backslash escapes the following character: the backslash is dropped and
    the escaped character is kept even if it is one of *endchars*.  Scanning
    stops at the first unescaped character found in *endchars*.

    Returns a tuple ``(ptext, remainder, had_qp)``: the unquoted text, the
    unconsumed rest of *value* (starting at the end character or at the
    whitespace), and a flag.  An empty *value*, or one that starts with
    whitespace, yields ``('', value, False)``.

    NOTE(review): had_qp is only set when an escaped backslash is seen, not
    for other quoted pairs; that is left as it was -- confirm whether it is
    intended.
    """
    pieces = _wsp_splitter(value, 1)
    fragment, remainder = pieces[0], pieces[1:]
    vchars = []
    escape = False
    had_qp = False
    # Index at which scanning stopped.  It defaults to "whole fragment
    # consumed", which also covers an empty fragment (the loop body never
    # runs, so nothing may be left unbound).
    end = len(fragment)
    for pos, char in enumerate(fragment):
        if char == '\\':
            if escape:
                escape = False
                had_qp = True
            else:
                escape = True
                continue
        if escape:
            escape = False
        elif char in endchars:
            end = pos
            break
        vchars.append(char)
    return ''.join(vchars), ''.join([fragment[end:]] + remainder), had_qp
-
def _decode_ew_run(value):
    """Decode a leading run of RFC 2047 encoded words.

    _decode_ew_run(value) -> (text, value, defects)

    Whitespace-separated tokens are consumed for as long as each one starts
    with '=?' and ends with '?='.  Their decoded text is concatenated (the
    whitespace between encoded words is dropped) and the defects reported by
    the decoder are collected.  The returned value is whatever was not
    consumed, including the whitespace that preceded it.  The input may not
    have any leading whitespace.
    """
    decoded = []
    defects = []
    pending_ws = ''
    while value:
        pieces = _wsp_splitter(value, 1)
        if len(pieces) == 3:
            tok, ws, value = pieces
        else:
            # No whitespace left: the rest of the value is one token.
            tok, ws, value = value, '', ''
        if not tok.startswith('=?') or not tok.endswith('?='):
            return ''.join(decoded), pending_ws + tok + ws + value, defects
        text, charset, lang, new_defects = _ew.decode(tok)
        decoded.append(text)
        defects.extend(new_defects)
        pending_ws = ws
    return ''.join(decoded), pending_ws, defects
-
def get_fws(value):
    """FWS = 1*WSP

    This isn't the RFC definition: unfolding has already happened by the time
    we parse, so CRLF needs no special handling.  The leading whitespace of
    *value* is returned as an 'fws' WhiteSpaceTerminal together with the rest
    of the string.
    """
    stripped = value.lstrip()
    ws_len = len(value) - len(stripped)
    return WhiteSpaceTerminal(value[:ws_len], 'fws'), stripped
-
def get_encoded_word(value):
    """encoded-word = "=?" charset "?" encoding "?" encoded-text "?="

    Returns ``(EncodedWord, remainder)``.  The decoded text is split into
    'fws' and 'vtext' terminals that become the children of the EncodedWord;
    the original input is kept in its ``cte`` attribute.

    Raises HeaderParseError if *value* does not start with '=?', has no
    closing '?=', or cannot be decoded.
    """
    ew = EncodedWord()
    if not value.startswith('=?'):
        raise errors.HeaderParseError(
            "expected encoded word but found {}".format(value))
    tok, closer, rest = value[2:].partition('?=')
    if not closer:
        raise errors.HeaderParseError(
            "expected encoded word but found {}".format(value))
    if rest[:2].isdigit():
        # The '?=' just found is followed by two digits, so it is taken to
        # belong to the encoded text; extend the token to the next '?='.
        extra, _, rest = rest.partition('?=')
        tok = tok + '?=' + extra
    if len(tok.split()) > 1:
        ew.defects.append(errors.InvalidHeaderDefect(
            "whitespace inside encoded word"))
    ew.cte = value
    try:
        text, charset, lang, defects = _ew.decode('=?' + tok + '?=')
    except ValueError:
        raise errors.HeaderParseError(
            "encoded word format invalid: '{}'".format(ew.cte))
    ew.charset = charset
    ew.lang = lang
    ew.defects.extend(defects)
    while text:
        if text[0] in WSP:
            token, text = get_fws(text)
        else:
            pieces = _wsp_splitter(text, 1)
            token = ValueTerminal(pieces[0], 'vtext')
            _validate_xtext(token)
            text = ''.join(pieces[1:])
        ew.append(token)
    return ew, rest
-
def get_unstructured(value):
    """unstructured = (*([FWS] vchar) *WSP) / obs-unstruct
       obs-unstruct = *((*LF *CR *(obs-utext) *LF *CR)) / FWS)
       obs-utext = %d0 / obs-NO-WS-CTL / LF / CR

       obs-NO-WS-CTL is control characters except WSP/CR/LF.

    So, basically, we have printable runs, plus control characters or nulls in
    the obsolete syntax, separated by whitespace.  Since RFC 2047 uses the
    obsolete syntax in its specification, but requires whitespace on either
    side of the encoded words, I can see no reason to need to separate the
    non-printable-non-whitespace from the printable runs if they occur, so we
    parse this into xtext tokens separated by WSP tokens.

    Because an 'unstructured' value must by definition constitute the entire
    value, this 'get' routine does not return a remaining value, only the
    parsed TokenList.

    """
    # XXX: but what about bare CR and LF?  They might signal the start or
    # end of an encoded word.  YAGNI for now, since our current parsers
    # will never send us strings with bare CR or LF.

    unstructured = UnstructuredTokenList()
    while value:
        if value[0] in WSP:
            token, value = get_fws(value)
            unstructured.append(token)
            continue
        if value.startswith('=?'):
            try:
                token, value = get_encoded_word(value)
            except errors.HeaderParseError:
                # Not a valid encoded word; fall through and treat the run
                # as ordinary text.
                pass
            else:
                have_ws = True
                if len(unstructured) > 0:
                    if unstructured[-1].token_type != 'fws':
                        unstructured.defects.append(errors.InvalidHeaderDefect(
                            "missing whitespace before encoded word"))
                        have_ws = False
                if have_ws and len(unstructured) > 1:
                    # Whitespace between two encoded words is replaced by a
                    # terminal whose value and str() are empty.
                    if unstructured[-2].token_type == 'encoded-word':
                        unstructured[-1] = EWWhiteSpaceTerminal(
                            unstructured[-1], 'fws')
                unstructured.append(token)
                continue
        # Take everything up to the next run of whitespace as one vtext.
        _3to2list7 = list(_wsp_splitter(value, 1))
        tok, remainder, = _3to2list7[:1] + [_3to2list7[1:]]
        vtext = ValueTerminal(tok, 'vtext')
        _validate_xtext(vtext)
        unstructured.append(vtext)
        value = ''.join(remainder)
    return unstructured
-
def get_qp_ctext(value):
    """ctext = <printable ascii except backslash, "(" and ")">

    This is not the RFC ctext: nested comments are handled by get_comment
    and quoted pairs are unquoted here.  Anything except '(' and ')' is
    accepted, and the validation step records defects on the token.  The
    result is a 'ptext' token, here a WhiteSpaceTerminal, so its value
    is ' '.
    """
    text, value, _had_qp = _get_ptext_to_endchars(value, '()')
    token = WhiteSpaceTerminal(text, 'ptext')
    _validate_xtext(token)
    return token, value
-
def get_qcontent(value):
    """qcontent = qtext / quoted-pair

    Anything except the DQUOTE character is accepted, and the validation
    step records defects on the token.  Quoted pairs are converted to their
    unquoted values, so the result is a 'ptext' token, here a ValueTerminal.
    """
    text, value, _had_qp = _get_ptext_to_endchars(value, '"')
    token = ValueTerminal(text, 'ptext')
    _validate_xtext(token)
    return token, value
-
def get_atext(value):
    """atext = <run of characters that are not in ATOM_ENDS>

    Any character outside ATOM_ENDS is accepted; the validation step records
    defects on the token.

    Raises HeaderParseError if *value* does not start with such a character.
    """
    match = _non_atom_end_matcher(value)
    if match is None:
        raise errors.HeaderParseError(
            "expected atext but found '{}'".format(value))
    run = match.group()
    token = ValueTerminal(run, 'atext')
    _validate_xtext(token)
    return token, value[len(run):]
-
def get_bare_quoted_string(value):
    """bare-quoted-string = DQUOTE *([FWS] qcontent) [FWS] DQUOTE

    A quoted-string without the leading or trailing white space.  Its
    value is the text between the quote marks, with whitespace
    preserved and quoted pairs decoded.

    Returns ``(BareQuotedString, remainder)``.  If the input ends before the
    closing quote, an InvalidHeaderDefect is recorded on the token.

    Raises HeaderParseError if *value* is empty or does not start with '"'.
    """
    # Empty input is reported as a parse error, like any other input that
    # does not start with a quote, instead of failing on value[0].
    if not value or value[0] != '"':
        raise errors.HeaderParseError(
            "expected '\"' but found '{}'".format(value))
    bare_quoted_string = BareQuotedString()
    value = value[1:]
    while value and value[0] != '"':
        if value[0] in WSP:
            token, value = get_fws(value)
        else:
            token, value = get_qcontent(value)
        bare_quoted_string.append(token)
    if not value:
        bare_quoted_string.defects.append(errors.InvalidHeaderDefect(
            "end of header inside quoted string"))
        return bare_quoted_string, value
    return bare_quoted_string, value[1:]
-
def get_comment(value):
    """comment = "(" *([FWS] ccontent) [FWS] ")"
       ccontent = ctext / quoted-pair / comment

    Nested comments are handled here by recursion; quoted pairs are handled
    by get_qp_ctext.  If the input ends before the closing ')', an
    InvalidHeaderDefect is recorded on the returned Comment.

    Raises HeaderParseError if a non-empty *value* does not start with '('.
    """
    if value and not value.startswith('('):
        raise errors.HeaderParseError(
            "expected '(' but found '{}'".format(value))
    comment = Comment()
    value = value[1:]
    while value and value[0] != ")":
        first = value[0]
        if first in WSP:
            parse = get_fws
        elif first == '(':
            parse = get_comment
        else:
            parse = get_qp_ctext
        token, value = parse(value)
        comment.append(token)
    if value:
        # Drop the closing parenthesis.
        return comment, value[1:]
    comment.defects.append(errors.InvalidHeaderDefect(
        "end of header inside comment"))
    return comment, value
-
def get_cfws(value):
    """CFWS = (1*([FWS] comment) [FWS]) / FWS

    Collects every leading whitespace run and comment of *value* into a
    CFWSList and returns it with the rest of the string.
    """
    cfws = CFWSList()
    while value and value[0] in CFWS_LEADER:
        parse = get_fws if value[0] in WSP else get_comment
        token, value = parse(value)
        cfws.append(token)
    return cfws, value
-
def get_quoted_string(value):
    """quoted-string = [CFWS] <bare-quoted-string> [CFWS]

    'bare-quoted-string' is an intermediate token defined by this parser,
    not by the RFC grammar: the quoted string without any attached CFWS.
    """
    quoted_string = QuotedString()
    has_leading_cfws = bool(value) and value[0] in CFWS_LEADER
    if has_leading_cfws:
        leading, value = get_cfws(value)
        quoted_string.append(leading)
    bare, value = get_bare_quoted_string(value)
    quoted_string.append(bare)
    if not value or value[0] not in CFWS_LEADER:
        return quoted_string, value
    trailing, value = get_cfws(value)
    quoted_string.append(trailing)
    return quoted_string, value
-
def get_atom(value):
    """atom = [CFWS] 1*atext [CFWS]

    Raises HeaderParseError if, after any leading CFWS, the next character
    is one of ATOM_ENDS.
    """
    atom = Atom()
    has_leading_cfws = bool(value) and value[0] in CFWS_LEADER
    if has_leading_cfws:
        leading, value = get_cfws(value)
        atom.append(leading)
    if value and value[0] in ATOM_ENDS:
        raise errors.HeaderParseError(
            "expected atom but found '{}'".format(value))
    text, value = get_atext(value)
    atom.append(text)
    if not value or value[0] not in CFWS_LEADER:
        return atom, value
    trailing, value = get_cfws(value)
    atom.append(trailing)
    return atom, value
-
def get_dot_atom_text(value):
    """dot-text = 1*atext *("." 1*atext)

    Raises HeaderParseError if *value* does not start with atext or if the
    parsed text ends with a dot.
    """
    if not value or value[0] in ATOM_ENDS:
        raise errors.HeaderParseError("expected atom at a start of "
            "dot-atom-text but found '{}'".format(value))
    dot_atom_text = DotAtomText()
    while value and value[0] not in ATOM_ENDS:
        atext, value = get_atext(value)
        dot_atom_text.append(atext)
        if value.startswith('.'):
            dot_atom_text.append(DOT)
            value = value[1:]
    last = dot_atom_text[-1]
    if last is DOT:
        raise errors.HeaderParseError("expected atom at end of dot-atom-text "
            "but found '{}'".format('.'+value))
    return dot_atom_text, value
-
def get_dot_atom(value):
    """dot-atom = [CFWS] dot-atom-text [CFWS]

    Returns ``(DotAtom, remainder)``.

    Raises HeaderParseError (from get_dot_atom_text) if no dot-atom-text is
    found, which includes empty input.
    """
    dot_atom = DotAtom()
    # Guard against empty input so that it reaches get_dot_atom_text and is
    # reported as a parse error rather than failing on value[0].
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        dot_atom.append(token)
    token, value = get_dot_atom_text(value)
    dot_atom.append(token)
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        dot_atom.append(token)
    return dot_atom, value
-
def get_word(value):
    """word = atom / quoted-string

    Either atom or quoted-string may start with CFWS.  We have to peel off this
    CFWS first to determine which type of word to parse.  Afterward we splice
    the leading CFWS, if any, into the parsed sub-token.

    If neither an atom or a quoted-string is found before the next special, a
    HeaderParseError is raised.  That includes empty input and input that
    consists only of CFWS.

    The token returned is either an Atom or a QuotedString, as appropriate.
    This means the 'word' level of the formal grammar is not represented in the
    parse tree; this is because having that extra layer when manipulating the
    parse tree is more confusing than it is helpful.

    """
    if value and value[0] in CFWS_LEADER:
        leader, value = get_cfws(value)
    else:
        leader = None
    if not value:
        # Nothing is left to be a word.  Callers such as get_phrase and
        # get_obs_local_part recover from HeaderParseError only, so the
        # failure must not surface as an IndexError from value[0].
        raise errors.HeaderParseError(
            "Expected 'atom' or 'quoted-string' but found nothing.")
    if value[0]=='"':
        token, value = get_quoted_string(value)
    elif value[0] in SPECIALS:
        raise errors.HeaderParseError("Expected 'atom' or 'quoted-string' "
                                      "but found '{}'".format(value))
    else:
        token, value = get_atom(value)
    if leader is not None:
        token[:0] = [leader]
    return token, value
-
def get_phrase(value):
    """phrase = 1*word / obs-phrase
       obs-phrase = word *(word / "." / CFWS)

    A phrase is a sequence of words, periods and CFWS in any order, as long
    as it starts with a word.  Periods and word-less comments each add an
    ObsoleteHeaderDefect.  A phrase that does not start with a word is still
    accepted, but gets an InvalidHeaderDefect because even the obsolete
    grammar does not allow it.
    """
    phrase = Phrase()
    try:
        first_word, value = get_word(value)
    except errors.HeaderParseError:
        phrase.defects.append(errors.InvalidHeaderDefect(
            "phrase does not start with word"))
    else:
        phrase.append(first_word)
    while value and value[0] not in PHRASE_ENDS:
        if value[0] == '.':
            phrase.append(DOT)
            phrase.defects.append(errors.ObsoleteHeaderDefect(
                "period in 'phrase'"))
            value = value[1:]
            continue
        try:
            token, value = get_word(value)
        except errors.HeaderParseError:
            # Only CFWS is tolerated where a word was expected.
            if value[0] not in CFWS_LEADER:
                raise
            token, value = get_cfws(value)
            phrase.defects.append(errors.ObsoleteHeaderDefect(
                "comment found without atom"))
        phrase.append(token)
    return phrase, value
-
def get_local_part(value):
    """local-part = dot-atom / quoted-string / obs-local-part

    Returns ``(LocalPart, remainder)``.  A local part that only parses as an
    obs-local-part gets an ObsoleteHeaderDefect, one that is not even that
    gets an InvalidHeaderDefect, and non-ASCII content gets a
    NonASCIILocalPartDefect.

    Raises HeaderParseError if *value* is empty (or only CFWS), or if nothing
    usable as a local part is found before the next special.
    """
    local_part = LocalPart()
    leader = None
    # Guard against empty input so it reaches the check below and raises
    # HeaderParseError instead of failing on value[0].
    if value and value[0] in CFWS_LEADER:
        leader, value = get_cfws(value)
    if not value:
        raise errors.HeaderParseError(
            "expected local-part but found '{}'".format(value))
    try:
        token, value = get_dot_atom(value)
    except errors.HeaderParseError:
        try:
            token, value = get_word(value)
        except errors.HeaderParseError:
            if value[0] != '\\' and value[0] in PHRASE_ENDS:
                raise
            # Start from an empty token; the obs-local-part pass below
            # parses the text.
            token = TokenList()
    if leader is not None:
        token[:0] = [leader]
    local_part.append(token)
    if value and (value[0]=='\\' or value[0] not in PHRASE_ENDS):
        # More local-part text follows: re-parse everything consumed so far
        # plus the rest as an obs-local-part.
        obs_local_part, value = get_obs_local_part(str(local_part) + value)
        if obs_local_part.token_type == 'invalid-obs-local-part':
            local_part.defects.append(errors.InvalidHeaderDefect(
                "local-part is not dot-atom, quoted-string, or obs-local-part"))
        else:
            local_part.defects.append(errors.ObsoleteHeaderDefect(
                "local-part is not a dot-atom (contains CFWS)"))
        local_part[0] = obs_local_part
    try:
        local_part.value.encode('ascii')
    except UnicodeEncodeError:
        local_part.defects.append(errors.NonASCIILocalPartDefect(
                "local-part contains non-ASCII characters)"))
    return local_part, value
-
def get_obs_local_part(value):
    """obs-local-part = word *("." word)

    Returns ``(ObsLocalPart, remainder)``.  Repeated, leading or trailing
    dots, missing dots between words and stray backslashes are recorded as
    InvalidHeaderDefects; if any defect was recorded the token type is set
    to 'invalid-obs-local-part'.

    Raises HeaderParseError if nothing at all could be parsed, or if a word
    was expected and neither a word nor CFWS was found.
    """
    obs_local_part = ObsLocalPart()
    last_non_ws_was_dot = False
    while value and (value[0]=='\\' or value[0] not in PHRASE_ENDS):
        if value[0] == '.':
            if last_non_ws_was_dot:
                obs_local_part.defects.append(errors.InvalidHeaderDefect(
                    "invalid repeated '.'"))
            obs_local_part.append(DOT)
            last_non_ws_was_dot = True
            value = value[1:]
            continue
        elif value[0]=='\\':
            obs_local_part.append(ValueTerminal(value[0],
                                                'misplaced-special'))
            value = value[1:]
            obs_local_part.defects.append(errors.InvalidHeaderDefect(
                "'\\' character outside of quoted-string/ccontent"))
            last_non_ws_was_dot = False
            continue
        if obs_local_part and obs_local_part[-1].token_type != 'dot':
            obs_local_part.defects.append(errors.InvalidHeaderDefect(
                "missing '.' between words"))
        try:
            token, value = get_word(value)
            last_non_ws_was_dot = False
        except errors.HeaderParseError:
            if value[0] not in CFWS_LEADER:
                raise
            token, value = get_cfws(value)
        obs_local_part.append(token)
    if not obs_local_part:
        # Nothing was consumed; the checks below would index an empty list.
        raise errors.HeaderParseError(
            "expected obs-local-part but found '{}'".format(value))
    # The length guards keep a lone CFWS token from being indexed at [1]
    # or [-2].
    if (obs_local_part[0].token_type == 'dot' or
            obs_local_part[0].token_type=='cfws' and
            len(obs_local_part) > 1 and
            obs_local_part[1].token_type=='dot'):
        obs_local_part.defects.append(errors.InvalidHeaderDefect(
            "Invalid leading '.' in local part"))
    if (obs_local_part[-1].token_type == 'dot' or
            obs_local_part[-1].token_type=='cfws' and
            len(obs_local_part) > 1 and
            obs_local_part[-2].token_type=='dot'):
        obs_local_part.defects.append(errors.InvalidHeaderDefect(
            "Invalid trailing '.' in local part"))
    if obs_local_part.defects:
        obs_local_part.token_type = 'invalid-obs-local-part'
    return obs_local_part, value
-
def get_dtext(value):
    """dtext = <printable ascii except backslash, "[" and "]"> / obs-dtext
       obs-dtext = obs-NO-WS-CTL / quoted-pair

    Anything except the excluded characters is accepted, and the validation
    step records defects on the token.  Quoted pairs are converted to their
    unquoted values, so the result is a 'ptext' token, here a ValueTerminal.
    If the scanner reports quoted pairs, an ObsoleteHeaderDefect is added to
    the token.
    """
    text, value, had_qp = _get_ptext_to_endchars(value, '[]')
    token = ValueTerminal(text, 'ptext')
    if had_qp:
        token.defects.append(errors.ObsoleteHeaderDefect(
            "quoted printable found in domain-literal"))
    _validate_xtext(token)
    return token, value
-
def _check_for_early_dl_end(value, domain_literal):
    """Close *domain_literal* if the input ended inside it.

    Returns False when *value* is non-empty.  Otherwise records an
    InvalidHeaderDefect on *domain_literal*, appends the missing ']'
    terminal and returns True.
    """
    if value:
        return False
    # The defect belongs in the defects list.  Appending it to the token
    # list itself would put a non-token object among the children.
    domain_literal.defects.append(errors.InvalidHeaderDefect(
        "end of input inside domain-literal"))
    domain_literal.append(ValueTerminal(']', 'domain-literal-end'))
    return True
-
def get_domain_literal(value):
    """domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]

    Returns ``(DomainLiteral, remainder)``.  If the input ends inside the
    literal, the literal is closed by _check_for_early_dl_end and returned
    with an empty remainder.

    Raises HeaderParseError if *value* is empty (or only CFWS), does not
    start with '[', or has something other than ']' where the literal
    should end.
    """
    domain_literal = DomainLiteral()
    # Guard against empty input so it reaches the check below and raises
    # HeaderParseError instead of failing on value[0].
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        domain_literal.append(token)
    if not value:
        raise errors.HeaderParseError("expected domain-literal")
    if value[0] != '[':
        raise errors.HeaderParseError("expected '[' at start of domain-literal "
                "but found '{}'".format(value))
    value = value[1:]
    if _check_for_early_dl_end(value, domain_literal):
        return domain_literal, value
    domain_literal.append(ValueTerminal('[', 'domain-literal-start'))
    if value[0] in WSP:
        token, value = get_fws(value)
        domain_literal.append(token)
        # The whitespace may have been the last thing in the input.
        if _check_for_early_dl_end(value, domain_literal):
            return domain_literal, value
    token, value = get_dtext(value)
    domain_literal.append(token)
    if _check_for_early_dl_end(value, domain_literal):
        return domain_literal, value
    if value[0] in WSP:
        token, value = get_fws(value)
        domain_literal.append(token)
    if _check_for_early_dl_end(value, domain_literal):
        return domain_literal, value
    if value[0] != ']':
        raise errors.HeaderParseError("expected ']' at end of domain-literal "
                "but found '{}'".format(value))
    domain_literal.append(ValueTerminal(']', 'domain-literal-end'))
    value = value[1:]
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        domain_literal.append(token)
    return domain_literal, value
-
def get_domain(value):
    """domain = dot-atom / domain-literal / obs-domain
       obs-domain = atom *("." atom))

    Returns ``(Domain, remainder)``.  A domain that needs the obsolete form
    gets an ObsoleteHeaderDefect.

    Raises HeaderParseError if *value* is empty (or only CFWS) or no domain
    can be parsed from it.
    """
    domain = Domain()
    leader = None
    # Guard against empty input so it reaches the check below and raises
    # HeaderParseError instead of failing on value[0].
    if value and value[0] in CFWS_LEADER:
        leader, value = get_cfws(value)
    if not value:
        raise errors.HeaderParseError(
            "expected domain but found '{}'".format(value))
    if value[0] == '[':
        token, value = get_domain_literal(value)
        if leader is not None:
            token[:0] = [leader]
        domain.append(token)
        return domain, value
    try:
        token, value = get_dot_atom(value)
    except errors.HeaderParseError:
        token, value = get_atom(value)
    if leader is not None:
        token[:0] = [leader]
    domain.append(token)
    if value and value[0] == '.':
        # A dot after a complete dot-atom/atom means CFWS interrupted the
        # domain; continue with the obsolete atom *("." atom) form.
        domain.defects.append(errors.ObsoleteHeaderDefect(
            "domain is not a dot-atom (contains CFWS)"))
        if domain[0].token_type == 'dot-atom':
            # Flatten: replace the dot-atom by its own children.
            domain[:] = domain[0]
        while value and value[0] == '.':
            domain.append(DOT)
            token, value = get_atom(value[1:])
            domain.append(token)
    return domain, value
-
def get_addr_spec(value):
    """addr-spec = local-part "@" domain

    Returns ``(AddrSpec, remainder)``.  If no '@' follows the local part,
    an InvalidHeaderDefect is recorded and the AddrSpec holds only the
    local part.

    Raises whatever get_local_part and get_domain raise.
    """
    addr_spec = AddrSpec()
    token, value = get_local_part(value)
    addr_spec.append(token)
    if not value or value[0] != '@':
        # The message names the grammar rule, which is "addr-spec".
        addr_spec.defects.append(errors.InvalidHeaderDefect(
            "addr-spec local part with no domain"))
        return addr_spec, value
    addr_spec.append(ValueTerminal('@', 'address-at-symbol'))
    token, value = get_domain(value[1:])
    addr_spec.append(token)
    return addr_spec, value
-
def get_obs_route(value):
    """obs-route = obs-domain-list ":"
       obs-domain-list = *(CFWS / ",") "@" domain *("," [CFWS] ["@" domain])

    Returns an obs-route token with the appropriate sub-tokens (that is,
    there is no obs-domain-list in the parse tree), plus the remainder.

    Raises HeaderParseError if no '@' starts the first domain, if the header
    ends before the terminating ':', or if something other than ':' follows
    the domain list.
    """
    obs_route = ObsRoute()
    while value and (value[0]==',' or value[0] in CFWS_LEADER):
        if value[0] in CFWS_LEADER:
            token, value = get_cfws(value)
            obs_route.append(token)
        elif value[0] == ',':
            obs_route.append(ListSeparator)
            value = value[1:]
    if not value or value[0] != '@':
        raise errors.HeaderParseError(
            "expected obs-route domain but found '{}'".format(value))
    obs_route.append(RouteComponentMarker)
    token, value = get_domain(value[1:])
    obs_route.append(token)
    while value and value[0]==',':
        obs_route.append(ListSeparator)
        value = value[1:]
        if not value:
            break
        if value[0] in CFWS_LEADER:
            token, value = get_cfws(value)
            obs_route.append(token)
        if not value:
            # The CFWS consumed the rest of the input; let the end-of-header
            # check below report it.
            break
        if value[0] == '@':
            obs_route.append(RouteComponentMarker)
            token, value = get_domain(value[1:])
            obs_route.append(token)
    if not value:
        raise errors.HeaderParseError("end of header while parsing obs-route")
    if value[0] != ':':
        raise errors.HeaderParseError( "expected ':' marking end of "
            "obs-route but found '{}'".format(value))
    obs_route.append(ValueTerminal(':', 'end-of-obs-route-marker'))
    return obs_route, value[1:]
-
def get_angle_addr(value):
    """angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr
       obs-angle-addr = [CFWS] "<" obs-route addr-spec ">" [CFWS]

    Returns ``(AngleAddr, remainder)``.  A null address ('<>'), an obsolete
    route and a missing closing '>' are each recorded as defects.

    Raises HeaderParseError if *value* does not contain '<' after optional
    CFWS, or if neither an addr-spec nor an obs-route follows the '<'
    (which includes the header ending right after it).
    """
    angle_addr = AngleAddr()
    # Guard against empty input so it reaches the check below and raises
    # HeaderParseError instead of failing on value[0].
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        angle_addr.append(token)
    if not value or value[0] != '<':
        raise errors.HeaderParseError(
            "expected angle-addr but found '{}'".format(value))
    angle_addr.append(ValueTerminal('<', 'angle-addr-start'))
    value = value[1:]
    if not value:
        # The header ended right after '<': there is no addr-spec to parse.
        raise errors.HeaderParseError(
            "expected addr-spec or obs-route but found '{}'".format(value))
    # Although it is not legal per RFC5322, SMTP uses '<>' in certain
    # circumstances.
    if value[0] == '>':
        angle_addr.append(ValueTerminal('>', 'angle-addr-end'))
        angle_addr.defects.append(errors.InvalidHeaderDefect(
            "null addr-spec in angle-addr"))
        value = value[1:]
        return angle_addr, value
    try:
        token, value = get_addr_spec(value)
    except errors.HeaderParseError:
        try:
            token, value = get_obs_route(value)
            angle_addr.defects.append(errors.ObsoleteHeaderDefect(
                "obsolete route specification in angle-addr"))
        except errors.HeaderParseError:
            raise errors.HeaderParseError(
                "expected addr-spec or obs-route but found '{}'".format(value))
        angle_addr.append(token)
        token, value = get_addr_spec(value)
    angle_addr.append(token)
    if value and value[0] == '>':
        value = value[1:]
    else:
        angle_addr.defects.append(errors.InvalidHeaderDefect(
            "missing trailing '>' on angle-addr"))
    # The end marker is appended even when it was missing from the input.
    angle_addr.append(ValueTerminal('>', 'angle-addr-end'))
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        angle_addr.append(token)
    return angle_addr, value
-
def get_display_name(value):
    """display-name = phrase

    Because this is simply a name-rule, the result is not a display-name
    token wrapping a phrase but a display-name token that holds the phrase's
    own children and a copy of its defects.
    """
    phrase, value = get_phrase(value)
    display_name = DisplayName()
    display_name.extend(list(phrase))
    display_name.defects = list(phrase.defects)
    return display_name, value
-
-
def get_name_addr(value):
    """name-addr = [display-name] angle-addr

    Returns ``(NameAddr, remainder)``.

    Raises HeaderParseError if *value* is empty or only CFWS, if it starts
    with a phrase-ending special other than '<', if the header ends after
    the display name, or if no angle-addr follows.
    """
    name_addr = NameAddr()
    if not value:
        # Report empty input as a parse error instead of failing on value[0].
        raise errors.HeaderParseError(
            "expected name-addr but found '{}'".format(value))
    # Both the optional display name and the angle-addr can start with cfws.
    leader = None
    if value[0] in CFWS_LEADER:
        leader, value = get_cfws(value)
        if not value:
            raise errors.HeaderParseError(
                "expected name-addr but found '{}'".format(leader))
    if value[0] != '<':
        if value[0] in PHRASE_ENDS:
            raise errors.HeaderParseError(
                "expected name-addr but found '{}'".format(value))
        token, value = get_display_name(value)
        if not value:
            raise errors.HeaderParseError(
                "expected name-addr but found '{}'".format(token))
        if leader is not None:
            if isinstance(token[0], TokenList):
                token[0][:0] = [leader]
            else:
                # The display name starts with a terminal (for example a
                # lone '.'), which is a string and cannot take children;
                # put the CFWS in front of it instead.
                token[:0] = [leader]
            leader = None
        name_addr.append(token)
    token, value = get_angle_addr(value)
    if leader is not None:
        token[:0] = [leader]
    name_addr.append(token)
    return name_addr, value
-
def get_mailbox(value):
    """mailbox = name-addr / addr-spec

    The only way to tell a name-addr from an addr-spec is to try each parser
    in turn.  If the parsed token carries any InvalidHeaderDefect, the
    returned Mailbox is marked as 'invalid-mailbox'.

    Raises HeaderParseError if neither form can be parsed.
    """
    try:
        parsed, value = get_name_addr(value)
    except errors.HeaderParseError:
        try:
            parsed, value = get_addr_spec(value)
        except errors.HeaderParseError:
            raise errors.HeaderParseError(
                "expected mailbox but found '{}'".format(value))
    mailbox = Mailbox()
    for defect in parsed.all_defects:
        if isinstance(defect, errors.InvalidHeaderDefect):
            mailbox.token_type = 'invalid-mailbox'
            break
    mailbox.append(parsed)
    return mailbox, value
-
def get_invalid_mailbox(value, endchars):
    """Read everything up to one of the chars in endchars.

    This is outside the formal grammar.  Phrase-ending specials met on the
    way are kept as 'misplaced-special' terminals and everything else is
    parsed as phrases.  Returns ``(InvalidMailbox, remainder)``.
    """
    invalid_mailbox = InvalidMailbox()
    while value and value[0] not in endchars:
        head = value[0]
        if head in PHRASE_ENDS:
            token = ValueTerminal(head, 'misplaced-special')
            value = value[1:]
        else:
            token, value = get_phrase(value)
        invalid_mailbox.append(token)
    return invalid_mailbox, value
-
def get_mailbox_list(value):
    """ mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list
        obs-mbox-list = *([CFWS] ",") mailbox *("," [mailbox / CFWS])

    For this routine we go outside the formal grammar in order to improve error
    handling.  We recognize the end of the mailbox list only at the end of the
    value or at a ';' (the group terminator).  This is so that we can turn
    invalid mailboxes into InvalidMailbox tokens and continue parsing any
    remaining valid mailboxes.  We also allow all mailbox entries to be null,
    and this condition is handled appropriately at a higher level.

    Returns ``(MailboxList, remainder)``; the remainder is either empty or
    starts with ';'.

    """
    mailbox_list = MailboxList()
    while value and value[0] != ';':
        try:
            token, value = get_mailbox(value)
            mailbox_list.append(token)
        except errors.HeaderParseError:
            # Not a mailbox.  It is either an empty list element (possibly
            # just CFWS) or garbage that is kept as an invalid mailbox.
            leader = None
            if value[0] in CFWS_LEADER:
                leader, value = get_cfws(value)
                if not value or value[0] in ',;':
                    mailbox_list.append(leader)
                    mailbox_list.defects.append(errors.ObsoleteHeaderDefect(
                        "empty element in mailbox-list"))
                else:
                    token, value = get_invalid_mailbox(value, ',;')
                    if leader is not None:
                        token[:0] = [leader]
                    mailbox_list.append(token)
                    mailbox_list.defects.append(errors.InvalidHeaderDefect(
                        "invalid mailbox in mailbox-list"))
            elif value[0] == ',':
                mailbox_list.defects.append(errors.ObsoleteHeaderDefect(
                    "empty element in mailbox-list"))
            else:
                token, value = get_invalid_mailbox(value, ',;')
                if leader is not None:
                    token[:0] = [leader]
                mailbox_list.append(token)
                mailbox_list.defects.append(errors.InvalidHeaderDefect(
                    "invalid mailbox in mailbox-list"))
        if value and value[0] not in ',;':
            # Crap after mailbox; treat it as an invalid mailbox.
            # The mailbox info will still be available.
            mailbox = mailbox_list[-1]
            mailbox.token_type = 'invalid-mailbox'
            token, value = get_invalid_mailbox(value, ',;')
            mailbox.extend(token)
            mailbox_list.defects.append(errors.InvalidHeaderDefect(
                "invalid mailbox in mailbox-list"))
        if value and value[0] == ',':
            # Consume the separator and go on with the next element.
            mailbox_list.append(ListSeparator)
            value = value[1:]
    return mailbox_list, value
-
-
def get_group_list(value):
    """group-list = mailbox-list / CFWS / obs-group-list
       obs-group-list = 1*([CFWS] ",") [CFWS]

    Returns ``(GroupList, remainder)``.  Running out of input is recorded as
    an InvalidHeaderDefect, and a list without any mailbox as an
    ObsoleteHeaderDefect.
    """
    group_list = GroupList()
    if not value:
        group_list.defects.append(errors.InvalidHeaderDefect(
            "end of header before group-list"))
        return group_list, value
    leader = None
    if value and value[0] in CFWS_LEADER:
        leader, value = get_cfws(value)
        if not value:
            # This should never happen in email parsing, since CFWS-only is a
            # legal alternative to group-list in a group, which is the only
            # place group-list appears.
            group_list.defects.append(errors.InvalidHeaderDefect(
                "end of header in group-list"))
            group_list.append(leader)
            return group_list, value
    if value[0] == ';':
        # Without leading CFWS there is nothing to add; appending None
        # would put a non-token object into the list.
        if leader is not None:
            group_list.append(leader)
        return group_list, value
    token, value = get_mailbox_list(value)
    if len(token.all_mailboxes)==0:
        # Only empty entries: keep the raw tokens instead of a mailbox-list.
        if leader is not None:
            group_list.append(leader)
        group_list.extend(token)
        group_list.defects.append(errors.ObsoleteHeaderDefect(
            "group-list with empty entries"))
        return group_list, value
    if leader is not None:
        token[:0] = [leader]
    group_list.append(token)
    return group_list, value
-
def get_group(value):
    """ group = display-name ":" [group-list] ";" [CFWS]

    Return a ``(Group, remaining_value)`` pair.

    Raises errors.HeaderParseError if the display name is not followed by
    ':' or if text other than ';' follows the group-list.  If the header
    ends before the terminating ';', an InvalidHeaderDefect is recorded and
    the terminator is supplied so that the resulting token is well formed.

    """
    group = Group()
    token, value = get_display_name(value)
    if not value or value[0] != ':':
        raise errors.HeaderParseError("expected ':' at end of group "
            "display name but found '{}'".format(value))
    group.append(token)
    group.append(ValueTerminal(':', 'group-display-name-terminator'))
    value = value[1:]
    if value and value[0] == ';':
        # Empty group: "name:;"
        group.append(ValueTerminal(';', 'group-terminator'))
        return group, value[1:]
    token, value = get_group_list(value)
    group.append(token)
    if not value:
        group.defects.append(errors.InvalidHeaderDefect(
            "end of header in group"))
    elif value[0] != ';':
        # 'elif' matters: indexing an empty value here would raise
        # IndexError instead of reporting the defect recorded above.
        raise errors.HeaderParseError(
            "expected ';' at end of group but found {}".format(value))
    group.append(ValueTerminal(';', 'group-terminator'))
    value = value[1:]
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        group.append(token)
    return group, value
-
def get_address(value):
    """ address = mailbox / group

    Note that counter-intuitively, an address can be either a single address
    or a list of addresses (a group).  This is why the returned Address
    object has a 'mailboxes' attribute which treats a single address as a
    list of length one.  When you need to differentiate between the two
    cases, extract the single element, which is either a mailbox or a group
    token.

    Return an ``(Address, remaining_value)`` pair; raise
    errors.HeaderParseError if the value parses as neither form.

    """
    # The formal grammar isn't very helpful when parsing an address.  mailbox
    # and group, especially when allowing for obsolete forms, start off very
    # similarly.  It is only when you reach one of @, <, or : that you know
    # what you've got.  So, we simply try each parser in turn (group first,
    # then mailbox).  We could perhaps make this more efficient by looking
    # for a phrase and then branching based on the next character, but that
    # would be a premature optimization.
    try:
        parsed, value = get_group(value)
    except errors.HeaderParseError:
        try:
            parsed, value = get_mailbox(value)
        except errors.HeaderParseError:
            raise errors.HeaderParseError(
                "expected address but found '{}'".format(value))
    address = Address()
    address.append(parsed)
    return address, value
-
def get_address_list(value):
    """ address_list = (address *("," address)) / obs-addr-list
        obs-addr-list = *([CFWS] ",") address *("," [address / CFWS])

    We depart from the formal grammar here by continuing to parse until the
    end of the input, assuming the input to be entirely composed of an
    address-list.  This is always true in email parsing, and allows us
    to skip invalid addresses to parse additional valid ones.

    Return an ``(AddressList, remaining_value)`` pair.

    """
    addresses = AddressList()
    while value:
        try:
            address, value = get_address(value)
            addresses.append(address)
        except errors.HeaderParseError:
            if value[0] in CFWS_LEADER:
                cfws, value = get_cfws(value)
                if value and value[0] != ',':
                    # CFWS followed by something unparseable.
                    bad, value = get_invalid_mailbox(value, ',')
                    if cfws is not None:
                        bad[:0] = [cfws]
                    addresses.append(Address([bad]))
                    addresses.defects.append(errors.InvalidHeaderDefect(
                        "invalid address in address-list"))
                else:
                    # Nothing but CFWS in this list slot.
                    addresses.append(cfws)
                    addresses.defects.append(errors.ObsoleteHeaderDefect(
                        "address-list entry with no content"))
            elif value[0] == ',':
                addresses.defects.append(errors.ObsoleteHeaderDefect(
                    "empty element in address-list"))
            else:
                bad, value = get_invalid_mailbox(value, ',')
                addresses.append(Address([bad]))
                addresses.defects.append(errors.InvalidHeaderDefect(
                    "invalid address in address-list"))
        if value and value[0] != ',':
            # Crap after address; treat it as an invalid mailbox.
            # The mailbox info will still be available.
            last_mailbox = addresses[-1][0]
            last_mailbox.token_type = 'invalid-mailbox'
            junk, value = get_invalid_mailbox(value, ',')
            last_mailbox.extend(junk)
            addresses.defects.append(errors.InvalidHeaderDefect(
                "invalid address in address-list"))
        if value:
            # Must be a ',' at this point.
            addresses.append(ValueTerminal(',', 'list-separator'))
            value = value[1:]
    return addresses, value
-
-#
-# XXX: As I begin to add additional header parsers, I'm realizing we probably
-# have two levels of parser routines: the get_XXX methods that get a token in
-# the grammar, and parse_XXX methods that parse an entire field value. So
-# get_address_list above should really be a parse_ method, as probably should
-# be get_unstructured.
-#
-
def parse_mime_version(value):
    """ mime-version = [CFWS] 1*digit [CFWS] "." [CFWS] 1*digit [CFWS]

    Return a MIMEVersion token list.  The ``major`` and ``minor``
    attributes are set only when the corresponding number is present and
    numeric; all problems are recorded as defects on the returned token.

    """
    # The [CFWS] is implicit in the RFC 2045 BNF.
    # XXX: This routine is a bit verbose, should factor out a get_int method.
    version = MIMEVersion()
    if not value:
        version.defects.append(errors.HeaderMissingRequiredValue(
            "Missing MIME version number (eg: 1.0)"))
        return version
    if value[0] in CFWS_LEADER:
        cfws, value = get_cfws(value)
        version.append(cfws)
        if not value:
            version.defects.append(errors.HeaderMissingRequiredValue(
                "Expected MIME version number but found only CFWS"))

    # Major number: everything up to the '.' or the next CFWS.
    major_text = ''
    while value and value[0] != '.' and value[0] not in CFWS_LEADER:
        major_text += value[0]
        value = value[1:]
    if major_text.isdigit():
        version.major = int(major_text)
        version.append(ValueTerminal(major_text, 'digits'))
    else:
        version.defects.append(errors.InvalidHeaderDefect(
            "Expected MIME major version number but found {!r}".format(major_text)))
        version.append(ValueTerminal(major_text, 'xtext'))
    if value and value[0] in CFWS_LEADER:
        cfws, value = get_cfws(value)
        version.append(cfws)

    # Separator.
    if not value or value[0] != '.':
        if version.major is not None:
            version.defects.append(errors.InvalidHeaderDefect(
                "Incomplete MIME version; found only major number"))
        if value:
            version.append(ValueTerminal(value, 'xtext'))
        return version
    version.append(ValueTerminal('.', 'version-separator'))
    value = value[1:]
    if value and value[0] in CFWS_LEADER:
        cfws, value = get_cfws(value)
        version.append(cfws)
    if not value:
        if version.major is not None:
            version.defects.append(errors.InvalidHeaderDefect(
                "Incomplete MIME version; found only major number"))
        return version

    # Minor number: everything up to the next CFWS.
    minor_text = ''
    while value and value[0] not in CFWS_LEADER:
        minor_text += value[0]
        value = value[1:]
    if minor_text.isdigit():
        version.minor = int(minor_text)
        version.append(ValueTerminal(minor_text, 'digits'))
    else:
        version.defects.append(errors.InvalidHeaderDefect(
            "Expected MIME minor version number but found {!r}".format(minor_text)))
        version.append(ValueTerminal(minor_text, 'xtext'))
    if value and value[0] in CFWS_LEADER:
        cfws, value = get_cfws(value)
        version.append(cfws)
    if value:
        version.defects.append(errors.InvalidHeaderDefect(
            "Excess non-CFWS text after MIME version"))
        version.append(ValueTerminal(value, 'xtext'))
    return version
-
def get_invalid_parameter(value):
    """ Read everything up to the next ';'.

    This is outside the formal grammar.  The InvalidParameter TokenList that
    is returned acts like a Parameter, but the data attributes are None.

    Return an ``(InvalidParameter, remaining_value)`` pair; the remaining
    value is either empty or starts with ';'.

    """
    junk = InvalidParameter()
    while value and value[0] != ';':
        if value[0] not in PHRASE_ENDS:
            phrase, value = get_phrase(value)
            junk.append(phrase)
            continue
        # A special that cannot start a phrase: record it on its own.
        junk.append(ValueTerminal(value[0], 'misplaced-special'))
        value = value[1:]
    return junk, value
-
def get_ttext(value):
    """ttext = <matches _ttext_matcher>

    We allow any non-TOKEN_ENDS in ttext, but add defects to the token's
    defects list if we find non-ttext characters.  We also register defects
    for *any* non-printables even though the RFC doesn't exclude all of them,
    because we follow the spirit of RFC 5322.

    Return a ``(ValueTerminal, remaining_value)`` pair, or raise
    errors.HeaderParseError if value does not start with ttext.

    """
    match = _non_token_end_matcher(value)
    if not match:
        raise errors.HeaderParseError(
            "expected ttext but found '{}'".format(value))
    matched = match.group()
    remaining = value[len(matched):]
    terminal = ValueTerminal(matched, 'ttext')
    _validate_xtext(terminal)
    return terminal, remaining
-
def get_token(value):
    """token = [CFWS] 1*ttext [CFWS]

    The RFC equivalent of ttext is any US-ASCII chars except space, ctls, or
    tspecials.  We also exclude tabs even though the RFC doesn't.

    The RFC implies the CFWS but is not explicit about it in the BNF.

    Return a ``(Token, remaining_value)`` pair, or raise
    errors.HeaderParseError if no ttext is found.

    """
    result = Token()
    if value and value[0] in CFWS_LEADER:
        cfws, value = get_cfws(value)
        result.append(cfws)
    if value and value[0] in TOKEN_ENDS:
        raise errors.HeaderParseError(
            "expected token but found '{}'".format(value))
    text, value = get_ttext(value)
    result.append(text)
    if value and value[0] in CFWS_LEADER:
        cfws, value = get_cfws(value)
        result.append(cfws)
    return result, value
-
def get_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character)

    We allow any non-ATTRIBUTE_ENDS in attrtext, but add defects to the
    token's defects list if we find non-attrtext characters.  We also
    register defects for *any* non-printables even though the RFC doesn't
    exclude all of them, because we follow the spirit of RFC 5322.

    Return a ``(ValueTerminal, remaining_value)`` pair, or raise
    errors.HeaderParseError if value does not start with attrtext.

    """
    match = _non_attribute_end_matcher(value)
    if not match:
        raise errors.HeaderParseError(
            "expected attrtext but found {!r}".format(value))
    matched = match.group()
    remaining = value[len(matched):]
    terminal = ValueTerminal(matched, 'attrtext')
    _validate_xtext(terminal)
    return terminal, remaining
-
def get_attribute(value):
    """ [CFWS] 1*attrtext [CFWS]

    This version of the BNF makes the CFWS explicit, and as usual we use a
    value terminal for the actual run of characters.  The RFC equivalent of
    attrtext is the token characters, with the subtraction of '*', "'", and
    '%'.  We include tab in the excluded set just as we do for token.

    Return an ``(Attribute, remaining_value)`` pair, or raise
    errors.HeaderParseError if no attrtext is found.

    """
    result = Attribute()
    if value and value[0] in CFWS_LEADER:
        cfws, value = get_cfws(value)
        result.append(cfws)
    if value and value[0] in ATTRIBUTE_ENDS:
        raise errors.HeaderParseError(
            "expected token but found '{}'".format(value))
    text, value = get_attrtext(value)
    result.append(text)
    if value and value[0] in CFWS_LEADER:
        cfws, value = get_cfws(value)
        result.append(cfws)
    return result, value
-
def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    This is a special parsing routine so that we get a value that
    includes % escapes as a single string (which we decode as a single
    string later).

    Return a ``(ValueTerminal, remaining_value)`` pair, or raise
    errors.HeaderParseError if value does not start with such text.

    """
    match = _non_extended_attribute_end_matcher(value)
    if not match:
        raise errors.HeaderParseError(
            "expected extended attrtext but found {!r}".format(value))
    matched = match.group()
    remaining = value[len(matched):]
    terminal = ValueTerminal(matched, 'extended-attrtext')
    _validate_xtext(terminal)
    return terminal, remaining
-
def get_extended_attribute(value):
    """ [CFWS] 1*extended_attrtext [CFWS]

    This is like the non-extended version except we allow % characters, so
    that we can pick up an encoded value as a single string.

    Return an ``(Attribute, remaining_value)`` pair, or raise
    errors.HeaderParseError if no extended attrtext is found.

    """
    # XXX: should we have an ExtendedAttribute TokenList?
    result = Attribute()
    if value and value[0] in CFWS_LEADER:
        cfws, value = get_cfws(value)
        result.append(cfws)
    if value and value[0] in EXTENDED_ATTRIBUTE_ENDS:
        raise errors.HeaderParseError(
            "expected token but found '{}'".format(value))
    text, value = get_extended_attrtext(value)
    result.append(text)
    if value and value[0] in CFWS_LEADER:
        cfws, value = get_cfws(value)
        result.append(cfws)
    return result, value
-
def get_section(value):
    """ '*' digits

    The formal BNF is more complicated because leading 0s are not allowed.
    We check for that and add a defect.  We also assume no CFWS is allowed
    between the '*' and the digits, though the RFC is not crystal clear on
    that.  The caller should already have dealt with leading CFWS.

    Return a ``(Section, remaining_value)`` pair with ``section.number``
    set to the parsed integer.  Raise errors.HeaderParseError if value does
    not start with '*' followed by at least one digit.

    """
    section = Section()
    if not value or value[0] != '*':
        raise errors.HeaderParseError("Expected section but found {}".format(
                                        value))
    section.append(ValueTerminal('*', 'section-marker'))
    value = value[1:]
    if not value or not value[0].isdigit():
        raise errors.HeaderParseError("Expected section number but "
                                      "found {}".format(value))
    digits = ''
    while value and value[0].isdigit():
        digits += value[0]
        value = value[1:]
    if digits[0] == '0' and digits != '0':
        # A leading zero is a recorded defect, not a parse failure; the
        # number is still usable.
        section.defects.append(errors.InvalidHeaderDefect(
            "section number has an invalid leading 0"))
    section.number = int(digits)
    section.append(ValueTerminal(digits, 'digits'))
    return section, value
-
-
def get_value(value):
    """ quoted-string / attribute

    Return a ``(Value, remaining_value)`` pair.  Leading CFWS, if any, is
    folded into the quoted-string or attribute sub-token.  Raise
    errors.HeaderParseError if there is nothing to parse.

    """
    if not value:
        raise errors.HeaderParseError("Expected value but found end of string")
    cfws = None
    if value[0] in CFWS_LEADER:
        cfws, value = get_cfws(value)
        if not value:
            raise errors.HeaderParseError("Expected value but found "
                                          "only {}".format(cfws))
    parse = get_quoted_string if value[0] == '"' else get_extended_attribute
    content, value = parse(value)
    if cfws is not None:
        content[:0] = [cfws]
    result = Value()
    result.append(content)
    return result, value
-
def get_parameter(value):
    """ attribute [section] ["*"] [CFWS] "=" value

    The CFWS is implied by the RFC but not made explicit in the BNF.  This
    simplified form of the BNF from the RFC is made to conform with the RFC
    BNF through some extra checks.  We do it this way because it makes both
    error recovery and working with the resulting parse tree easier.

    Return a ``(Parameter, remaining_value)`` pair.  Raise
    errors.HeaderParseError if the parameter is structurally broken (no '='
    after the name, or missing RFC 2231 charset/lang delimiters); less
    severe problems are recorded as defects on the returned token.
    """
    # It is possible CFWS would also be implicitly allowed between the section
    # and the 'extended-attribute' marker (the '*') , but we've never seen that
    # in the wild and we will therefore ignore the possibility.
    param = Parameter()
    token, value = get_attribute(value)
    param.append(token)
    if not value or value[0] == ';':
        param.defects.append(errors.InvalidHeaderDefect("Parameter contains "
            "name ({}) but no value".format(token)))
        return param, value
    if value[0] == '*':
        try:
            token, value = get_section(value)
            param.sectioned = True
            param.append(token)
        except errors.HeaderParseError:
            # Not a section number; the '*' may be the extended marker.
            pass
        if not value:
            raise errors.HeaderParseError("Incomplete parameter")
        if value[0] == '*':
            param.append(ValueTerminal('*', 'extended-parameter-marker'))
            value = value[1:]
            param.extended = True
    # value may have become empty after consuming the '*' marker, so guard
    # the index to report a parse error rather than an IndexError.
    if not value or value[0] != '=':
        raise errors.HeaderParseError("Parameter not followed by '='")
    param.append(ValueTerminal('=', 'parameter-separator'))
    value = value[1:]
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        param.append(token)
    remainder = None
    appendto = param
    if param.extended and value and value[0] == '"':
        # Now for some serious hackery to handle the common invalid case of
        # double quotes around an extended value.  We also accept (with defect)
        # a value marked as encoded that isn't really.
        qstring, remainder = get_quoted_string(value)
        inner_value = qstring.stripped_value
        semi_valid = False
        if param.section_number == 0:
            if inner_value and inner_value[0] == "'":
                semi_valid = True
            else:
                token, rest = get_attrtext(inner_value)
                if rest and rest[0] == "'":
                    semi_valid = True
        else:
            try:
                token, rest = get_extended_attrtext(inner_value)
            except errors.HeaderParseError:
                # Not parseable as an extended value; semi_valid stays False.
                pass
            else:
                if not rest:
                    semi_valid = True
        if semi_valid:
            param.defects.append(errors.InvalidHeaderDefect(
                "Quoted string value for extended parameter is invalid"))
            param.append(qstring)
            # Re-parse the quoted content in place: empty the
            # bare-quoted-string and append the parsed pieces to it.
            for t in qstring:
                if t.token_type == 'bare-quoted-string':
                    t[:] = []
                    appendto = t
                    break
            value = inner_value
        else:
            remainder = None
            param.defects.append(errors.InvalidHeaderDefect(
                "Parameter marked as extended but appears to have a "
                "quoted string value that is non-encoded"))
    if value and value[0] == "'":
        token = None
    else:
        token, value = get_value(value)
    if not param.extended or param.section_number > 0:
        if not value or value[0] != "'":
            appendto.append(token)
            if remainder is not None:
                assert not value, value
                value = remainder
            return param, value
        param.defects.append(errors.InvalidHeaderDefect(
            "Apparent initial-extended-value but attribute "
            "was not marked as extended or was not initial section"))
    if not value:
        # Assume the charset/lang is missing and the token is the value.
        param.defects.append(errors.InvalidHeaderDefect(
            "Missing required charset/lang delimiters"))
        appendto.append(token)
        if remainder is None:
            return param, value
    else:
        if token is not None:
            for t in token:
                if t.token_type == 'extended-attrtext':
                    break
            # What was parsed as an extended value is really the charset
            # name, so retag it.  (The original used '==' here, a no-op.)
            t.token_type = 'attrtext'
            appendto.append(t)
            param.charset = t.value
        if value[0] != "'":
            raise errors.HeaderParseError("Expected RFC2231 char/lang encoding "
                                          "delimiter, but found {!r}".format(value))
        appendto.append(ValueTerminal("'", 'RFC2231 delimiter'))
        value = value[1:]
        if value and value[0] != "'":
            token, value = get_attrtext(value)
            appendto.append(token)
            param.lang = token.value
            if not value or value[0] != "'":
                raise errors.HeaderParseError("Expected RFC2231 char/lang encoding "
                                  "delimiter, but found {}".format(value))
        appendto.append(ValueTerminal("'", 'RFC2231 delimiter'))
        value = value[1:]
    if remainder is not None:
        # Treat the rest of value as bare quoted string content.
        v = Value()
        while value:
            if value[0] in WSP:
                token, value = get_fws(value)
            else:
                token, value = get_qcontent(value)
            v.append(token)
        token = v
    else:
        token, value = get_value(value)
    appendto.append(token)
    if remainder is not None:
        assert not value, value
        value = remainder
    return param, value
-
def parse_mime_parameters(value):
    """ parameter *( ";" parameter )

    That BNF is meant to indicate this routine should only be called after
    finding and handling the leading ';'.  There is no corresponding rule in
    the formal RFC grammar, but it is more convenient for us for the set of
    parameters to be treated as its own TokenList.

    This is a 'parse' routine because it consumes the remaining value, but
    it would never be called to parse a full header.  Instead it is called
    to parse everything after the non-parameter value of a specific MIME
    header.

    Return a MimeParameters token list; invalid entries are kept in the
    list and reported through its ``defects``.

    """
    params = MimeParameters()
    while value:
        try:
            param, value = get_parameter(value)
            params.append(param)
        except errors.HeaderParseError:
            cfws = None
            if value[0] in CFWS_LEADER:
                cfws, value = get_cfws(value)
            if not value:
                # Only CFWS was left in the header.
                params.append(cfws)
                return params
            if value[0] != ';':
                bad, value = get_invalid_parameter(value)
                if cfws:
                    bad[:0] = [cfws]
                params.append(bad)
                params.defects.append(errors.InvalidHeaderDefect(
                    "invalid parameter {!r}".format(bad)))
            else:
                if cfws is not None:
                    params.append(cfws)
                params.defects.append(errors.InvalidHeaderDefect(
                    "parameter entry with no content"))
        if value and value[0] != ';':
            # Junk after the otherwise valid parameter.  Mark it as
            # invalid, but it will have a value.
            last = params[-1]
            last.token_type = 'invalid-parameter'
            junk, value = get_invalid_parameter(value)
            last.extend(junk)
            params.defects.append(errors.InvalidHeaderDefect(
                "parameter with invalid trailing text {!r}".format(junk)))
        if value:
            # Must be a ';' at this point.
            params.append(ValueTerminal(';', 'parameter-separator'))
            value = value[1:]
    return params
-
def _find_mime_parameters(tokenlist, value):
    """Do our best to find the parameters in an invalid MIME header

    Everything before the first ';' is appended to tokenlist as phrases and
    misplaced specials; whatever follows the ';' is parsed as MIME
    parameters and appended as well.  tokenlist is modified in place and
    nothing is returned.

    """
    while value and value[0] != ';':
        if value[0] not in PHRASE_ENDS:
            phrase, value = get_phrase(value)
            tokenlist.append(phrase)
            continue
        tokenlist.append(ValueTerminal(value[0], 'misplaced-special'))
        value = value[1:]
    if value:
        # The loop stopped on a ';'.
        tokenlist.append(ValueTerminal(';', 'parameter-separator'))
        tokenlist.append(parse_mime_parameters(value[1:]))
-
def parse_content_type_header(value):
    """ maintype "/" subtype *( ";" parameter )

    The maintype and subtype are tokens.  Theoretically they could
    be checked against the official IANA list + x-token, but we
    don't do that.

    Return a ContentType token list.  ``maintype`` and ``subtype`` are set
    from the parsed tokens (stripped and lower-cased); problems are recorded
    as defects, and the parameters are still searched for after an error.
    """
    ctype = ContentType()
    if not value:
        ctype.defects.append(errors.HeaderMissingRequiredValue(
            "Missing content type specification"))
        return ctype
    try:
        main, value = get_token(value)
    except errors.HeaderParseError:
        ctype.defects.append(errors.InvalidHeaderDefect(
            "Expected content maintype but found {!r}".format(value)))
        _find_mime_parameters(ctype, value)
        return ctype
    ctype.append(main)
    # XXX: If we really want to follow the formal grammar we should make
    # maintype and subtype specialized TokenLists here.  Probably not worth it.
    if not value or value[0] != '/':
        ctype.defects.append(errors.InvalidHeaderDefect(
            "Invalid content type"))
        if value:
            _find_mime_parameters(ctype, value)
        return ctype
    ctype.maintype = main.value.strip().lower()
    ctype.append(ValueTerminal('/', 'content-type-separator'))
    value = value[1:]
    try:
        sub, value = get_token(value)
    except errors.HeaderParseError:
        ctype.defects.append(errors.InvalidHeaderDefect(
            "Expected content subtype but found {!r}".format(value)))
        _find_mime_parameters(ctype, value)
        return ctype
    ctype.append(sub)
    ctype.subtype = sub.value.strip().lower()
    if not value:
        return ctype
    if value[0] == ';':
        ctype.append(ValueTerminal(';', 'parameter-separator'))
        ctype.append(parse_mime_parameters(value[1:]))
        return ctype
    ctype.defects.append(errors.InvalidHeaderDefect(
        "Only parameters are valid after content type, but "
        "found {!r}".format(value)))
    # The RFC requires that a syntactically invalid content-type be treated
    # as text/plain.  Perhaps we should postel this, but we should probably
    # only do that if we were checking the subtype value against IANA.
    del ctype.maintype, ctype.subtype
    _find_mime_parameters(ctype, value)
    return ctype
-
def parse_content_disposition_header(value):
    """ disposition-type *( ";" parameter )

    Return a ContentDisposition token list.  ``content_disposition`` is set
    to the stripped, lower-cased disposition type when one is found.
    Problems are recorded as defects on the returned token; the parameters
    are still searched for after an error.

    """
    disp_header = ContentDisposition()
    if not value:
        disp_header.defects.append(errors.HeaderMissingRequiredValue(
            "Missing content disposition"))
        return disp_header
    try:
        token, value = get_token(value)
    except errors.HeaderParseError:
        # Record the defect on this header's own token list (the original
        # referenced an undefined name here and raised NameError).
        disp_header.defects.append(errors.InvalidHeaderDefect(
            "Expected content disposition but found {!r}".format(value)))
        _find_mime_parameters(disp_header, value)
        return disp_header
    disp_header.append(token)
    disp_header.content_disposition = token.value.strip().lower()
    if not value:
        return disp_header
    if value[0] != ';':
        disp_header.defects.append(errors.InvalidHeaderDefect(
            "Only parameters are valid after content disposition, but "
            "found {!r}".format(value)))
        _find_mime_parameters(disp_header, value)
        return disp_header
    disp_header.append(ValueTerminal(';', 'parameter-separator'))
    disp_header.append(parse_mime_parameters(value[1:]))
    return disp_header
-
def parse_content_transfer_encoding_header(value):
    """ mechanism

    Return a ContentTransferEncoding token list.  ``cte`` is set to the
    stripped, lower-cased mechanism when one is found.  Anything after the
    mechanism is kept in the token list and reported as a defect.

    """
    # We should probably validate the values, since the list is fixed.
    cte_header = ContentTransferEncoding()
    if not value:
        cte_header.defects.append(errors.HeaderMissingRequiredValue(
            "Missing content transfer encoding"))
        return cte_header
    try:
        token, value = get_token(value)
    except errors.HeaderParseError:
        # Record the defect on this header's own token list (the original
        # referenced an undefined name here and raised NameError).
        cte_header.defects.append(errors.InvalidHeaderDefect(
            "Expected content transfer encoding but found {!r}".format(value)))
    else:
        cte_header.append(token)
        cte_header.cte = token.value.strip().lower()
    if not value:
        return cte_header
    while value:
        cte_header.defects.append(errors.InvalidHeaderDefect(
            "Extra text after content transfer encoding"))
        if value[0] in PHRASE_ENDS:
            cte_header.append(ValueTerminal(value[0], 'misplaced-special'))
            value = value[1:]
        else:
            token, value = get_phrase(value)
            cte_header.append(token)
    return cte_header
+"""Header value parser implementing various email-related RFC parsing rules.
+
+The parsing methods defined in this module implement various email related
+parsing rules.  Principal among them is RFC 5322, which is the follow-on
+to RFC 2822 and primarily a clarification of the former. It also implements
+RFC 2047 encoded word decoding.
+
+RFC 5322 goes to considerable trouble to maintain backward compatibility with
+RFC 822 in the parse phase, while cleaning up the structure on the generation
+phase. This parser supports correct RFC 5322 generation by tagging white space
+as folding white space only when folding is allowed in the non-obsolete rule
+sets. Actually, the parser is even more generous when accepting input than RFC
+5322 mandates, following the spirit of Postel's Law, which RFC 5322 encourages.
+Where possible deviations from the standard are annotated on the 'defects'
+attribute of tokens that deviate.
+
+The general structure of the parser follows RFC 5322, and uses its terminology
+where there is a direct correspondence. Where the implementation requires a
+somewhat different structure than that used by the formal grammar, new terms
+that mimic the closest existing terms are used. Thus, it really helps to have
+a copy of RFC 5322 handy when studying this code.
+
+Input to the parser is a string that has already been unfolded according to
+RFC 5322 rules. According to the RFC this unfolding is the very first step, and
+this parser leaves the unfolding step to a higher level message parser, which
+will have already detected the line breaks that need unfolding while
+determining the beginning and end of each header.
+
+The output of the parser is a TokenList object, which is a list subclass. A
+TokenList is a recursive data structure. The terminal nodes of the structure
+are Terminal objects, which are subclasses of str. These do not correspond
+directly to terminal objects in the formal grammar, but are instead more
+practical higher level combinations of true terminals.
+
+All TokenList and Terminal objects have a 'value' attribute, which produces the
+semantically meaningful value of that part of the parse subtree. The value of
+all whitespace tokens (no matter how many sub-tokens they may contain) is a
+single space, as per the RFC rules. This includes 'CFWS', which is herein
+included in the general class of whitespace tokens. There is one exception to
+the rule that whitespace tokens are collapsed into single spaces in values: in
+the value of a 'bare-quoted-string' (a quoted-string with no leading or
+trailing whitespace), any whitespace that appeared between the quotation marks
+is preserved in the returned value. Note that in all Terminal strings quoted
+pairs are turned into their unquoted values.
+
+All TokenList and Terminal objects also have a string value, which attempts to
+be a "canonical" representation of the RFC-compliant form of the substring that
+produced the parsed subtree, including minimal use of quoted pair quoting.
+Whitespace runs are not collapsed.
+
+Comment tokens also have a 'content' attribute providing the string found
+between the parens (including any nested comments) with whitespace preserved.
+
+All TokenList and Terminal objects have a 'defects' attribute which is a
+possibly empty list of all the defects found while creating the token.  Defects
+may appear on any token in the tree, and a composite list of all defects in the
+subtree is available through the 'all_defects' attribute of any node. (For
+Terminal nodes x.defects == x.all_defects.)
+
+Each object in a parse tree is called a 'token', and each has a 'token_type'
+attribute that gives the name from the RFC 5322 grammar that it represents.
+Not all RFC 5322 nodes are produced, and there is one non-RFC 5322 node that
+may be produced: 'ptext'. A 'ptext' is a string of printable ascii characters.
+It is returned in place of lists of (ctext/quoted-pair) and
+(qtext/quoted-pair).
+
+XXX: provide complete list of token types.
+"""
+from __future__ import print_function
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import int, range, str, super, list
+
+import re
+from collections import namedtuple, OrderedDict
+
+from future.backports.urllib.parse import (unquote, unquote_to_bytes)
+from future.backports.email import _encoded_words as _ew
+from future.backports.email import errors
+from future.backports.email import utils
+
+#
+# Useful constants and functions
+#
+
# Whitespace characters.
WSP = set(' \t')
# Characters that can start CFWS: whitespace or a comment opener.
CFWS_LEADER = WSP | set('(')
SPECIALS = set(r'()<>@,:;.\"[]')
# Characters that end an atom, and the same set with '.' allowed.
ATOM_ENDS = SPECIALS | WSP
DOT_ATOM_ENDS = ATOM_ENDS - set('.')
# '.', '"', and '(' do not end phrases in order to support obs-phrase
PHRASE_ENDS = SPECIALS - set('."(')
# MIME token delimiters: the specials plus '/', '?' and '=', without '.'.
TSPECIALS = (SPECIALS | set('/?=')) - set('.')
TOKEN_ENDS = TSPECIALS | WSP
# Attribute names additionally stop at '*', "'" and '%'.
ASPECIALS = TSPECIALS | set("*'%")
ATTRIBUTE_ENDS = ASPECIALS | WSP
# Extended attributes may contain '%' escapes.
EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS - set('%')
+
def quote_string(value):
    """Return str(value) wrapped in double quotes.

    Backslashes are escaped first and then double quotes, so the result can
    be used as a quoted-string.
    """
    escaped = str(value).replace('\\', '\\\\').replace('"', r'\"')
    return '"' + escaped + '"'
+
+#
+# Accumulator for header folding
+#
+
+class _Folded(object):
+
+ def __init__(self, maxlen, policy):
+ self.maxlen = maxlen
+ self.policy = policy
+ self.lastlen = 0
+ self.stickyspace = None
+ self.firstline = True
+ self.done = []
+ self.current = list() # uses l.clear()
+
+ def newline(self):
+ self.done.extend(self.current)
+ self.done.append(self.policy.linesep)
+ self.current.clear()
+ self.lastlen = 0
+
+ def finalize(self):
+ if self.current:
+ self.newline()
+
+ def __str__(self):
+ return ''.join(self.done)
+
+ def append(self, stoken):
+ self.current.append(stoken)
+
+ def append_if_fits(self, token, stoken=None):
+ if stoken is None:
+ stoken = str(token)
+ l = len(stoken)
+ if self.stickyspace is not None:
+ stickyspace_len = len(self.stickyspace)
+ if self.lastlen + stickyspace_len + l <= self.maxlen:
+ self.current.append(self.stickyspace)
+ self.lastlen += stickyspace_len
+ self.current.append(stoken)
+ self.lastlen += l
+ self.stickyspace = None
+ self.firstline = False
+ return True
+ if token.has_fws:
+ ws = token.pop_leading_fws()
+ if ws is not None:
+ self.stickyspace += str(ws)
+ stickyspace_len += len(ws)
+ token._fold(self)
+ return True
+ if stickyspace_len and l + 1 <= self.maxlen:
+ margin = self.maxlen - l
+ if 0 < margin < stickyspace_len:
+ trim = stickyspace_len - margin
+ self.current.append(self.stickyspace[:trim])
+ self.stickyspace = self.stickyspace[trim:]
+ stickyspace_len = trim
+ self.newline()
+ self.current.append(self.stickyspace)
+ self.current.append(stoken)
+ self.lastlen = l + stickyspace_len
+ self.stickyspace = None
+ self.firstline = False
+ return True
+ if not self.firstline:
+ self.newline()
+ self.current.append(self.stickyspace)
+ self.current.append(stoken)
+ self.stickyspace = None
+ self.firstline = False
+ return True
+ if self.lastlen + l <= self.maxlen:
+ self.current.append(stoken)
+ self.lastlen += l
+ return True
+ if l < self.maxlen:
+ self.newline()
+ self.current.append(stoken)
+ self.lastlen = l
+ return True
+ return False
+
+#
+# TokenList and its subclasses
+#
+
class TokenList(list):
    """A list of tokens (other TokenLists or terminals) forming a parse node.

    ``token_type`` names the syntactic element the list represents and
    ``defects`` collects the defects registered directly on this node.
    """

    token_type = None

    def __init__(self, *args, **kw):
        super(TokenList, self).__init__(*args, **kw)
        self.defects = []

    def __str__(self):
        return ''.join(str(x) for x in self)

    def __repr__(self):
        return '{}({})'.format(self.__class__.__name__,
                               super(TokenList, self).__repr__())

    @property
    def value(self):
        """Concatenation of the non-empty ``value`` of every element."""
        return ''.join(x.value for x in self if x.value)

    @property
    def all_defects(self):
        """Defects of this node followed by those of all its elements."""
        return sum((x.all_defects for x in self), self.defects)

    #
    # Folding API
    #
    # parts():
    #
    # return a list of objects that constitute the "higher level syntactic
    # objects" specified by the RFC as the best places to fold a header line.
    # The returned objects must include leading folding white space, even if
    # this means mutating the underlying parse tree of the object.  Each object
    # is only responsible for returning *its* parts, and should not drill down
    # to any lower level except as required to meet the leading folding white
    # space constraint.
    #
    # _fold(folded):
    #
    #   folded: the result accumulator.  This is an instance of _Folded.
    #       (XXX: I haven't finished factoring this out yet, the folding code
    #       pretty much uses this as a state object.) When the folded.current
    #       contains as much text as will fit, the _fold method should call
    #       folded.newline.
    #  folded.lastlen: the current length of the text stored in folded.current.
    #  folded.maxlen: The maximum number of characters that may appear on a
    #       folded line.  Differs from the policy setting in that "no limit" is
    #       represented by +inf, which means it can be used in the trivially
    #       logical fashion in comparisons.
    #
    # Currently no subclasses implement parts, and I think this will remain
    # true.  A subclass only needs to implement _fold when the generic version
    # isn't sufficient.  _fold will need to be implemented primarily when it is
    # possible for encoded words to appear in the specialized token-list, since
    # there is no generic algorithm that can know where exactly the encoded
    # words are allowed.  A _fold implementation is responsible for filling
    # lines in the same general way that the top level _fold does.  It may, and
    # should, call the _fold method of sub-objects in a similar fashion to that
    # of the top level _fold.
    #
    # XXX: I'm hoping it will be possible to factor the existing code further
    # to reduce redundancy and make the logic clearer.

    @property
    def parts(self):
        """Yield the fold candidates of this list (see the notes above)."""
        klass = self.__class__
        this = []
        for token in self:
            if token.startswith_fws():
                if this:
                    yield this[0] if len(this) == 1 else klass(this)
                    # Rebind rather than call clear(): ``this`` may be a
                    # plain list literal (see below), which has no clear()
                    # method on Python 2.
                    this = []
            end_ws = token.pop_trailing_ws()
            this.append(token)
            if end_ws:
                yield klass(this)
                this = [end_ws]
        if this:
            yield this[0] if len(this) == 1 else klass(this)

    def startswith_fws(self):
        return self[0].startswith_fws()

    def pop_leading_fws(self):
        """Remove and return the leading 'fws' token, searching downwards."""
        if self[0].token_type == 'fws':
            return self.pop(0)
        return self[0].pop_leading_fws()

    def pop_trailing_ws(self):
        """Remove and return the trailing 'cfws' token, searching downwards."""
        if self[-1].token_type == 'cfws':
            return self.pop(-1)
        return self[-1].pop_trailing_ws()

    @property
    def has_fws(self):
        for part in self:
            if part.has_fws:
                return True
        return False

    def has_leading_comment(self):
        return self[0].has_leading_comment()

    @property
    def comments(self):
        comments = []
        for token in self:
            comments.extend(token.comments)
        return comments

    def fold(self, **_3to2kwargs):
        """Return the folded text of this list.

        Takes a single, mandatory keyword argument ``policy``; a missing
        ``policy`` raises KeyError.
        """
        # max_line_length 0/None means no limit, ie: infinitely long.
        policy = _3to2kwargs['policy']; del _3to2kwargs['policy']
        maxlen = policy.max_line_length or float("+inf")
        folded = _Folded(maxlen, policy)
        self._fold(folded)
        folded.finalize()
        return str(folded)

    def as_encoded_word(self, charset):
        # This works only for things returned by 'parts', which include
        # the leading fws, if any, that should be used.
        res = []
        ws = self.pop_leading_fws()
        if ws:
            res.append(ws)
        trailer = self.pop(-1) if self[-1].token_type == 'fws' else ''
        res.append(_ew.encode(str(self), charset))
        res.append(trailer)
        return ''.join(res)

    def cte_encode(self, charset, policy):
        res = []
        for part in self:
            res.append(part.cte_encode(charset, policy))
        return ''.join(res)

    def _fold(self, folded):
        """Fold every part of this list into the accumulator *folded*."""
        for part in self.parts:
            tstr = str(part)
            try:
                str(part).encode('us-ascii')
            except UnicodeEncodeError:
                if any(isinstance(x, errors.UndecodableBytesDefect)
                       for x in part.all_defects):
                    charset = 'unknown-8bit'
                else:
                    # XXX: this should be a policy setting
                    charset = 'utf-8'
                tstr = part.cte_encode(charset, folded.policy)
            if folded.append_if_fits(part, tstr):
                continue
            # Peel off the leading whitespace if any and make it sticky, to
            # avoid infinite recursion.
            ws = part.pop_leading_fws()
            if ws is not None:
                # Use the whitespace that was just popped.  Popping a second
                # element here would turn the first real token of the part
                # into "whitespace".
                folded.stickyspace = str(ws)
                if folded.append_if_fits(part):
                    continue
            if part.has_fws:
                part._fold(folded)
                continue
            # There are no fold points in this one; it is too long for a single
            # line and can't be split...we just have to put it on its own line.
            folded.append(tstr)
            folded.newline()

    def pprint(self, indent=''):
        """Print the tree representation, each line prefixed by *indent*."""
        print(self.ppstr(indent=indent))

    def ppstr(self, indent=''):
        """Return the tree representation, each line prefixed by *indent*."""
        return '\n'.join(self._pp(indent=indent))

    def _pp(self, indent=''):
        yield '{}{}/{}('.format(
            indent,
            self.__class__.__name__,
            self.token_type)
        for token in self:
            if not hasattr(token, '_pp'):
                yield (indent + ' !! invalid element in token '
                       'list: {!r}'.format(token))
            else:
                for line in token._pp(indent+' '):
                    yield line
        if self.defects:
            extra = ' Defects: {}'.format(self.defects)
        else:
            extra = ''
        yield '{}){}'.format(indent, extra)
+
+
class WhiteSpaceTokenList(TokenList):
    """Token list whose ``value`` is always a single space."""

    @property
    def value(self):
        return ' '

    @property
    def comments(self):
        """Content of every direct element whose token_type is 'comment'."""
        found = []
        for item in self:
            if item.token_type == 'comment':
                found.append(item.content)
        return found
+
+
class UnstructuredTokenList(TokenList):
    """Token list for an 'unstructured' header value."""

    token_type = 'unstructured'

    def _fold(self, folded):
        """Fold this value into *folded*, using encoded words for non-ASCII."""
        if any(x.token_type == 'encoded-word' for x in self):
            return self._fold_encoded(folded)
        # Here we can have either a pure ASCII string that may or may not
        # have surrogateescape encoded bytes, or a unicode string.
        last_ew = None
        for part in self.parts:
            tstr = str(part)
            is_ew = False
            try:
                str(part).encode('us-ascii')
            except UnicodeEncodeError:
                if any(isinstance(x, errors.UndecodableBytesDefect)
                       for x in part.all_defects):
                    charset = 'unknown-8bit'
                else:
                    charset = 'utf-8'
                if last_ew is not None:
                    # We've already done an EW, combine this one with it
                    # if there's room.
                    chunk = get_unstructured(
                        ''.join(folded.current[last_ew:]+[tstr])
                        ).as_encoded_word(charset)
                    oldlastlen = sum(len(x) for x in folded.current[:last_ew])
                    schunk = str(chunk)
                    lchunk = len(schunk)
                    if oldlastlen + lchunk <= folded.maxlen:
                        del folded.current[last_ew:]
                        folded.append(schunk)
                        folded.lastlen = oldlastlen + lchunk
                        continue
                tstr = part.as_encoded_word(charset)
                is_ew = True
            if folded.append_if_fits(part, tstr):
                if is_ew:
                    last_ew = len(folded.current) - 1
                continue
            if is_ew or last_ew:
                # It's too big to fit on the line, but since we've
                # got encoded words we can use encoded word folding.
                part._fold_as_ew(folded)
                continue
            # Peel off the leading whitespace if any and make it sticky, to
            # avoid infinite recursion.
            ws = part.pop_leading_fws()
            if ws is not None:
                folded.stickyspace = str(ws)
                if folded.append_if_fits(part):
                    continue
            if part.has_fws:
                # _fold() takes the accumulator; the public fold() accepts
                # only a ``policy`` keyword and would raise TypeError here.
                part._fold(folded)
                continue
            # It can't be split...we just have to put it on its own line.
            folded.append(tstr)
            folded.newline()
            last_ew = None

    def cte_encode(self, charset, policy):
        """Return the value with non-ASCII parts encoded using *charset*.

        After the first encoded part, each further non-ASCII part is
        merged with the text that follows that first encoded part into a
        single encoded word, which replaces that text in the result.
        """
        res = []
        last_ew = None
        for part in self:
            spart = str(part)
            try:
                spart.encode('us-ascii')
                res.append(spart)
            except UnicodeEncodeError:
                if last_ew is None:
                    res.append(part.cte_encode(charset, policy))
                    last_ew = len(res)
                else:
                    tl = get_unstructured(''.join(res[last_ew:] + [spart]))
                    # as_encoded_word() requires the charset, and the pieces
                    # folded into the chunk must be replaced, not duplicated.
                    res[last_ew:] = [tl.as_encoded_word(charset)]
        return ''.join(res)
+
+
class Phrase(TokenList):
    """Token list for a 'phrase'."""

    token_type = 'phrase'

    def _fold(self, folded):
        """Fold this phrase into the accumulator *folded*."""
        # As with Unstructured, we can have pure ASCII with or without
        # surrogateescape encoded bytes, or we could have unicode.  But this
        # case is more complicated, since we have to deal with the various
        # sub-token types and how they can be composed in the face of
        # unicode-that-needs-CTE-encoding, and the fact that if a token has a
        # comment, that becomes a barrier across which we can't compose
        # encoded words.
        last_ew = None
        for part in self.parts:
            tstr = str(part)
            has_ew = False
            try:
                str(part).encode('us-ascii')
            except UnicodeEncodeError:
                if any(isinstance(x, errors.UndecodableBytesDefect)
                       for x in part.all_defects):
                    charset = 'unknown-8bit'
                else:
                    charset = 'utf-8'
                if last_ew is not None and not part.has_leading_comment():
                    # We've already done an EW, let's see if we can combine
                    # this one with it.  The last_ew logic ensures that all we
                    # have at this point is atoms, no comments or quoted
                    # strings.  So we can treat the text between the last
                    # encoded word and the content of this token as
                    # unstructured text, and things will work correctly.  But
                    # we have to strip off any trailing comment on this token
                    # first, and if it is a quoted string we have to pull out
                    # the content (we're encoding it, so it no longer needs to
                    # be quoted).
                    if part[-1].token_type == 'cfws' and part.comments:
                        part.pop(-1)
                    for i, token in enumerate(part):
                        if token.token_type == 'bare-quoted-string':
                            part[i] = UnstructuredTokenList(token[:])
                    chunk = get_unstructured(
                        ''.join(folded.current[last_ew:]+[tstr])
                        ).as_encoded_word(charset)
                    schunk = str(chunk)
                    lchunk = len(schunk)
                    # last_ew is an index into folded.current, not a length:
                    # measure the text that stays in front of the chunk.
                    oldlastlen = sum(len(x) for x in folded.current[:last_ew])
                    if oldlastlen + lchunk <= folded.maxlen:
                        del folded.current[last_ew:]
                        folded.append(schunk)
                        folded.lastlen = sum(len(x) for x in folded.current)
                        continue
                tstr = part.as_encoded_word(charset)
                has_ew = True
            if folded.append_if_fits(part, tstr):
                if has_ew and not part.comments:
                    last_ew = len(folded.current) - 1
                elif part.comments or part.token_type == 'quoted-string':
                    # If a comment is involved we can't combine EWs.  And if a
                    # quoted string is involved, it's not worth the effort to
                    # try to combine them.
                    last_ew = None
                continue
            part._fold(folded)

    def cte_encode(self, charset, policy):
        """Return the phrase with non-ASCII parts encoded using *charset*."""
        res = []
        last_ew = None
        is_ew = False
        for part in self:
            spart = str(part)
            try:
                spart.encode('us-ascii')
                res.append(spart)
            except UnicodeEncodeError:
                is_ew = True
                if last_ew is None:
                    if not part.comments:
                        last_ew = len(res)
                    res.append(part.cte_encode(charset, policy))
                elif not part.has_leading_comment():
                    if part[-1].token_type == 'cfws' and part.comments:
                        part.pop(-1)
                    for i, token in enumerate(part):
                        if token.token_type == 'bare-quoted-string':
                            part[i] = UnstructuredTokenList(token[:])
                    tl = get_unstructured(''.join(res[last_ew:] + [spart]))
                    res[last_ew:] = [tl.as_encoded_word(charset)]
            if part.comments or (not is_ew and part.token_type == 'quoted-string'):
                last_ew = None
        return ''.join(res)
+
class Word(TokenList):
    """Token list tagged with token_type 'word'."""

    token_type = 'word'
+
+
class CFWSList(WhiteSpaceTokenList):
    """Whitespace token list tagged with token_type 'cfws'."""

    token_type = 'cfws'

    def has_leading_comment(self):
        """Return True when this list contains at least one comment."""
        if self.comments:
            return True
        return False
+
+
class Atom(TokenList):
    """Token list tagged with token_type 'atom'."""

    token_type = 'atom'
+
+
class Token(TokenList):
    """Token list tagged with token_type 'token'."""

    token_type = 'token'
+
+
class EncodedWord(TokenList):
    """Token list holding the decoded content of an encoded word.

    ``cte``, ``charset`` and ``lang`` default to None at class level.
    """

    token_type = 'encoded-word'
    cte = None
    charset = None
    lang = None

    @property
    def encoded(self):
        """The stored ``cte`` text, or a fresh encoding of the content."""
        if self.cte is not None:
            return self.cte
        # The result has to be returned; without it the property evaluated
        # to None whenever no ``cte`` was stored.
        return _ew.encode(str(self), self.charset)
+
+
+
class QuotedString(TokenList):
    """Token list tagged with token_type 'quoted-string'."""

    token_type = 'quoted-string'

    @property
    def content(self):
        """Value of the first 'bare-quoted-string' element, else None."""
        return next((item.value for item in self
                     if item.token_type == 'bare-quoted-string'), None)

    @property
    def quoted_value(self):
        """Element values joined, keeping the bare quoted string quoted."""
        return ''.join(
            str(item) if item.token_type == 'bare-quoted-string'
            else item.value
            for item in self)

    @property
    def stripped_value(self):
        """Value of the first 'bare-quoted-string' element, else None."""
        return next((item.value for item in self
                     if item.token_type == 'bare-quoted-string'), None)
+
+
class BareQuotedString(QuotedString):
    """Quoted-string content; str() re-adds the quotes, ``value`` does not."""

    token_type = 'bare-quoted-string'

    def __str__(self):
        joined = ''.join([str(item) for item in self])
        return quote_string(joined)

    @property
    def value(self):
        return ''.join([str(item) for item in self])
+
+
class Comment(WhiteSpaceTokenList):
    """Token list for a parenthesised comment."""

    token_type = 'comment'

    def __str__(self):
        return ''.join(['('] + [self.quote(x) for x in self] + [')'])

    def quote(self, value):
        """Return the text of *value* escaped for use inside a comment.

        Nested comments are returned as rendered by their own ``__str__``;
        for anything else backslashes and parentheses are
        backslash-escaped.
        """
        if value.token_type == 'comment':
            return str(value)
        # '\\(' and '\\)' spell the same two-character strings that the
        # invalid escape sequences '\(' and '\)' used to produce.
        return str(value).replace('\\', '\\\\').replace(
                                  '(', '\\(').replace(
                                  ')', '\\)')

    @property
    def content(self):
        return ''.join(str(x) for x in self)

    @property
    def comments(self):
        return [self.content]
+
class AddressList(TokenList):
    """Token list tagged with token_type 'address-list'."""

    token_type = 'address-list'

    @property
    def addresses(self):
        """The direct elements whose token_type is 'address'."""
        return [item for item in self if item.token_type == 'address']

    @property
    def mailboxes(self):
        """``mailboxes`` of every address element, concatenated."""
        collected = []
        for item in self:
            if item.token_type == 'address':
                collected = collected + item.mailboxes
        return collected

    @property
    def all_mailboxes(self):
        """``all_mailboxes`` of every address element, concatenated."""
        collected = []
        for item in self:
            if item.token_type == 'address':
                collected = collected + item.all_mailboxes
        return collected
+
+
class Address(TokenList):
    """Token list tagged with token_type 'address'.

    All properties look only at the first element.
    """

    token_type = 'address'

    @property
    def display_name(self):
        """Display name of a leading group; None for anything else."""
        first = self[0]
        if first.token_type != 'group':
            return None
        return first.display_name

    @property
    def mailboxes(self):
        """Valid mailboxes: an invalid mailbox contributes nothing."""
        first = self[0]
        kind = first.token_type
        if kind == 'mailbox':
            return [first]
        if kind == 'invalid-mailbox':
            return []
        return first.mailboxes

    @property
    def all_mailboxes(self):
        """Like ``mailboxes`` but an invalid mailbox is included."""
        first = self[0]
        if first.token_type in ('mailbox', 'invalid-mailbox'):
            return [first]
        return first.all_mailboxes
+
class MailboxList(TokenList):
    """Token list tagged with token_type 'mailbox-list'."""

    token_type = 'mailbox-list'

    @property
    def mailboxes(self):
        """The direct elements whose token_type is 'mailbox'."""
        return [item for item in self if item.token_type == 'mailbox']

    @property
    def all_mailboxes(self):
        """The direct elements that are mailboxes or invalid mailboxes."""
        wanted = ('mailbox', 'invalid-mailbox')
        return [item for item in self if item.token_type in wanted]
+
+
class GroupList(TokenList):
    """Token list tagged with token_type 'group-list'.

    Both properties return an empty list unless the first element is a
    'mailbox-list'.
    """

    token_type = 'group-list'

    @property
    def mailboxes(self):
        if self and self[0].token_type == 'mailbox-list':
            return self[0].mailboxes
        return []

    @property
    def all_mailboxes(self):
        if self and self[0].token_type == 'mailbox-list':
            return self[0].all_mailboxes
        return []
+
+
class Group(TokenList):
    """Token list tagged with token_type 'group'.

    The mailbox properties read the third element (index 2), the display
    name is taken from the first element.
    """

    token_type = "group"

    @property
    def mailboxes(self):
        members = self[2]
        if members.token_type == 'group-list':
            return members.mailboxes
        return []

    @property
    def all_mailboxes(self):
        members = self[2]
        if members.token_type == 'group-list':
            return members.all_mailboxes
        return []

    @property
    def display_name(self):
        return self[0].display_name
+
+
class NameAddr(TokenList):
    """Token list tagged with token_type 'name-addr'.

    The address properties are delegated to the last element; the display
    name comes from the first element when there is more than one.
    """

    token_type = 'name-addr'

    @property
    def display_name(self):
        if len(self) != 1:
            return self[0].display_name
        return None

    @property
    def local_part(self):
        address = self[-1]
        return address.local_part

    @property
    def domain(self):
        address = self[-1]
        return address.domain

    @property
    def route(self):
        address = self[-1]
        return address.route

    @property
    def addr_spec(self):
        address = self[-1]
        return address.addr_spec
+
+
class AngleAddr(TokenList):
    """Token list tagged with token_type 'angle-addr'."""

    token_type = 'angle-addr'

    @property
    def local_part(self):
        """Local part of the first 'addr-spec' element, else None."""
        return next((item.local_part for item in self
                     if item.token_type == 'addr-spec'), None)

    @property
    def domain(self):
        """Domain of the first 'addr-spec' element, else None."""
        return next((item.domain for item in self
                     if item.token_type == 'addr-spec'), None)

    @property
    def route(self):
        """Domains of the first 'obs-route' element, else None."""
        return next((item.domains for item in self
                     if item.token_type == 'obs-route'), None)

    @property
    def addr_spec(self):
        """addr_spec of the first 'addr-spec' element, or '<>' if none."""
        for item in self:
            if item.token_type == 'addr-spec':
                return item.addr_spec
        return '<>'
+
+
class ObsRoute(TokenList):
    """Token list tagged with token_type 'obs-route'."""

    token_type = 'obs-route'

    @property
    def domains(self):
        """``domain`` of every direct 'domain' element, in order."""
        found = []
        for item in self:
            if item.token_type == 'domain':
                found.append(item.domain)
        return found
+
+
class Mailbox(TokenList):
    """Token list tagged with token_type 'mailbox'.

    Every property is delegated to the first element; ``display_name`` and
    ``route`` are None unless that element is a 'name-addr'.
    """

    token_type = 'mailbox'

    @property
    def display_name(self):
        head = self[0]
        if head.token_type != 'name-addr':
            return None
        return head.display_name

    @property
    def local_part(self):
        head = self[0]
        return head.local_part

    @property
    def domain(self):
        head = self[0]
        return head.domain

    @property
    def route(self):
        head = self[0]
        if head.token_type != 'name-addr':
            return None
        return head.route

    @property
    def addr_spec(self):
        head = self[0]
        return head.addr_spec
+
+
class InvalidMailbox(TokenList):
    """Token list tagged 'invalid-mailbox'; all address properties are None."""

    token_type = 'invalid-mailbox'

    @property
    def display_name(self):
        return None

    # The remaining address attributes share the same always-None property.
    local_part = display_name
    domain = display_name
    route = display_name
    addr_spec = display_name
+
+
class Domain(TokenList):
    """Token list tagged with token_type 'domain'."""

    token_type = 'domain'

    @property
    def domain(self):
        """The list's ``value`` with all whitespace removed."""
        pieces = super(Domain, self).value.split()
        return ''.join(pieces)
+
+
class DotAtom(TokenList):
    """Token list tagged with token_type 'dot-atom'."""

    token_type = 'dot-atom'
+
+
class DotAtomText(TokenList):
    """Token list tagged with token_type 'dot-atom-text'."""

    token_type = 'dot-atom-text'
+
+
class AddrSpec(TokenList):
    """Token list tagged with token_type 'addr-spec'.

    With fewer than three elements only the first element (the local part)
    is considered present.
    """

    token_type = 'addr-spec'

    @property
    def local_part(self):
        return self[0].local_part

    @property
    def domain(self):
        if len(self) >= 3:
            return self[-1].domain
        return None

    @property
    def value(self):
        if len(self) >= 3:
            left = self[0].value.rstrip()
            right = self[2].value.lstrip()
            return left + self[1].value + right
        return self[0].value

    @property
    def addr_spec(self):
        """Local part (quoted when it contains a DOT_ATOM_ENDS char) plus domain."""
        local = self.local_part
        # Equivalent to "removing DOT_ATOM_ENDS shrinks the character set".
        if not set(local).isdisjoint(DOT_ATOM_ENDS):
            local = quote_string(local)
        domain = self.domain
        if domain is None:
            return local
        return local + '@' + domain
+
+
class ObsLocalPart(TokenList):
    """Token list tagged with token_type 'obs-local-part'."""

    token_type = 'obs-local-part'
+
+
class DisplayName(Phrase):
    """Phrase tagged with token_type 'display-name'."""

    token_type = 'display-name'

    @property
    def display_name(self):
        """Value of the phrase without leading and trailing cfws."""
        trimmed = TokenList(self)
        # Leading cfws is either the first element itself or the first
        # element of the first element.
        if trimmed[0].token_type == 'cfws':
            trimmed.pop(0)
        elif trimmed[0][0].token_type == 'cfws':
            trimmed[0] = TokenList(trimmed[0][1:])
        # Same treatment for the trailing end.
        if trimmed[-1].token_type == 'cfws':
            trimmed.pop()
        elif trimmed[-1][-1].token_type == 'cfws':
            trimmed[-1] = TokenList(trimmed[-1][:-1])
        return trimmed.value

    @property
    def value(self):
        """The phrase value; re-quoted when it has defects or quoted strings."""
        if self.defects:
            needs_quotes = True
        else:
            needs_quotes = any(
                item.token_type == 'quoted-string' for item in self)
        if not needs_quotes:
            return super(DisplayName, self).value
        lead = trail = ''
        if self[0].token_type == 'cfws' or self[0][0].token_type == 'cfws':
            lead = ' '
        if self[-1].token_type == 'cfws' or self[-1][-1].token_type == 'cfws':
            trail = ' '
        return lead + quote_string(self.display_name) + trail
+
+
class LocalPart(TokenList):
    """Token list tagged with token_type 'local-part'."""

    token_type = 'local-part'

    @property
    def value(self):
        head = self[0]
        if head.token_type == "quoted-string":
            return head.quoted_value
        return head.value

    @property
    def local_part(self):
        """Value of the first element with cfws around dots and ends removed."""
        # Strip whitespace from front, back, and around dots.  DOT sentinels
        # at both ends let the edges be handled like interior dots.
        kept = [DOT]
        prev = DOT
        prev_is_list = False
        for tok in self[0] + [DOT]:
            if tok.token_type == 'cfws':
                continue
            if (prev_is_list and tok.token_type == 'dot'
                    and prev[-1].token_type == 'cfws'):
                # Drop whitespace trailing the token in front of this dot.
                kept[-1] = TokenList(prev[:-1])
            tok_is_list = isinstance(tok, TokenList)
            if (tok_is_list and prev.token_type == 'dot'
                    and tok[0].token_type == 'cfws'):
                # Drop whitespace leading the token right after a dot.
                kept.append(TokenList(tok[1:]))
            else:
                kept.append(tok)
            prev = kept[-1]
            prev_is_list = tok_is_list
        return TokenList(kept[1:-1]).value
+
+
class DomainLiteral(TokenList):
    """Token list tagged with token_type 'domain-literal'."""

    token_type = 'domain-literal'

    @property
    def domain(self):
        """The list's ``value`` with all whitespace removed."""
        pieces = super(DomainLiteral, self).value.split()
        return ''.join(pieces)

    @property
    def ip(self):
        """Value of the first 'ptext' element, else None."""
        return next((item.value for item in self
                     if item.token_type == 'ptext'), None)
+
+
class MIMEVersion(TokenList):
    """Token list tagged 'mime-version'; major and minor default to None."""

    token_type = 'mime-version'
    major = None
    minor = None
+
+
class Parameter(TokenList):
    """Token list tagged with token_type 'parameter'."""

    token_type = 'parameter'
    sectioned = False
    extended = False
    charset = 'us-ascii'

    @property
    def section_number(self):
        # Because the first token, the attribute (name) eats CFWS, the second
        # token is always the section if there is one.
        if self.sectioned:
            return self[1].number
        return 0

    @property
    def param_value(self):
        """Stripped value of the first usable 'value' token, or ''."""
        # This is part of the "handle quoted extended parameters" hack.
        for token in self:
            if token.token_type == 'value':
                return token.stripped_value
            if token.token_type != 'quoted-string':
                continue
            for quoted in token:
                if quoted.token_type != 'bare-quoted-string':
                    continue
                for inner in quoted:
                    if inner.token_type == 'value':
                        return inner.stripped_value
        return ''
+
+
class InvalidParameter(Parameter):
    """Parameter tagged with token_type 'invalid-parameter'."""

    token_type = 'invalid-parameter'
+
+
class Attribute(TokenList):
    """Token list tagged with token_type 'attribute'."""

    token_type = 'attribute'

    @property
    def stripped_value(self):
        """Value of the first element whose type ends in 'attrtext', else None."""
        return next((item.value for item in self
                     if item.token_type.endswith('attrtext')), None)
+
class Section(TokenList):
    """Token list tagged 'section'; ``number`` defaults to None."""

    token_type = 'section'
    number = None
+
+
class Value(TokenList):
    """Token list tagged with token_type 'value'."""

    token_type = 'value'

    @property
    def stripped_value(self):
        """Stripped value of the first non-cfws element where available."""
        # Skip one leading cfws element, if present.
        first = self[1] if self[0].token_type == 'cfws' else self[0]
        strippable = ('quoted-string', 'attribute', 'extended-attribute')
        if not first.token_type.endswith(strippable):
            return self.value
        return first.stripped_value
+
+
class MimeParameters(TokenList):
    """Token list tagged with token_type 'mime-parameters'."""

    token_type = 'mime-parameters'

    @property
    def params(self):
        """Yield ``(name, value)`` pairs, joining sectioned parameters.

        Defects found while assembling a value (inconsistent section
        numbering, undecodable bytes) are appended to the ``defects`` of
        the parameter token concerned.
        """
        # The RFC specifically states that the ordering of parameters is not
        # guaranteed and may be reordered by the transport layer.  So we have
        # to assume the RFC 2231 pieces can come in any order.  However, we
        # output them in the order that we first see a given name, which gives
        # us a stable __str__.
        params = OrderedDict()
        for token in self:
            if not token.token_type.endswith('parameter'):
                continue
            if token[0].token_type != 'attribute':
                continue
            name = token[0].value.strip()
            if name not in params:
                params[name] = []
            params[name].append((token.section_number, token))
        for name, parts in params.items():
            # Sort on the section number only.  Sorting the bare tuples
            # would compare the parameter tokens themselves whenever two
            # section numbers are equal (e.g. a duplicated parameter name).
            # The sort is stable, so such ties keep their original order.
            parts = sorted(parts, key=lambda item: item[0])
            # XXX: there might be more recovery we could do here if, for
            # example, this is really a case of a duplicate attribute name.
            value_parts = []
            charset = parts[0][1].charset
            for i, (section_number, param) in enumerate(parts):
                if section_number != i:
                    param.defects.append(errors.InvalidHeaderDefect(
                        "inconsistent multipart parameter numbering"))
                value = param.param_value
                if param.extended:
                    try:
                        value = unquote_to_bytes(value)
                    except UnicodeEncodeError:
                        # source had surrogate escaped bytes.  What we do now
                        # is a bit of an open question.  I'm not sure this is
                        # the best choice, but it is what the old algorithm did
                        value = unquote(value, encoding='latin-1')
                    else:
                        try:
                            value = value.decode(charset, 'surrogateescape')
                        except LookupError:
                            # XXX: there should really be a custom defect for
                            # unknown character set to make it easy to find,
                            # because otherwise unknown charset is a silent
                            # failure.
                            value = value.decode('us-ascii', 'surrogateescape')
                        if utils._has_surrogates(value):
                            param.defects.append(errors.UndecodableBytesDefect())
                value_parts.append(value)
            value = ''.join(value_parts)
            yield name, value

    def __str__(self):
        params = []
        for name, value in self.params:
            if value:
                params.append('{}={}'.format(name, quote_string(value)))
            else:
                params.append(name)
        params = '; '.join(params)
        return ' ' + params if params else ''
+
+
class ParameterizedHeaderValue(TokenList):
    """Base for header values that may end in a 'mime-parameters' list."""

    @property
    def params(self):
        """``params`` of the last 'mime-parameters' element, or ``{}``."""
        for item in reversed(self):
            if item.token_type != 'mime-parameters':
                continue
            return item.params
        return {}

    @property
    def parts(self):
        has_params = bool(self) and self[-1].token_type == 'mime-parameters'
        if not has_params:
            return TokenList(self).parts
        # We don't want to start a new line if all of the params don't fit
        # after the value, so unwrap the parameter list.
        return TokenList(self[:-1] + self[-1])
+
+
class ContentType(ParameterizedHeaderValue):
    """Header value tagged 'content-type'; defaults to text/plain."""

    token_type = 'content-type'
    maintype = 'text'
    subtype = 'plain'
+
+
class ContentDisposition(ParameterizedHeaderValue):
    """Header value tagged 'content-disposition'; disposition defaults to None."""

    token_type = 'content-disposition'
    content_disposition = None
+
+
class ContentTransferEncoding(TokenList):
    """Token list tagged 'content-transfer-encoding'; cte defaults to '7bit'."""

    token_type = 'content-transfer-encoding'
    cte = '7bit'
+
+
class HeaderLabel(TokenList):
    """Token list tagged with token_type 'header-label'."""

    token_type = 'header-label'
+
+
class Header(TokenList):
    """Token list tagged with token_type 'header'."""

    token_type = 'header'

    def _fold(self, folded):
        """Fold the header into *folded*, consuming this list.

        The list is expected to hold a label, optionally a cfws token, and
        exactly one more element; anything beyond that raises ValueError.
        """
        label = str(self.pop(0))
        folded.append(label)
        folded.lastlen = len(folded.current[0])
        # The first line of the header is different from all others: we don't
        # want to start a new object on a new line if it has any fold points in
        # it that would allow part of it to be on the first header line.
        # Further, if the first fold point would fit on the new line, we want
        # to do that, but if it doesn't we want to put it on the first line.
        # Folded supports this via the stickyspace attribute.  If this
        # attribute is not None, it does the special handling.
        if self[0].token_type == 'cfws':
            folded.stickyspace = str(self.pop(0))
        else:
            folded.stickyspace = ''
        body = self.pop(0)
        if self:
            raise ValueError("Malformed Header token list")
        body._fold(folded)
+
+
+#
+# Terminal classes and instances
+#
+
class Terminal(str):
    """A string leaf of the parse tree carrying a token_type and defects."""

    def __new__(cls, value, token_type):
        obj = super(Terminal, cls).__new__(cls, value)
        obj.token_type = token_type
        obj.defects = []
        return obj

    def __repr__(self):
        text = super(Terminal, self).__repr__()
        return "{}({})".format(self.__class__.__name__, text)

    @property
    def all_defects(self):
        """A copy of the defects registered on this terminal."""
        return list(self.defects)

    def _pp(self, indent=''):
        """Return the one-line tree representation as a single-item list."""
        suffix = ' {}'.format(self.defects) if self.defects else ''
        line = "{}{}/{}({}){}".format(
            indent,
            self.__class__.__name__,
            self.token_type,
            super(Terminal, self).__repr__(),
            suffix,
            )
        return [line]

    def cte_encode(self, charset, policy):
        """Return the text unchanged if ASCII, else encoded with *charset*."""
        value = str(self)
        try:
            value.encode('us-ascii')
        except UnicodeEncodeError:
            return _ew.encode(value, charset)
        return value

    def pop_trailing_ws(self):
        # This terminates the recursion.
        return None

    def pop_leading_fws(self):
        # This terminates the recursion.
        return None

    @property
    def comments(self):
        return []

    def has_leading_comment(self):
        return False

    def __getnewargs__(self):
        return (str(self), self.token_type)
+
+
class WhiteSpaceTerminal(Terminal):
    """Terminal for whitespace; its ``value`` is always a single space."""

    # Whitespace is always a place where folding white space is present.
    has_fws = True

    @property
    def value(self):
        return ' '

    def startswith_fws(self):
        return True
+
+
class ValueTerminal(Terminal):
    """Terminal for non-whitespace text; its ``value`` is the terminal itself."""

    has_fws = False

    @property
    def value(self):
        return self

    def startswith_fws(self):
        return False

    def as_encoded_word(self, charset):
        """Return the text encoded as an encoded word using *charset*."""
        text = str(self)
        return _ew.encode(text, charset)
+
+
class EWWhiteSpaceTerminal(WhiteSpaceTerminal):
    """Whitespace terminal that renders as the empty string.

    Both ``value`` and ``str()`` are empty; the underlying characters
    remain available through ``encoded``.
    """

    has_fws = True

    @property
    def value(self):
        return ''

    @property
    def encoded(self):
        # A full slice yields the underlying text.
        return self[:]

    def __str__(self):
        return ''
+
+
# Shared terminal instances for the '.', ',' and '@' punctuation tokens.
# XXX these need to become classes that are instantiated per use, so that
# a program can't change them in one parse tree and thereby screw up other
# parse trees.  Maybe there should be tests for that, too.
DOT = ValueTerminal('.', 'dot')
ListSeparator = ValueTerminal(',', 'list-separator')
RouteComponentMarker = ValueTerminal('@', 'route-component-marker')
+
+#
+# Parser
+#
+
+"""Parse strings according to RFC822/2047/2822/5322 rules.
+
+This is a stateless parser. Each get_XXX function accepts a string and
+returns either a Terminal or a TokenList representing the RFC object named
+by the method and a string containing the remaining unparsed characters
+from the input. Thus a parser method consumes the next syntactic construct
+of a given type and returns a token representing the construct plus the
+unparsed remainder of the input string.
+
+For example, if the first element of a structured header is a 'phrase',
+then:
+
+ phrase, value = get_phrase(value)
+
+returns the complete phrase from the start of the string value, plus any
+characters left in the string after the phrase is removed.
+
+"""
+
# Splits a string at runs of WSP characters; the runs are kept in the result
# because the pattern is a capturing group.
_wsp_splitter = re.compile(r'([{}]+)'.format(''.join(WSP))).split
# Finds the characters U+0000 through U+0020 and U+007F.
_non_printable_finder = re.compile(r"[\x00-\x20\x7F]").findall
# Each matcher below matches a leading run of characters that are NOT in the
# named set.  re.escape() makes every member of the set literal inside the
# character class; this replaces hand-written escaping that relied on the
# invalid escape sequence '\]' and left '[' unescaped.
_non_atom_end_matcher = re.compile(r"[^{}]+".format(
    re.escape(''.join(ATOM_ENDS)))).match
_non_token_end_matcher = re.compile(r"[^{}]+".format(
    re.escape(''.join(TOKEN_ENDS)))).match
_non_attribute_end_matcher = re.compile(r"[^{}]+".format(
    re.escape(''.join(ATTRIBUTE_ENDS)))).match
_non_extended_attribute_end_matcher = re.compile(r"[^{}]+".format(
    re.escape(''.join(EXTENDED_ATTRIBUTE_ENDS)))).match
+
def _validate_xtext(xtext):
    """Register defects on *xtext* for non-printables and surrogates.

    A NonPrintableDefect listing the offending characters is added when
    any are found, and an UndecodableBytesDefect when
    ``utils._has_surrogates`` reports surrogates in the text.
    """
    found = _non_printable_finder(xtext)
    if found:
        xtext.defects.append(errors.NonPrintableDefect(found))
    if not utils._has_surrogates(xtext):
        return
    xtext.defects.append(errors.UndecodableBytesDefect(
        "Non-ASCII characters found in header token"))
+
def _get_ptext_to_endchars(value, endchars):
    """Scan printables/quoted-pairs until endchars and return unquoted ptext.

    This function turns a run of qcontent, ccontent-without-comments, or
    dtext-with-quoted-printables into a single string by unquoting any
    quoted printables.  It returns the string, the remaining value, and
    a flag.  Scanning is limited to the text in front of the first run of
    whitespace in *value*.

    NOTE(review): the flag is only set when an escaped backslash is seen,
    not for other quoted pairs -- confirm whether callers rely on that.
    """
    pieces = list(_wsp_splitter(value, 1))
    fragment, remainder = pieces[0], pieces[1:]
    vchars = []
    escape = False
    had_qp = False
    # Index at which scanning stopped.  Defaults to "consumed everything",
    # which also covers an empty fragment (empty *value*, or *value*
    # starting with whitespace) that previously raised UnboundLocalError.
    end = len(fragment)
    for pos, char in enumerate(fragment):
        if char == '\\':
            if escape:
                escape = False
                had_qp = True
            else:
                escape = True
                continue
        if escape:
            # The character after a backslash is taken literally.
            escape = False
        elif char in endchars:
            end = pos
            break
        vchars.append(char)
    return ''.join(vchars), ''.join([fragment[end:]] + remainder), had_qp
+
def _decode_ew_run(value):
    """ Decode a run of RFC2047 encoded words.

        _decode_ew_run(value) -> (text, value, defects)

    Whitespace-separated tokens are taken from the front of *value* for as
    long as they start with '=?' and end with '?='.  Each one is decoded
    and the decoded texts are concatenated, so whitespace between encoded
    words is discarded.  Returns the decoded text, the remaining value
    (including the whitespace in front of it) and the list of defects
    reported by the decoder.  The input value may not have any leading
    whitespace.

    """
    decoded = []
    defects = []
    pending_ws = ''
    while value:
        pieces = _wsp_splitter(value, 1)
        if len(pieces) == 3:
            tok, ws, value = pieces
        else:
            # No whitespace left: the whole value is the last token.
            tok, ws, value = value, '', ''
        looks_encoded = tok.startswith('=?') and tok.endswith('?=')
        if not looks_encoded:
            return ''.join(decoded), pending_ws + tok + ws + value, defects
        text, charset, lang, new_defects = _ew.decode(tok)
        decoded.append(text)
        defects.extend(new_defects)
        pending_ws = ws
    return ''.join(decoded), pending_ws, defects
+
def get_fws(value):
    """FWS = 1*WSP

    This isn't the RFC definition.  We're using fws to represent tokens where
    folding can be done, but when we are parsing the *un*folding has already
    been done so we don't need to watch out for CRLF.

    Returns the leading whitespace of *value* as a WhiteSpaceTerminal of
    type 'fws' together with the rest of the string.

    """
    rest = value.lstrip()
    ws_len = len(value) - len(rest)
    return WhiteSpaceTerminal(value[:ws_len], 'fws'), rest
+
def get_encoded_word(value):
    """ encoded-word = "=?" charset "?" encoding "?" encoded-text "?="

    Returns the EncodedWord parsed from the front of *value* and the
    unparsed remainder.  Raises errors.HeaderParseError when *value* does
    not start with '=?', contains no closing '?=', or cannot be decoded.

    """
    ew = EncodedWord()
    if not value.startswith('=?'):
        raise errors.HeaderParseError(
            "expected encoded word but found {}".format(value))
    tok, closer, remstr = value[2:].partition('?=')
    if not closer:
        # No terminating '?=' anywhere in the value.
        raise errors.HeaderParseError(
            "expected encoded word but found {}".format(value))
    if remstr[:2].isdigit():
        # The '?=' just found is followed by two digits: treat it as part
        # of the encoded text and continue up to the next '?='.
        rest, _, remstr = remstr.partition('?=')
        tok = tok + '?=' + rest
    if len(tok.split()) > 1:
        ew.defects.append(errors.InvalidHeaderDefect(
            "whitespace inside encoded word"))
    ew.cte = value
    value = remstr
    try:
        text, charset, lang, defects = _ew.decode('=?' + tok + '?=')
    except ValueError:
        raise errors.HeaderParseError(
            "encoded word format invalid: '{}'".format(ew.cte))
    ew.charset = charset
    ew.lang = lang
    ew.defects.extend(defects)
    # Split the decoded text into alternating whitespace and vtext tokens.
    while text:
        if text[0] in WSP:
            token, text = get_fws(text)
            ew.append(token)
            continue
        pieces = list(_wsp_splitter(text, 1))
        vtext = ValueTerminal(pieces[0], 'vtext')
        _validate_xtext(vtext)
        ew.append(vtext)
        text = ''.join(pieces[1:])
    return ew, value
+
def get_unstructured(value):
    """unstructured = (*([FWS] vchar) *WSP) / obs-unstruct
       obs-unstruct = *((*LF *CR *(obs-utext) *LF *CR)) / FWS)
       obs-utext = %d0 / obs-NO-WS-CTL / LF / CR

       obs-NO-WS-CTL is control characters except WSP/CR/LF.

    In practice this means printable runs, plus control characters or nulls
    from the obsolete syntax, separated by whitespace.  RFC 2047 uses the
    obsolete syntax in its specification but requires whitespace on either
    side of encoded words, so there is no need to separate
    non-printable-non-whitespace from printable runs: the value is parsed
    into xtext tokens separated by WSP tokens.

    An 'unstructured' value by definition makes up the whole value, so this
    routine returns only the parsed TokenList and no remaining value.

    """
    # XXX: but what about bare CR and LF?  They might signal the start or
    # end of an encoded word.  YAGNI for now, since our current parsers
    # will never send us strings with bare CR or LF.

    unstructured = UnstructuredTokenList()
    while value:
        if value[0] in WSP:
            fws, value = get_fws(value)
            unstructured.append(fws)
            continue
        if value.startswith('=?'):
            try:
                ew_token, value = get_encoded_word(value)
            except errors.HeaderParseError:
                # Not a valid encoded word after all; handle it as vtext.
                ew_token = None
            if ew_token is not None:
                count = len(unstructured)
                if count > 0 and unstructured[-1].token_type != 'fws':
                    unstructured.defects.append(errors.InvalidHeaderDefect(
                        "missing whitespace before encoded word"))
                elif (count > 1 and
                        unstructured[-2].token_type == 'encoded-word'):
                    # Whitespace between two adjacent encoded words gets a
                    # dedicated terminal type.
                    unstructured[-1] = EWWhiteSpaceTerminal(
                        unstructured[-1], 'fws')
                unstructured.append(ew_token)
                continue
        pieces = _wsp_splitter(value, 1)
        vtext = ValueTerminal(pieces[0], 'vtext')
        _validate_xtext(vtext)
        unstructured.append(vtext)
        value = ''.join(pieces[1:])
    return unstructured
+
def get_qp_ctext(value):
    """ctext = <printable ascii except backslash, "(" and ")">

    This is not the RFC ctext: nested comments are handled in the comment
    parser and quoted-pairs are unquoted here.  Anything except the '()'
    characters is accepted; the token is then passed through
    _validate_xtext for validation.  Because quoted pairs are converted to
    their unquoted values, the result is a 'ptext' token, here a
    WhiteSpaceTerminal.

    Returns a (ptext, remainder) pair.

    """
    text, remainder, _had_qp = _get_ptext_to_endchars(value, '()')
    token = WhiteSpaceTerminal(text, 'ptext')
    _validate_xtext(token)
    return token, remainder
+
def get_qcontent(value):
    """qcontent = qtext / quoted-pair

    Anything except the DQUOTE character is accepted; the token is then
    passed through _validate_xtext for validation.  Quoted pairs are
    converted to their unquoted values, so the result is a 'ptext' token,
    here a ValueTerminal.

    Returns a (ptext, remainder) pair.

    """
    text, remainder, _had_qp = _get_ptext_to_endchars(value, '"')
    token = ValueTerminal(text, 'ptext')
    _validate_xtext(token)
    return token, remainder
+
def get_atext(value):
    """atext = <matches _atext_matcher>

    Consume the leading run of characters matched by
    _non_atom_end_matcher and return it as an 'atext' ValueTerminal
    together with the rest of the value.  The token is validated with
    _validate_xtext.

    Raises HeaderParseError if *value* does not start with such a run.
    """
    match = _non_atom_end_matcher(value)
    if not match:
        raise errors.HeaderParseError(
            "expected atext but found '{}'".format(value))
    text = match.group()
    token = ValueTerminal(text, 'atext')
    _validate_xtext(token)
    return token, value[len(text):]
+
def get_bare_quoted_string(value):
    """bare-quoted-string = DQUOTE *([FWS] qcontent) [FWS] DQUOTE

    A quoted-string without the leading or trailing white space.  Its
    value is the text between the quote marks, with whitespace
    preserved and quoted pairs decoded.

    Returns a (BareQuotedString, remainder) pair.  If the input ends before
    the closing quote, an InvalidHeaderDefect is recorded on the token and
    the remainder is empty.

    Raises HeaderParseError if *value* is empty or does not start with a
    double quote.
    """
    # Check for empty input first so that it is reported as a parse error
    # rather than escaping as an IndexError.
    if not value or value[0] != '"':
        raise errors.HeaderParseError(
            "expected '\"' but found '{}'".format(value))
    bare_quoted_string = BareQuotedString()
    value = value[1:]
    while value and value[0] != '"':
        if value[0] in WSP:
            token, value = get_fws(value)
        else:
            token, value = get_qcontent(value)
        bare_quoted_string.append(token)
    if not value:
        bare_quoted_string.defects.append(errors.InvalidHeaderDefect(
            "end of header inside quoted string"))
        return bare_quoted_string, value
    # Drop the closing quote.
    return bare_quoted_string, value[1:]
+
def get_comment(value):
    """comment = "(" *([FWS] ccontent) [FWS] ")"
       ccontent = ctext / quoted-pair / comment

    Nested comments are handled here by recursion; quoted-pairs are handled
    by the qp-ctext routine.

    Returns a (Comment, remainder) pair.  If the input ends before the
    closing parenthesis, an InvalidHeaderDefect is recorded on the comment.

    Raises HeaderParseError if *value* is non-empty and does not start
    with "(".
    """
    if value and value[0] != '(':
        raise errors.HeaderParseError(
            "expected '(' but found '{}'".format(value))
    comment = Comment()
    value = value[1:]
    while value and value[0] != ")":
        first = value[0]
        if first in WSP:
            parse = get_fws
        elif first == '(':
            parse = get_comment
        else:
            parse = get_qp_ctext
        token, value = parse(value)
        comment.append(token)
    if value:
        # Drop the closing parenthesis.
        return comment, value[1:]
    comment.defects.append(errors.InvalidHeaderDefect(
        "end of header inside comment"))
    return comment, value
+
def get_cfws(value):
    """CFWS = (1*([FWS] comment) [FWS]) / FWS

    Collect the leading run of whitespace and comments from *value* into a
    CFWSList and return it with the rest of the value.  The list is empty
    if *value* does not start with whitespace or a comment.

    """
    cfws = CFWSList()
    while value and value[0] in CFWS_LEADER:
        parse = get_fws if value[0] in WSP else get_comment
        token, value = parse(value)
        cfws.append(token)
    return cfws, value
+
def get_quoted_string(value):
    """quoted-string = [CFWS] <bare-quoted-string> [CFWS]

    'bare-quoted-string' is an intermediate class defined by this
    parser and not by the RFC grammar.  It is the quoted string
    without any attached CFWS.

    Returns a (QuotedString, remainder) pair; any surrounding CFWS is kept
    in the QuotedString as separate tokens.
    """
    quoted_string = QuotedString()
    if value and value[0] in CFWS_LEADER:
        leading, value = get_cfws(value)
        quoted_string.append(leading)
    body, value = get_bare_quoted_string(value)
    quoted_string.append(body)
    if value and value[0] in CFWS_LEADER:
        trailing, value = get_cfws(value)
        quoted_string.append(trailing)
    return quoted_string, value
+
def get_atom(value):
    """atom = [CFWS] 1*atext [CFWS]

    Returns an (Atom, remainder) pair; any surrounding CFWS is kept in the
    Atom as separate tokens.

    Raises HeaderParseError if no atext follows the optional leading CFWS.

    """
    atom = Atom()
    if value and value[0] in CFWS_LEADER:
        leading, value = get_cfws(value)
        atom.append(leading)
    if value and value[0] in ATOM_ENDS:
        raise errors.HeaderParseError(
            "expected atom but found '{}'".format(value))
    atext, value = get_atext(value)
    atom.append(atext)
    if value and value[0] in CFWS_LEADER:
        trailing, value = get_cfws(value)
        atom.append(trailing)
    return atom, value
+
def get_dot_atom_text(value):
    """ dot-text = 1*atext *("." 1*atext)

    Returns a (DotAtomText, remainder) pair whose tokens alternate between
    atext terminals and DOT.

    Raises HeaderParseError if *value* does not start with atext or if the
    dot-atom-text would end with a dot.

    """
    dot_atom_text = DotAtomText()
    if not (value and value[0] not in ATOM_ENDS):
        raise errors.HeaderParseError("expected atom at a start of "
            "dot-atom-text but found '{}'".format(value))
    while value and value[0] not in ATOM_ENDS:
        atext, value = get_atext(value)
        dot_atom_text.append(atext)
        if value[:1] == '.':
            dot_atom_text.append(DOT)
            value = value[1:]
    if dot_atom_text[-1] is DOT:
        # The dot was already consumed; put it back for the error message.
        raise errors.HeaderParseError("expected atom at end of dot-atom-text "
            "but found '{}'".format('.'+value))
    return dot_atom_text, value
+
def get_dot_atom(value):
    """ dot-atom = [CFWS] dot-atom-text [CFWS]

    Returns a (DotAtom, remainder) pair; any surrounding CFWS is kept in
    the DotAtom as separate tokens.

    Raises HeaderParseError if no dot-atom-text follows the optional
    leading CFWS (including when *value* is empty).

    """
    dot_atom = DotAtom()
    # Guard against empty input so that it is reported by
    # get_dot_atom_text as a parse error instead of raising IndexError.
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        dot_atom.append(token)
    token, value = get_dot_atom_text(value)
    dot_atom.append(token)
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        dot_atom.append(token)
    return dot_atom, value
+
def get_word(value):
    """word = atom / quoted-string

    Either atom or quoted-string may start with CFWS.  We have to peel off
    this CFWS first to determine which type of word to parse.  Afterward we
    splice the leading CFWS, if any, into the parsed sub-token.

    If neither an atom or a quoted-string is found before the next special,
    or the value is exhausted, a HeaderParseError is raised.

    The token returned is either an Atom or a QuotedString, as appropriate.
    This means the 'word' level of the formal grammar is not represented in
    the parse tree; this is because having that extra layer when
    manipulating the parse tree is more confusing than it is helpful.

    Returns a (token, remainder) pair.

    """
    leader = None
    if value and value[0] in CFWS_LEADER:
        leader, value = get_cfws(value)
    if not value:
        # Empty input, or nothing but CFWS: report a parse error rather
        # than letting value[0] raise IndexError.
        raise errors.HeaderParseError(
            "Expected 'atom' or 'quoted-string' but found nothing.")
    if value[0] == '"':
        token, value = get_quoted_string(value)
    elif value[0] in SPECIALS:
        raise errors.HeaderParseError("Expected 'atom' or 'quoted-string' "
                                      "but found '{}'".format(value))
    else:
        token, value = get_atom(value)
    if leader is not None:
        token[:0] = [leader]
    return token, value
+
def get_phrase(value):
    """ phrase = 1*word / obs-phrase
        obs-phrase = word *(word / "." / CFWS)

    A phrase can therefore be a sequence of words, periods, and CFWS in any
    order, provided it starts with at least one word.  Anything other than
    words adds an ObsoleteHeaderDefect to the token's defect list.  A
    phrase that does not start with a word is also accepted (for example
    CFWS followed by a dot); that is registered as an InvalidHeaderDefect,
    since even the obsolete grammar does not allow it.

    Returns a (Phrase, remainder) pair.  Parsing stops at the first
    character in PHRASE_ENDS.

    """
    phrase = Phrase()
    try:
        first, value = get_word(value)
    except errors.HeaderParseError:
        phrase.defects.append(errors.InvalidHeaderDefect(
            "phrase does not start with word"))
    else:
        phrase.append(first)
    while value and value[0] not in PHRASE_ENDS:
        if value[0] == '.':
            phrase.append(DOT)
            phrase.defects.append(errors.ObsoleteHeaderDefect(
                "period in 'phrase'"))
            value = value[1:]
            continue
        try:
            token, value = get_word(value)
        except errors.HeaderParseError:
            # Not a word; the only other thing allowed here is bare CFWS.
            if value[0] not in CFWS_LEADER:
                raise
            token, value = get_cfws(value)
            phrase.defects.append(errors.ObsoleteHeaderDefect(
                "comment found without atom"))
        phrase.append(token)
    return phrase, value
+
def get_local_part(value):
    """ local-part = dot-atom / quoted-string / obs-local-part

    Returns a (LocalPart, remainder) pair.  A local part that only parses
    under the obsolete grammar gets an ObsoleteHeaderDefect; one that does
    not parse even as an obs-local-part gets an InvalidHeaderDefect.  A
    local part containing non-ASCII characters gets a
    NonASCIILocalPartDefect.

    Raises HeaderParseError if *value* is empty, contains only CFWS, or
    starts (after CFWS) with a character that cannot begin a local part.

    """
    local_part = LocalPart()
    leader = None
    # Guard against empty input so that it is reported as a parse error
    # below instead of raising IndexError.
    if value and value[0] in CFWS_LEADER:
        leader, value = get_cfws(value)
    if not value:
        raise errors.HeaderParseError(
            "expected local-part but found '{}'".format(value))
    try:
        token, value = get_dot_atom(value)
    except errors.HeaderParseError:
        try:
            token, value = get_word(value)
        except errors.HeaderParseError:
            if value[0] != '\\' and value[0] in PHRASE_ENDS:
                raise
            # Leave an empty placeholder; the obs-local-part pass below
            # will re-parse from the start.
            token = TokenList()
    if leader is not None:
        token[:0] = [leader]
    local_part.append(token)
    if value and (value[0]=='\\' or value[0] not in PHRASE_ENDS):
        # There is more local-part text than a dot-atom or quoted-string
        # can explain; re-parse everything under the obsolete grammar.
        obs_local_part, value = get_obs_local_part(str(local_part) + value)
        if obs_local_part.token_type == 'invalid-obs-local-part':
            local_part.defects.append(errors.InvalidHeaderDefect(
                "local-part is not dot-atom, quoted-string, or obs-local-part"))
        else:
            local_part.defects.append(errors.ObsoleteHeaderDefect(
                "local-part is not a dot-atom (contains CFWS)"))
        local_part[0] = obs_local_part
    try:
        local_part.value.encode('ascii')
    except UnicodeEncodeError:
        local_part.defects.append(errors.NonASCIILocalPartDefect(
                "local-part contains non-ASCII characters)"))
    return local_part, value
+
def get_obs_local_part(value):
    """ obs-local-part = word *("." word)

    Parse as much of *value* as can belong to an obsolete-syntax local
    part and return an (ObsLocalPart, remainder) pair.  Structural problems
    (repeated, leading or trailing dots, words not separated by a dot, a
    stray backslash) are recorded as InvalidHeaderDefects, and if any
    defect was recorded the token type is changed to
    'invalid-obs-local-part'.

    Raises HeaderParseError if nothing at all could be parsed, or if a
    character is met that can start neither a word nor CFWS.
    """
    obs_local_part = ObsLocalPart()
    last_non_ws_was_dot = False
    while value and (value[0]=='\\' or value[0] not in PHRASE_ENDS):
        if value[0] == '.':
            if last_non_ws_was_dot:
                obs_local_part.defects.append(errors.InvalidHeaderDefect(
                    "invalid repeated '.'"))
            obs_local_part.append(DOT)
            last_non_ws_was_dot = True
            value = value[1:]
            continue
        elif value[0]=='\\':
            obs_local_part.append(ValueTerminal(value[0],
                                                'misplaced-special'))
            value = value[1:]
            obs_local_part.defects.append(errors.InvalidHeaderDefect(
                "'\\' character outside of quoted-string/ccontent"))
            last_non_ws_was_dot = False
            continue
        if obs_local_part and obs_local_part[-1].token_type != 'dot':
            obs_local_part.defects.append(errors.InvalidHeaderDefect(
                "missing '.' between words"))
        try:
            token, value = get_word(value)
            last_non_ws_was_dot = False
        except errors.HeaderParseError:
            if value[0] not in CFWS_LEADER:
                raise
            token, value = get_cfws(value)
        obs_local_part.append(token)
    if not obs_local_part:
        # Nothing was consumed; the indexing below would raise IndexError.
        raise errors.HeaderParseError(
            "expected obs-local-part but found '{}'".format(value))
    # The length checks keep a lone CFWS token from triggering an
    # IndexError when looking at its neighbour.
    if (obs_local_part[0].token_type == 'dot' or
            (obs_local_part[0].token_type == 'cfws' and
             len(obs_local_part) > 1 and
             obs_local_part[1].token_type == 'dot')):
        obs_local_part.defects.append(errors.InvalidHeaderDefect(
            "Invalid leading '.' in local part"))
    if (obs_local_part[-1].token_type == 'dot' or
            (obs_local_part[-1].token_type == 'cfws' and
             len(obs_local_part) > 1 and
             obs_local_part[-2].token_type == 'dot')):
        obs_local_part.defects.append(errors.InvalidHeaderDefect(
            "Invalid trailing '.' in local part"))
    if obs_local_part.defects:
        obs_local_part.token_type = 'invalid-obs-local-part'
    return obs_local_part, value
+
def get_dtext(value):
    """ dtext = <printable ascii except backslash, "[" and "]"> / obs-dtext
        obs-dtext = obs-NO-WS-CTL / quoted-pair

    Anything except the '[' and ']' characters is accepted; the token is
    then passed through _validate_xtext for validation.  Quoted pairs are
    converted to their unquoted values, so the result is a 'ptext' token,
    here a ValueTerminal.  If the helper reports that a quoted pair was
    seen, an ObsoleteHeaderDefect is added to the returned token.

    Returns a (ptext, remainder) pair.

    """
    text, remainder, had_qp = _get_ptext_to_endchars(value, '[]')
    token = ValueTerminal(text, 'ptext')
    if had_qp:
        token.defects.append(errors.ObsoleteHeaderDefect(
            "quoted printable found in domain-literal"))
    _validate_xtext(token)
    return token, remainder
+
def _check_for_early_dl_end(value, domain_literal):
    """Handle the input running out inside a domain-literal.

    If *value* is non-empty, do nothing and return False.  Otherwise record
    an InvalidHeaderDefect on *domain_literal*, append the missing closing
    ']' token to it, and return True.
    """
    if value:
        return False
    # The defect belongs in the defects list, not in the token list:
    # tokens are expected to provide token_type and all_defects.
    domain_literal.defects.append(errors.InvalidHeaderDefect(
        "end of input inside domain-literal"))
    domain_literal.append(ValueTerminal(']', 'domain-literal-end'))
    return True


def get_domain_literal(value):
    """ domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]

    Returns a (DomainLiteral, remainder) pair.  If the input ends before
    the closing ']', an InvalidHeaderDefect is recorded and a closing
    bracket token is supplied.

    Raises HeaderParseError if *value* is empty (or only CFWS), does not
    start with '[' after the optional CFWS, or if something other than ']'
    follows the literal's content.

    """
    domain_literal = DomainLiteral()
    # Guard against empty input so that it is reported as a parse error
    # below instead of raising IndexError.
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        domain_literal.append(token)
    if not value:
        raise errors.HeaderParseError("expected domain-literal")
    if value[0] != '[':
        raise errors.HeaderParseError("expected '[' at start of domain-literal "
                "but found '{}'".format(value))
    value = value[1:]
    # Record the opening bracket before checking for early end of input,
    # so that the bracket pair stays balanced in the parse tree.
    domain_literal.append(ValueTerminal('[', 'domain-literal-start'))
    if _check_for_early_dl_end(value, domain_literal):
        return domain_literal, value
    if value[0] in WSP:
        token, value = get_fws(value)
        domain_literal.append(token)
    token, value = get_dtext(value)
    domain_literal.append(token)
    if _check_for_early_dl_end(value, domain_literal):
        return domain_literal, value
    if value[0] in WSP:
        token, value = get_fws(value)
        domain_literal.append(token)
    if _check_for_early_dl_end(value, domain_literal):
        return domain_literal, value
    if value[0] != ']':
        raise errors.HeaderParseError("expected ']' at end of domain-literal "
                "but found '{}'".format(value))
    domain_literal.append(ValueTerminal(']', 'domain-literal-end'))
    value = value[1:]
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        domain_literal.append(token)
    return domain_literal, value
+
def get_domain(value):
    """ domain = dot-atom / domain-literal / obs-domain
        obs-domain = atom *("." atom))

    Returns a (Domain, remainder) pair.  A domain that only parses under
    the obsolete grammar (atoms separated by dots with CFWS around them)
    gets an ObsoleteHeaderDefect.

    Raises HeaderParseError if *value* is empty, contains only CFWS, or
    does not start with something that can begin a domain.

    """
    domain = Domain()
    leader = None
    # Guard against empty input (for example the addr-spec "user@") so that
    # it is reported as a parse error below instead of raising IndexError.
    if value and value[0] in CFWS_LEADER:
        leader, value = get_cfws(value)
    if not value:
        raise errors.HeaderParseError(
            "expected domain but found '{}'".format(value))
    if value[0] == '[':
        token, value = get_domain_literal(value)
        if leader is not None:
            token[:0] = [leader]
        domain.append(token)
        return domain, value
    try:
        token, value = get_dot_atom(value)
    except errors.HeaderParseError:
        token, value = get_atom(value)
    if leader is not None:
        token[:0] = [leader]
    domain.append(token)
    if value and value[0] == '.':
        # A dot after a complete dot-atom/atom means CFWS was embedded in
        # the domain, which only the obsolete grammar allows.
        domain.defects.append(errors.ObsoleteHeaderDefect(
            "domain is not a dot-atom (contains CFWS)"))
        if domain[0].token_type == 'dot-atom':
            # Flatten the dot-atom so its parts sit beside the atoms that
            # are appended below.
            domain[:] = domain[0]
        while value and value[0] == '.':
            domain.append(DOT)
            token, value = get_atom(value[1:])
            domain.append(token)
    return domain, value
+
def get_addr_spec(value):
    """ addr-spec = local-part "@" domain

    Returns an (AddrSpec, remainder) pair.  If the local part is not
    followed by '@', an InvalidHeaderDefect is recorded and the AddrSpec
    holds the local part only.

    """
    addr_spec = AddrSpec()
    local_part, value = get_local_part(value)
    addr_spec.append(local_part)
    if value and value[0] == '@':
        addr_spec.append(ValueTerminal('@', 'address-at-symbol'))
        domain, value = get_domain(value[1:])
        addr_spec.append(domain)
        return addr_spec, value
    addr_spec.defects.append(errors.InvalidHeaderDefect(
        "add-spec local part with no domain"))
    return addr_spec, value
+
def get_obs_route(value):
    """ obs-route = obs-domain-list ":"
        obs-domain-list = *(CFWS / ",") "@" domain *("," [CFWS] ["@" domain])

        Returns an obs-route token with the appropriate sub-tokens (that is,
        there is no obs-domain-list in the parse tree).

    The result is an (ObsRoute, remainder) pair.

    Raises HeaderParseError if the route does not contain an "@" domain,
    if the input ends before the terminating ':', or if something other
    than ':' follows the domain list.
    """
    obs_route = ObsRoute()
    while value and (value[0]==',' or value[0] in CFWS_LEADER):
        if value[0] in CFWS_LEADER:
            token, value = get_cfws(value)
            obs_route.append(token)
        elif value[0] == ',':
            obs_route.append(ListSeparator)
            value = value[1:]
    if not value or value[0] != '@':
        raise errors.HeaderParseError(
            "expected obs-route domain but found '{}'".format(value))
    obs_route.append(RouteComponentMarker)
    token, value = get_domain(value[1:])
    obs_route.append(token)
    while value and value[0]==',':
        obs_route.append(ListSeparator)
        value = value[1:]
        if not value:
            break
        if value[0] in CFWS_LEADER:
            token, value = get_cfws(value)
            obs_route.append(token)
        if not value:
            # The CFWS ran to the end of the input; stop here so that it
            # is reported below rather than raising IndexError.
            break
        if value[0] == '@':
            obs_route.append(RouteComponentMarker)
            token, value = get_domain(value[1:])
            obs_route.append(token)
    if not value:
        raise errors.HeaderParseError("end of header while parsing obs-route")
    if value[0] != ':':
        raise errors.HeaderParseError( "expected ':' marking end of "
            "obs-route but found '{}'".format(value))
    obs_route.append(ValueTerminal(':', 'end-of-obs-route-marker'))
    return obs_route, value[1:]
+
def get_angle_addr(value):
    """ angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr
        obs-angle-addr = [CFWS] "<" obs-route addr-spec ">" [CFWS]

    Returns an (AngleAddr, remainder) pair.  A null address ("<>") and a
    missing closing '>' are accepted with an InvalidHeaderDefect; an
    obsolete route is accepted with an ObsoleteHeaderDefect.

    Raises HeaderParseError if *value* does not start (after optional
    CFWS) with '<', or if neither an addr-spec nor an obs-route follows it.

    """
    angle_addr = AngleAddr()
    # Guard against empty input so that it is reported as a parse error
    # below instead of raising IndexError.
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        angle_addr.append(token)
    if not value or value[0] != '<':
        raise errors.HeaderParseError(
            "expected angle-addr but found '{}'".format(value))
    angle_addr.append(ValueTerminal('<', 'angle-addr-start'))
    value = value[1:]
    if not value:
        # Input ended right after '<'; there is nothing left to parse.
        raise errors.HeaderParseError(
            "expected addr-spec or obs-route but found '{}'".format(value))
    # Although it is not legal per RFC5322, SMTP uses '<>' in certain
    # circumstances.
    if value[0] == '>':
        angle_addr.append(ValueTerminal('>', 'angle-addr-end'))
        angle_addr.defects.append(errors.InvalidHeaderDefect(
            "null addr-spec in angle-addr"))
        value = value[1:]
        return angle_addr, value
    try:
        token, value = get_addr_spec(value)
    except errors.HeaderParseError:
        try:
            token, value = get_obs_route(value)
            angle_addr.defects.append(errors.ObsoleteHeaderDefect(
                "obsolete route specification in angle-addr"))
        except errors.HeaderParseError:
            raise errors.HeaderParseError(
                "expected addr-spec or obs-route but found '{}'".format(value))
        angle_addr.append(token)
        token, value = get_addr_spec(value)
    angle_addr.append(token)
    if value and value[0] == '>':
        value = value[1:]
    else:
        angle_addr.defects.append(errors.InvalidHeaderDefect(
            "missing trailing '>' on angle-addr"))
    # The closing token is added even when it was missing from the input.
    angle_addr.append(ValueTerminal('>', 'angle-addr-end'))
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        angle_addr.append(token)
    return angle_addr, value
+
def get_display_name(value):
    """ display-name = phrase

    Because this is simply a name-rule, the result is not a display-name
    token wrapping a phrase but a display-name token holding the phrase's
    own content: its sub-tokens and a copy of its defects.

    Returns a (DisplayName, remainder) pair.

    """
    phrase, value = get_phrase(value)
    display_name = DisplayName()
    display_name.extend(list(phrase))
    display_name.defects = list(phrase.defects)
    return display_name, value
+
+
def get_name_addr(value):
    """ name-addr = [display-name] angle-addr

    Returns a (NameAddr, remainder) pair.  Leading CFWS is attached to the
    display name if there is one, otherwise to the angle-addr.

    Raises HeaderParseError if *value* is empty or only CFWS, if it starts
    with a character that can begin neither a display name nor an
    angle-addr, or if no angle-addr follows the display name.

    """
    name_addr = NameAddr()
    # Both the optional display name and the angle-addr can start with cfws.
    leader = None
    if not value:
        # Report empty input as a parse error rather than letting the
        # value[0] lookup below raise IndexError.
        raise errors.HeaderParseError(
            "expected name-addr but found '{}'".format(value))
    if value[0] in CFWS_LEADER:
        leader, value = get_cfws(value)
        if not value:
            raise errors.HeaderParseError(
                "expected name-addr but found '{}'".format(leader))
    if value[0] != '<':
        if value[0] in PHRASE_ENDS:
            raise errors.HeaderParseError(
                "expected name-addr but found '{}'".format(value))
        token, value = get_display_name(value)
        if not value:
            raise errors.HeaderParseError(
                "expected name-addr but found '{}'".format(token))
        if leader is not None:
            # Splice the CFWS into the first word of the display name.
            token[0][:0] = [leader]
            leader = None
        name_addr.append(token)
    token, value = get_angle_addr(value)
    if leader is not None:
        token[:0] = [leader]
    name_addr.append(token)
    return name_addr, value
+
def get_mailbox(value):
    """ mailbox = name-addr / addr-spec

    Returns a (Mailbox, remainder) pair.  If the parsed token carries any
    InvalidHeaderDefect, the Mailbox's token type is changed to
    'invalid-mailbox'.

    Raises HeaderParseError if *value* parses as neither a name-addr nor an
    addr-spec.

    """
    # The only way to tell a name-addr from an addr-spec is to try parsing
    # each one in turn.
    mailbox = Mailbox()
    try:
        parsed, value = get_name_addr(value)
    except errors.HeaderParseError:
        try:
            parsed, value = get_addr_spec(value)
        except errors.HeaderParseError:
            raise errors.HeaderParseError(
                "expected mailbox but found '{}'".format(value))
    for defect in parsed.all_defects:
        if isinstance(defect, errors.InvalidHeaderDefect):
            mailbox.token_type = 'invalid-mailbox'
            break
    mailbox.append(parsed)
    return mailbox, value
+
def get_invalid_mailbox(value, endchars):
    """ Read everything up to one of the chars in endchars.

    This is outside the formal grammar.  The InvalidMailbox TokenList that
    is returned acts like a Mailbox, but the data attributes are None.

    The text is consumed as a sequence of phrases; any PHRASE_ENDS
    character that is not in *endchars* is kept as a 'misplaced-special'
    terminal.  Returns an (InvalidMailbox, remainder) pair.

    """
    invalid_mailbox = InvalidMailbox()
    while value and value[0] not in endchars:
        if value[0] not in PHRASE_ENDS:
            token, value = get_phrase(value)
        else:
            token = ValueTerminal(value[0], 'misplaced-special')
            value = value[1:]
        invalid_mailbox.append(token)
    return invalid_mailbox, value
+
def get_mailbox_list(value):
    """ mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list
        obs-mbox-list = *([CFWS] ",") mailbox *("," [mailbox / CFWS])

    This routine goes outside the formal grammar in order to improve error
    handling.  The end of the mailbox list is recognized only at the end of
    the value or at a ';' (the group terminator), so that invalid mailboxes
    can be turned into InvalidMailbox tokens while parsing continues with
    any remaining valid mailboxes.  All mailbox entries are allowed to be
    null; that condition is handled appropriately at a higher level.

    Returns a (MailboxList, remainder) pair.

    """
    mailbox_list = MailboxList()
    defects = mailbox_list.defects
    while value and value[0] != ';':
        try:
            token, value = get_mailbox(value)
            mailbox_list.append(token)
        except errors.HeaderParseError:
            leader = None
            if value[0] in CFWS_LEADER:
                leader, value = get_cfws(value)
            if leader is not None and (not value or value[0] in ',;'):
                # Nothing but CFWS in this list element.
                mailbox_list.append(leader)
                defects.append(errors.ObsoleteHeaderDefect(
                    "empty element in mailbox-list"))
            elif leader is None and value[0] == ',':
                # Completely empty list element.
                defects.append(errors.ObsoleteHeaderDefect(
                    "empty element in mailbox-list"))
            else:
                token, value = get_invalid_mailbox(value, ',;')
                if leader is not None:
                    token[:0] = [leader]
                mailbox_list.append(token)
                defects.append(errors.InvalidHeaderDefect(
                    "invalid mailbox in mailbox-list"))
        if value and value[0] not in ',;':
            # Crap after mailbox; treat it as an invalid mailbox.
            # The mailbox info will still be available.
            last = mailbox_list[-1]
            last.token_type = 'invalid-mailbox'
            extra, value = get_invalid_mailbox(value, ',;')
            last.extend(extra)
            defects.append(errors.InvalidHeaderDefect(
                "invalid mailbox in mailbox-list"))
        if value and value[0] == ',':
            mailbox_list.append(ListSeparator)
            value = value[1:]
    return mailbox_list, value
+
+
def get_group_list(value):
    """ group-list = mailbox-list / CFWS / obs-group-list
        obs-group-list = 1*([CFWS] ",") [CFWS]

    Returns a (GroupList, remainder) pair.  Parsing stops at the ';' that
    terminates the enclosing group; the ';' itself is left in the
    remainder.  Empty input is accepted with an InvalidHeaderDefect, and a
    list that contains no mailboxes at all gets an ObsoleteHeaderDefect.

    """
    group_list = GroupList()
    if not value:
        group_list.defects.append(errors.InvalidHeaderDefect(
            "end of header before group-list"))
        return group_list, value
    leader = None
    if value and value[0] in CFWS_LEADER:
        leader, value = get_cfws(value)
        if not value:
            # This should never happen in email parsing, since CFWS-only is a
            # legal alternative to group-list in a group, which is the only
            # place group-list appears.
            group_list.defects.append(errors.InvalidHeaderDefect(
                "end of header in group-list"))
            group_list.append(leader)
            return group_list, value
    if value[0] == ';':
        # Only record the CFWS if there was any; appending None would put
        # a non-token into the parse tree.
        if leader is not None:
            group_list.append(leader)
        return group_list, value
    token, value = get_mailbox_list(value)
    if len(token.all_mailboxes)==0:
        # Nothing but separators and CFWS: flatten the list into ours.
        if leader is not None:
            group_list.append(leader)
        group_list.extend(token)
        group_list.defects.append(errors.ObsoleteHeaderDefect(
            "group-list with empty entries"))
        return group_list, value
    if leader is not None:
        token[:0] = [leader]
    group_list.append(token)
    return group_list, value
+
def get_group(value):
    """ group = display-name ":" [group-list] ";" [CFWS]

    Returns a (Group, remainder) pair.  If the input ends before the
    terminating ';', an InvalidHeaderDefect is recorded and the terminator
    token is supplied anyway.

    Raises HeaderParseError if the display name is not followed by ':' or
    if something other than ';' follows the group list.

    """
    group = Group()
    token, value = get_display_name(value)
    if not value or value[0] != ':':
        raise errors.HeaderParseError("expected ':' at end of group "
            "display name but found '{}'".format(value))
    group.append(token)
    group.append(ValueTerminal(':', 'group-display-name-terminator'))
    value = value[1:]
    if value and value[0] == ';':
        group.append(ValueTerminal(';', 'group-terminator'))
        return group, value[1:]
    token, value = get_group_list(value)
    group.append(token)
    if not value:
        group.defects.append(errors.InvalidHeaderDefect(
            "end of header in group"))
    elif value[0] != ';':
        # 'elif': when the input is exhausted the defect above is all that
        # is needed; looking at value[0] would raise IndexError.
        raise errors.HeaderParseError(
            "expected ';' at end of group but found {}".format(value))
    group.append(ValueTerminal(';', 'group-terminator'))
    value = value[1:]
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        group.append(token)
    return group, value
+
def get_address(value):
    """ address = mailbox / group

    Counter-intuitively, an address can be either a single address or a
    list of addresses (a group).  That is why the returned Address object
    has a 'mailboxes' attribute, which treats a single address as a list of
    length one.  To tell the two cases apart, extract the single element,
    which is either a mailbox or a group token.

    Returns an (Address, remainder) pair.

    Raises HeaderParseError if *value* parses as neither a group nor a
    mailbox.

    """
    # The formal grammar isn't very helpful when parsing an address.  mailbox
    # and group, especially when allowing for obsolete forms, start off very
    # similarly.  It is only when you reach one of @, <, or : that you know
    # what you've got.  So each one is tried in turn.  This could perhaps be
    # made more efficient by looking for a phrase and then branching on the
    # next character, but that would be a premature optimization.
    address = Address()
    try:
        content, value = get_group(value)
    except errors.HeaderParseError:
        try:
            content, value = get_mailbox(value)
        except errors.HeaderParseError:
            raise errors.HeaderParseError(
                "expected address but found '{}'".format(value))
    address.append(content)
    return address, value
+
def get_address_list(value):
    """ address_list = (address *("," address)) / obs-addr-list
        obs-addr-list = *([CFWS] ",") address *("," [address / CFWS])

    This departs from the formal grammar by continuing to parse until the
    end of the input, on the assumption that the input consists entirely of
    an address-list.  That is always true in email parsing, and it allows
    invalid addresses to be skipped so that later valid ones still get
    parsed.

    Returns an (AddressList, remainder) pair.

    """
    address_list = AddressList()
    defects = address_list.defects
    while value:
        try:
            token, value = get_address(value)
            address_list.append(token)
        except errors.HeaderParseError:
            leader = None
            if value[0] in CFWS_LEADER:
                leader, value = get_cfws(value)
            if leader is not None and (not value or value[0] == ','):
                # Nothing but CFWS in this list element.
                address_list.append(leader)
                defects.append(errors.ObsoleteHeaderDefect(
                    "address-list entry with no content"))
            elif leader is None and value[0] == ',':
                # Completely empty list element.
                defects.append(errors.ObsoleteHeaderDefect(
                    "empty element in address-list"))
            else:
                token, value = get_invalid_mailbox(value, ',')
                if leader is not None:
                    token[:0] = [leader]
                address_list.append(Address([token]))
                defects.append(errors.InvalidHeaderDefect(
                    "invalid address in address-list"))
        if value and value[0] != ',':
            # Crap after address; treat it as an invalid mailbox.
            # The mailbox info will still be available.
            last = address_list[-1][0]
            last.token_type = 'invalid-mailbox'
            extra, value = get_invalid_mailbox(value, ',')
            last.extend(extra)
            defects.append(errors.InvalidHeaderDefect(
                "invalid address in address-list"))
        if value:  # Must be a , at this point.
            address_list.append(ValueTerminal(',', 'list-separator'))
            value = value[1:]
    return address_list, value
+
+#
+# XXX: As I begin to add additional header parsers, I'm realizing we probably
+# have two levels of parser routines: the get_XXX methods that get a token in
+# the grammar, and parse_XXX methods that parse an entire field value. So
+# get_address_list above should really be a parse_ method, as probably should
+# get_unstructured.
+#
+
def parse_mime_version(value):
    """ mime-version = [CFWS] 1*digit [CFWS] "." [CFWS] 1*digit [CFWS]

    Parse a complete MIME-Version field value and return a MIMEVersion
    token list.  When the corresponding numbers are valid, its 'major' and
    'minor' attributes are set to ints.  Problems are recorded as defects
    on the returned token; this routine does not raise on malformed input.

    """
    # The [CFWS] is implicit in the RFC 2045 BNF.
    # XXX: This routine is a bit verbose, should factor out a get_int method.
    mime_version = MIMEVersion()
    defects = mime_version.defects
    if not value:
        defects.append(errors.HeaderMissingRequiredValue(
            "Missing MIME version number (eg: 1.0)"))
        return mime_version
    if value[0] in CFWS_LEADER:
        cfws, value = get_cfws(value)
        mime_version.append(cfws)
        if not value:
            defects.append(errors.HeaderMissingRequiredValue(
                "Expected MIME version number but found only CFWS"))

    # Major number: everything up to the '.' or the next CFWS.
    end = 0
    while (end < len(value) and value[end] != '.'
            and value[end] not in CFWS_LEADER):
        end += 1
    digits, value = value[:end], value[end:]
    if digits.isdigit():
        mime_version.major = int(digits)
        mime_version.append(ValueTerminal(digits, 'digits'))
    else:
        defects.append(errors.InvalidHeaderDefect(
            "Expected MIME major version number but found {!r}".format(digits)))
        mime_version.append(ValueTerminal(digits, 'xtext'))
    if value and value[0] in CFWS_LEADER:
        cfws, value = get_cfws(value)
        mime_version.append(cfws)

    # Separator.
    if not value or value[0] != '.':
        if mime_version.major is not None:
            defects.append(errors.InvalidHeaderDefect(
                "Incomplete MIME version; found only major number"))
        if value:
            mime_version.append(ValueTerminal(value, 'xtext'))
        return mime_version
    mime_version.append(ValueTerminal('.', 'version-separator'))
    value = value[1:]
    if value and value[0] in CFWS_LEADER:
        cfws, value = get_cfws(value)
        mime_version.append(cfws)
    if not value:
        if mime_version.major is not None:
            defects.append(errors.InvalidHeaderDefect(
                "Incomplete MIME version; found only major number"))
        return mime_version

    # Minor number: everything up to the next CFWS.
    end = 0
    while end < len(value) and value[end] not in CFWS_LEADER:
        end += 1
    digits, value = value[:end], value[end:]
    if digits.isdigit():
        mime_version.minor = int(digits)
        mime_version.append(ValueTerminal(digits, 'digits'))
    else:
        defects.append(errors.InvalidHeaderDefect(
            "Expected MIME minor version number but found {!r}".format(digits)))
        mime_version.append(ValueTerminal(digits, 'xtext'))
    if value and value[0] in CFWS_LEADER:
        cfws, value = get_cfws(value)
        mime_version.append(cfws)
    if value:
        defects.append(errors.InvalidHeaderDefect(
            "Excess non-CFWS text after MIME version"))
        mime_version.append(ValueTerminal(value, 'xtext'))
    return mime_version
+
def get_invalid_parameter(value):
    """ Read everything up to the next ';'.

    This sits outside the formal grammar.  The text is collected into an
    InvalidParameter token list: characters found in PHRASE_ENDS become
    'misplaced-special' terminals and everything else is parsed with
    get_phrase.  Returns ``(invalid_parameter, remaining_value)``; the
    remainder is either empty or starts with ';'.

    """
    collected = InvalidParameter()
    while value:
        head = value[0]
        if head == ';':
            break
        if head in PHRASE_ENDS:
            piece = ValueTerminal(head, 'misplaced-special')
            value = value[1:]
        else:
            piece, value = get_phrase(value)
        collected.append(piece)
    return collected, value
+
def get_ttext(value):
    """ttext = <matches _ttext_matcher>

    Consume the leading run accepted by _non_token_end_matcher and return
    it as a 'ttext' ValueTerminal together with the unparsed remainder.
    Any non-TOKEN_ENDS character is accepted; the terminal is then handed
    to _validate_xtext so that non-ttext and non-printable characters can
    be reported through the token's defects list, in the spirit of
    RFC 5322.

    Raises errors.HeaderParseError when the matcher finds nothing.

    """
    match = _non_token_end_matcher(value)
    if not match:
        raise errors.HeaderParseError(
            "expected ttext but found '{}'".format(value))
    matched_text = match.group()
    remainder = value[len(matched_text):]
    terminal = ValueTerminal(matched_text, 'ttext')
    _validate_xtext(terminal)
    return terminal, remainder
+
def get_token(value):
    """token = [CFWS] 1*ttext [CFWS]

    The RFC equivalent of ttext is any US-ASCII chars except space, ctls, or
    tspecials.  Tabs are excluded here as well even though the RFC does not
    exclude them.  The surrounding CFWS is implied by the RFC rather than
    spelled out in its BNF.

    Returns ``(Token, remaining_value)`` and raises
    errors.HeaderParseError when no ttext can be read.

    """
    result = Token()

    if value and value[0] in CFWS_LEADER:
        leading_cfws, value = get_cfws(value)
        result.append(leading_cfws)

    starts_with_delimiter = bool(value) and value[0] in TOKEN_ENDS
    if starts_with_delimiter:
        raise errors.HeaderParseError(
            "expected token but found '{}'".format(value))

    text, value = get_ttext(value)
    result.append(text)

    if value and value[0] in CFWS_LEADER:
        trailing_cfws, value = get_cfws(value)
        result.append(trailing_cfws)

    return result, value
+
def get_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character)

    Consume the leading run accepted by _non_attribute_end_matcher and
    return it as an 'attrtext' ValueTerminal plus the unparsed remainder.
    The terminal is passed to _validate_xtext so that non-attrtext and
    non-printable characters can be reported through its defects list,
    in the spirit of RFC 5322.

    Raises errors.HeaderParseError when the matcher finds nothing.

    """
    match = _non_attribute_end_matcher(value)
    if not match:
        raise errors.HeaderParseError(
            "expected attrtext but found {!r}".format(value))
    matched_text = match.group()
    remainder = value[len(matched_text):]
    terminal = ValueTerminal(matched_text, 'attrtext')
    _validate_xtext(terminal)
    return terminal, remainder
+
def get_attribute(value):
    """ [CFWS] 1*attrtext [CFWS]

    This version of the BNF makes the CFWS explicit, and as usual a value
    terminal holds the actual run of characters.  The RFC equivalent of
    attrtext is the token characters minus '*', "'", and '%'; tab is
    excluded too, just as it is for token.

    Returns ``(Attribute, remaining_value)`` and raises
    errors.HeaderParseError when no attrtext can be read.

    """
    result = Attribute()

    if value and value[0] in CFWS_LEADER:
        leading_cfws, value = get_cfws(value)
        result.append(leading_cfws)

    starts_with_delimiter = bool(value) and value[0] in ATTRIBUTE_ENDS
    if starts_with_delimiter:
        raise errors.HeaderParseError(
            "expected token but found '{}'".format(value))

    text, value = get_attrtext(value)
    result.append(text)

    if value and value[0] in CFWS_LEADER:
        trailing_cfws, value = get_cfws(value)
        result.append(trailing_cfws)

    return result, value
+
def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    Special parsing routine that keeps a value containing % escapes
    together as one string, so it can be decoded as a single string later.
    The leading run accepted by _non_extended_attribute_end_matcher is
    returned as an 'extended-attrtext' ValueTerminal (after being passed
    to _validate_xtext) along with the unparsed remainder.

    Raises errors.HeaderParseError when the matcher finds nothing.

    """
    match = _non_extended_attribute_end_matcher(value)
    if not match:
        raise errors.HeaderParseError(
            "expected extended attrtext but found {!r}".format(value))
    matched_text = match.group()
    remainder = value[len(matched_text):]
    terminal = ValueTerminal(matched_text, 'extended-attrtext')
    _validate_xtext(terminal)
    return terminal, remainder
+
def get_extended_attribute(value):
    """ [CFWS] 1*extended_attrtext [CFWS]

    Same shape as get_attribute, except that % characters are allowed so
    an encoded value can be picked up as a single string.

    Returns ``(Attribute, remaining_value)`` and raises
    errors.HeaderParseError when no extended attrtext can be read.

    """
    # XXX: should we have an ExtendedAttribute TokenList?
    result = Attribute()

    if value and value[0] in CFWS_LEADER:
        leading_cfws, value = get_cfws(value)
        result.append(leading_cfws)

    starts_with_delimiter = (bool(value)
                             and value[0] in EXTENDED_ATTRIBUTE_ENDS)
    if starts_with_delimiter:
        raise errors.HeaderParseError(
            "expected token but found '{}'".format(value))

    text, value = get_extended_attrtext(value)
    result.append(text)

    if value and value[0] in CFWS_LEADER:
        trailing_cfws, value = get_cfws(value)
        result.append(trailing_cfws)

    return result, value
+
def get_section(value):
    """ '*' digits

    The formal BNF is more complicated because leading 0s are not allowed.  We
    check for that and add a defect.  We also assume no CFWS is allowed between
    the '*' and the digits, though the RFC is not crystal clear on that.
    The caller should already have dealt with leading CFWS.

    Returns ``(Section, remaining_value)``; ``Section.number`` holds the
    parsed integer.

    Raises errors.HeaderParseError when value does not start with '*'
    followed by at least one digit.

    """
    section = Section()
    if not value or value[0] != '*':
        raise errors.HeaderParseError("Expected section but found {}".format(
                                        value))
    section.append(ValueTerminal('*', 'section-marker'))
    value = value[1:]
    if not value or not value[0].isdigit():
        raise errors.HeaderParseError("Expected section number but "
                                      "found {}".format(value))
    # Collect the leading digit run in one pass instead of re-slicing the
    # string once per character.
    digit_chars = []
    for ch in value:
        if not ch.isdigit():
            break
        digit_chars.append(ch)
    digits = ''.join(digit_chars)
    value = value[len(digit_chars):]
    if digits[0] == '0' and digits != '0':
        # This is a recoverable problem, so it is recorded as a defect.
        # The previous code referenced errors.InvalidHeaderError, which is
        # not the defect class used anywhere else in this module, and its
        # message was missing the space between "number" and "has".
        section.defects.append(errors.InvalidHeaderDefect(
            "section number has an invalid leading 0"))
    section.number = int(digits)
    section.append(ValueTerminal(digits, 'digits'))
    return section, value
+
+
def get_value(value):
    """ quoted-string / attribute

    Parse one parameter value.  Optional leading CFWS is read first; the
    body is then parsed with get_quoted_string when it starts with '"' and
    with get_extended_attribute otherwise.  The leading CFWS, if any, is
    spliced in as the first child of the parsed body, which is wrapped in
    a Value token list.

    Returns ``(Value, remaining_value)``.  Raises errors.HeaderParseError
    when value is empty or consists only of CFWS.

    """
    if not value:
        raise errors.HeaderParseError("Expected value but found end of string")

    leading_cfws = None
    if value[0] in CFWS_LEADER:
        leading_cfws, value = get_cfws(value)
        if not value:
            raise errors.HeaderParseError("Expected value but found "
                                          "only {}".format(leading_cfws))

    parse_body = (get_quoted_string if value[0] == '"'
                  else get_extended_attribute)
    body, value = parse_body(value)
    if leading_cfws is not None:
        body[:0] = [leading_cfws]

    wrapper = Value()
    wrapper.append(body)
    return wrapper, value
+
def get_parameter(value):
    """ attribute [section] ["*"] [CFWS] "=" value

    The CFWS is implied by the RFC but not made explicit in the BNF.  This
    simplified form of the BNF from the RFC is made to conform with the RFC BNF
    through some extra checks.  We do it this way because it makes both error
    recovery and working with the resulting parse tree easier.

    Returns ``(Parameter, remaining_value)``.  Recoverable problems are
    recorded on ``Parameter.defects``.

    Raises errors.HeaderParseError when the text cannot be interpreted as
    a parameter at all (for example no '=' after the name, or malformed
    RFC 2231 charset/lang delimiters).
    """
    # It is possible CFWS would also be implicitly allowed between the section
    # and the 'extended-attribute' marker (the '*') , but we've never seen that
    # in the wild and we will therefore ignore the possibility.
    param = Parameter()
    token, value = get_attribute(value)
    param.append(token)
    if not value or value[0] == ';':
        param.defects.append(errors.InvalidHeaderDefect("Parameter contains "
            "name ({}) but no value".format(token)))
        return param, value
    if value[0] == '*':
        try:
            token, value = get_section(value)
            param.sectioned = True
            param.append(token)
        except errors.HeaderParseError:
            # Not a section number; the '*' may still be the extended marker.
            pass
        if not value:
            raise errors.HeaderParseError("Incomplete parameter")
        if value[0] == '*':
            param.append(ValueTerminal('*', 'extended-parameter-marker'))
            value = value[1:]
            param.extended = True
    # value can be empty here when the input ended right after the extended
    # marker (e.g. "name*"); report that as a parse error instead of letting
    # value[0] raise IndexError.
    if not value or value[0] != '=':
        raise errors.HeaderParseError("Parameter not followed by '='")
    param.append(ValueTerminal('=', 'parameter-separator'))
    value = value[1:]
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        param.append(token)
    # remainder holds the text following a quoted extended value while the
    # quoted content itself is re-parsed; appendto is where value tokens go.
    remainder = None
    appendto = param
    if param.extended and value and value[0] == '"':
        # Now for some serious hackery to handle the common invalid case of
        # double quotes around an extended value.  We also accept (with defect)
        # a value marked as encoded that isn't really.
        qstring, remainder = get_quoted_string(value)
        inner_value = qstring.stripped_value
        semi_valid = False
        if param.section_number == 0:
            if inner_value and inner_value[0] == "'":
                semi_valid = True
            else:
                token, rest = get_attrtext(inner_value)
                if rest and rest[0] == "'":
                    semi_valid = True
        else:
            try:
                token, rest = get_extended_attrtext(inner_value)
            except errors.HeaderParseError:
                # Only a parse failure means "not semi-valid"; anything else
                # (including KeyboardInterrupt) must not be swallowed.
                pass
            else:
                if not rest:
                    semi_valid = True
        if semi_valid:
            param.defects.append(errors.InvalidHeaderDefect(
                "Quoted string value for extended parameter is invalid"))
            param.append(qstring)
            for t in qstring:
                if t.token_type == 'bare-quoted-string':
                    # Empty the quoted content so the re-parsed tokens can
                    # be placed inside the quotes instead.
                    t[:] = []
                    appendto = t
                    break
            value = inner_value
        else:
            remainder = None
            param.defects.append(errors.InvalidHeaderDefect(
                "Parameter marked as extended but appears to have a "
                "quoted string value that is non-encoded"))
    if value and value[0] == "'":
        # Empty charset: the value starts directly with the delimiter.
        token = None
    else:
        token, value = get_value(value)
    if not param.extended or param.section_number > 0:
        if not value or value[0] != "'":
            appendto.append(token)
            if remainder is not None:
                assert not value, value
                value = remainder
            return param, value
        param.defects.append(errors.InvalidHeaderDefect(
            "Apparent initial-extended-value but attribute "
            "was not marked as extended or was not initial section"))
    if not value:
        # Assume the charset/lang is missing and the token is the value.
        param.defects.append(errors.InvalidHeaderDefect(
            "Missing required charset/lang delimiters"))
        appendto.append(token)
        if remainder is None:
            return param, value
    else:
        if token is not None:
            for t in token:
                if t.token_type == 'extended-attrtext':
                    break
            # NOTE(review): the original followed this loop with the
            # statement `t.token_type == 'attrtext'`, a comparison whose
            # result was discarded.  It is dropped here; no retagging is
            # introduced, so the resulting tokens are unchanged.
            appendto.append(t)
            param.charset = t.value
        if value[0] != "'":
            raise errors.HeaderParseError("Expected RFC2231 char/lang encoding "
                                          "delimiter, but found {!r}".format(value))
        appendto.append(ValueTerminal("'", 'RFC2231 delimiter'))
        value = value[1:]
        if value and value[0] != "'":
            token, value = get_attrtext(value)
            appendto.append(token)
            param.lang = token.value
            if not value or value[0] != "'":
                raise errors.HeaderParseError("Expected RFC2231 char/lang encoding "
                                  "delimiter, but found {}".format(value))
        appendto.append(ValueTerminal("'", 'RFC2231 delimiter'))
        value = value[1:]
    if remainder is not None:
        # Treat the rest of value as bare quoted string content.
        v = Value()
        while value:
            if value[0] in WSP:
                token, value = get_fws(value)
            else:
                token, value = get_qcontent(value)
            v.append(token)
        token = v
    else:
        token, value = get_value(value)
    appendto.append(token)
    if remainder is not None:
        assert not value, value
        value = remainder
    return param, value
+
def parse_mime_parameters(value):
    """ parameter *( ";" parameter )

    That BNF is meant to indicate this routine should only be called after
    finding and handling the leading ';'.  There is no corresponding rule in
    the formal RFC grammar, but it is more convenient for us for the set of
    parameters to be treated as its own TokenList.

    This is a 'parse' routine because it consumes the remaining value, but it
    would never be called to parse a full header.  Instead it is called to
    parse everything after the non-parameter value of a specific MIME header.

    Text that get_parameter rejects is kept as an invalid parameter and a
    defect is recorded on the returned MimeParameters.

    """
    params = MimeParameters()
    while value:
        try:
            parsed, value = get_parameter(value)
        except errors.HeaderParseError:
            # get_parameter failed before value was rebound, so recovery
            # starts from the same text it was given.
            leading_cfws = None
            if value[0] in CFWS_LEADER:
                leading_cfws, value = get_cfws(value)
            if not value:
                params.append(leading_cfws)
                return params
            if value[0] != ';':
                bad_param, value = get_invalid_parameter(value)
                if leading_cfws:
                    bad_param[:0] = [leading_cfws]
                params.append(bad_param)
                params.defects.append(errors.InvalidHeaderDefect(
                    "invalid parameter {!r}".format(bad_param)))
            else:
                if leading_cfws is not None:
                    params.append(leading_cfws)
                params.defects.append(errors.InvalidHeaderDefect(
                    "parameter entry with no content"))
        else:
            params.append(parsed)

        if value and value[0] != ';':
            # Junk after the otherwise valid parameter.  Mark it as
            # invalid, but it will have a value.
            last_param = params[-1]
            last_param.token_type = 'invalid-parameter'
            junk, value = get_invalid_parameter(value)
            last_param.extend(junk)
            params.defects.append(errors.InvalidHeaderDefect(
                "parameter with invalid trailing text {!r}".format(junk)))

        if value:
            # Must be a ';' at this point.
            params.append(ValueTerminal(';', 'parameter-separator'))
            value = value[1:]
    return params
+
def _find_mime_parameters(tokenlist, value):
    """Do our best to find the parameters in an invalid MIME header

    Everything before the first ';' is appended to tokenlist, either as
    'misplaced-special' terminals (for PHRASE_ENDS characters) or as
    phrases.  If a ';' is found, it is appended as a parameter separator
    and the text after it is parsed with parse_mime_parameters.  The
    tokenlist is modified in place; nothing is returned.

    """
    while value:
        head = value[0]
        if head == ';':
            break
        if head in PHRASE_ENDS:
            piece = ValueTerminal(head, 'misplaced-special')
            value = value[1:]
        else:
            piece, value = get_phrase(value)
        tokenlist.append(piece)

    if value:
        # The loop stopped on a ';', so parameters follow.
        tokenlist.append(ValueTerminal(';', 'parameter-separator'))
        tokenlist.append(parse_mime_parameters(value[1:]))
+
def parse_content_type_header(value):
    """ maintype "/" subtype *( ";" parameter )

    The maintype and subtype are tokens.  Theoretically they could
    be checked against the official IANA list + x-token, but we
    don't do that.

    Returns a ContentType token list.  Problems are recorded on its
    ``defects`` list; when the type itself cannot be parsed, the routine
    still tries to locate parameters via _find_mime_parameters.
    """
    ctype = ContentType()
    if not value:
        ctype.defects.append(errors.HeaderMissingRequiredValue(
            "Missing content type specification"))
        return ctype

    try:
        main_token, value = get_token(value)
    except errors.HeaderParseError:
        ctype.defects.append(errors.InvalidHeaderDefect(
            "Expected content maintype but found {!r}".format(value)))
        _find_mime_parameters(ctype, value)
        return ctype
    ctype.append(main_token)

    # XXX: If we really want to follow the formal grammar we should make
    # maintype and subtype specialized TokenLists here.  Probably not worth it.
    has_separator = bool(value) and value[0] == '/'
    if not has_separator:
        ctype.defects.append(errors.InvalidHeaderDefect(
            "Invalid content type"))
        if value:
            _find_mime_parameters(ctype, value)
        return ctype
    ctype.maintype = main_token.value.strip().lower()
    ctype.append(ValueTerminal('/', 'content-type-separator'))
    value = value[1:]

    try:
        sub_token, value = get_token(value)
    except errors.HeaderParseError:
        ctype.defects.append(errors.InvalidHeaderDefect(
            "Expected content subtype but found {!r}".format(value)))
        _find_mime_parameters(ctype, value)
        return ctype
    ctype.append(sub_token)
    ctype.subtype = sub_token.value.strip().lower()

    if not value:
        return ctype
    if value[0] == ';':
        ctype.append(ValueTerminal(';', 'parameter-separator'))
        ctype.append(parse_mime_parameters(value[1:]))
        return ctype

    ctype.defects.append(errors.InvalidHeaderDefect(
        "Only parameters are valid after content type, but "
        "found {!r}".format(value)))
    # The RFC requires that a syntactically invalid content-type be treated
    # as text/plain.  Perhaps we should postel this, but we should probably
    # only do that if we were checking the subtype value against IANA.
    del ctype.maintype, ctype.subtype
    _find_mime_parameters(ctype, value)
    return ctype
+
def parse_content_disposition_header(value):
    """ disposition-type *( ";" parameter )

    Returns a ContentDisposition token list.  When the disposition type
    parses, ``content_disposition`` is set to its stripped, lower-cased
    text.  Problems are recorded on the ``defects`` list, and on malformed
    input the routine still tries to locate parameters via
    _find_mime_parameters.

    """
    disp_header = ContentDisposition()
    if not value:
        disp_header.defects.append(errors.HeaderMissingRequiredValue(
            "Missing content disposition"))
        return disp_header
    try:
        token, value = get_token(value)
    except errors.HeaderParseError:
        # Record the defect on the header being built.  The previous code
        # appended to ``ctype``, a name that does not exist in this
        # function, so this path raised NameError instead.
        disp_header.defects.append(errors.InvalidHeaderDefect(
            "Expected content disposition but found {!r}".format(value)))
        _find_mime_parameters(disp_header, value)
        return disp_header
    disp_header.append(token)
    disp_header.content_disposition = token.value.strip().lower()
    if not value:
        return disp_header
    if value[0] != ';':
        disp_header.defects.append(errors.InvalidHeaderDefect(
            "Only parameters are valid after content disposition, but "
            "found {!r}".format(value)))
        _find_mime_parameters(disp_header, value)
        return disp_header
    disp_header.append(ValueTerminal(';', 'parameter-separator'))
    disp_header.append(parse_mime_parameters(value[1:]))
    return disp_header
+
def parse_content_transfer_encoding_header(value):
    """ mechanism

    Returns a ContentTransferEncoding token list.  When the mechanism
    token parses, ``cte`` is set to its stripped, lower-cased text.  Any
    text following the mechanism is kept in the token list and reported
    through the ``defects`` list.

    """
    # We should probably validate the values, since the list is fixed.
    cte_header = ContentTransferEncoding()
    if not value:
        cte_header.defects.append(errors.HeaderMissingRequiredValue(
            "Missing content transfer encoding"))
        return cte_header
    try:
        token, value = get_token(value)
    except errors.HeaderParseError:
        # Record the defect on the header being built.  The previous code
        # appended to ``ctype``, a name that does not exist in this
        # function, so this path raised NameError instead; the message
        # also misspelled "transfer".
        cte_header.defects.append(errors.InvalidHeaderDefect(
            "Expected content transfer encoding but found {!r}".format(value)))
    else:
        cte_header.append(token)
        cte_header.cte = token.value.strip().lower()
    # Anything still unparsed is extra text; one defect is recorded per
    # piece consumed, as before.
    while value:
        cte_header.defects.append(errors.InvalidHeaderDefect(
            "Extra text after content transfer encoding"))
        if value[0] in PHRASE_ENDS:
            cte_header.append(ValueTerminal(value[0], 'misplaced-special'))
            value = value[1:]
        else:
            token, value = get_phrase(value)
            cte_header.append(token)
    return cte_header
diff --git a/contrib/python/future/future/backports/email/_parseaddr.py b/contrib/python/future/future/backports/email/_parseaddr.py
index f87c2fc4b6..5b50cc6bd1 100644
--- a/contrib/python/future/future/backports/email/_parseaddr.py
+++ b/contrib/python/future/future/backports/email/_parseaddr.py
@@ -1,546 +1,546 @@
-# Copyright (C) 2002-2007 Python Software Foundation
-# Contact: email-sig@python.org
-
-"""Email address parsing code.
-
-Lifted directly from rfc822.py. This should eventually be rewritten.
-"""
-
-from __future__ import unicode_literals
-from __future__ import print_function
-from __future__ import division
-from __future__ import absolute_import
-from future.builtins import int
-
-__all__ = [
- 'mktime_tz',
- 'parsedate',
- 'parsedate_tz',
- 'quote',
- ]
-
-import time, calendar
-
-SPACE = ' '
-EMPTYSTRING = ''
-COMMASPACE = ', '
-
-# Parse a date field
-_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
- 'aug', 'sep', 'oct', 'nov', 'dec',
- 'january', 'february', 'march', 'april', 'may', 'june', 'july',
- 'august', 'september', 'october', 'november', 'december']
-
-_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
-
-# The timezone table does not include the military time zones defined
-# in RFC822, other than Z. According to RFC1123, the description in
-# RFC822 gets the signs wrong, so we can't rely on any such time
-# zones. RFC1123 recommends that numeric timezone indicators be used
-# instead of timezone names.
-
-_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
- 'AST': -400, 'ADT': -300, # Atlantic (used in Canada)
- 'EST': -500, 'EDT': -400, # Eastern
- 'CST': -600, 'CDT': -500, # Central
- 'MST': -700, 'MDT': -600, # Mountain
- 'PST': -800, 'PDT': -700 # Pacific
- }
-
-
-def parsedate_tz(data):
- """Convert a date string to a time tuple.
-
- Accounts for military timezones.
- """
- res = _parsedate_tz(data)
- if not res:
- return
- if res[9] is None:
- res[9] = 0
- return tuple(res)
-
-def _parsedate_tz(data):
- """Convert date to extended time tuple.
-
- The last (additional) element is the time zone offset in seconds, except if
- the timezone was specified as -0000. In that case the last element is
- None. This indicates a UTC timestamp that explicitly declaims knowledge of
- the source timezone, as opposed to a +0000 timestamp that indicates the
- source timezone really was UTC.
-
- """
- if not data:
- return
- data = data.split()
- # The FWS after the comma after the day-of-week is optional, so search and
- # adjust for this.
- if data[0].endswith(',') or data[0].lower() in _daynames:
- # There's a dayname here. Skip it
- del data[0]
- else:
- i = data[0].rfind(',')
- if i >= 0:
- data[0] = data[0][i+1:]
- if len(data) == 3: # RFC 850 date, deprecated
- stuff = data[0].split('-')
- if len(stuff) == 3:
- data = stuff + data[1:]
- if len(data) == 4:
- s = data[3]
- i = s.find('+')
- if i == -1:
- i = s.find('-')
- if i > 0:
- data[3:] = [s[:i], s[i:]]
- else:
- data.append('') # Dummy tz
- if len(data) < 5:
- return None
- data = data[:5]
- [dd, mm, yy, tm, tz] = data
- mm = mm.lower()
- if mm not in _monthnames:
- dd, mm = mm, dd.lower()
- if mm not in _monthnames:
- return None
- mm = _monthnames.index(mm) + 1
- if mm > 12:
- mm -= 12
- if dd[-1] == ',':
- dd = dd[:-1]
- i = yy.find(':')
- if i > 0:
- yy, tm = tm, yy
- if yy[-1] == ',':
- yy = yy[:-1]
- if not yy[0].isdigit():
- yy, tz = tz, yy
- if tm[-1] == ',':
- tm = tm[:-1]
- tm = tm.split(':')
- if len(tm) == 2:
- [thh, tmm] = tm
- tss = '0'
- elif len(tm) == 3:
- [thh, tmm, tss] = tm
- elif len(tm) == 1 and '.' in tm[0]:
- # Some non-compliant MUAs use '.' to separate time elements.
- tm = tm[0].split('.')
- if len(tm) == 2:
- [thh, tmm] = tm
- tss = 0
- elif len(tm) == 3:
- [thh, tmm, tss] = tm
- else:
- return None
- try:
- yy = int(yy)
- dd = int(dd)
- thh = int(thh)
- tmm = int(tmm)
- tss = int(tss)
- except ValueError:
- return None
- # Check for a yy specified in two-digit format, then convert it to the
- # appropriate four-digit format, according to the POSIX standard. RFC 822
- # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
- # mandates a 4-digit yy. For more information, see the documentation for
- # the time module.
- if yy < 100:
- # The year is between 1969 and 1999 (inclusive).
- if yy > 68:
- yy += 1900
- # The year is between 2000 and 2068 (inclusive).
- else:
- yy += 2000
- tzoffset = None
- tz = tz.upper()
- if tz in _timezones:
- tzoffset = _timezones[tz]
- else:
- try:
- tzoffset = int(tz)
- except ValueError:
- pass
- if tzoffset==0 and tz.startswith('-'):
- tzoffset = None
- # Convert a timezone offset into seconds ; -0500 -> -18000
- if tzoffset:
- if tzoffset < 0:
- tzsign = -1
- tzoffset = -tzoffset
- else:
- tzsign = 1
- tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
- # Daylight Saving Time flag is set to -1, since DST is unknown.
- return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset]
-
-
-def parsedate(data):
- """Convert a time string to a time tuple."""
- t = parsedate_tz(data)
- if isinstance(t, tuple):
- return t[:9]
- else:
- return t
-
-
-def mktime_tz(data):
- """Turn a 10-tuple as returned by parsedate_tz() into a POSIX timestamp."""
- if data[9] is None:
- # No zone info, so localtime is better assumption than GMT
- return time.mktime(data[:8] + (-1,))
- else:
- t = calendar.timegm(data)
- return t - data[9]
-
-
-def quote(str):
- """Prepare string to be used in a quoted string.
-
- Turns backslash and double quote characters into quoted pairs. These
- are the only characters that need to be quoted inside a quoted string.
- Does not add the surrounding double quotes.
- """
- return str.replace('\\', '\\\\').replace('"', '\\"')
-
-
-class AddrlistClass(object):
- """Address parser class by Ben Escoto.
-
- To understand what this class does, it helps to have a copy of RFC 2822 in
- front of you.
-
- Note: this class interface is deprecated and may be removed in the future.
- Use email.utils.AddressList instead.
- """
-
- def __init__(self, field):
- """Initialize a new instance.
-
- `field' is an unparsed address header field, containing
- one or more addresses.
- """
- self.specials = '()<>@,:;.\"[]'
- self.pos = 0
- self.LWS = ' \t'
- self.CR = '\r\n'
- self.FWS = self.LWS + self.CR
- self.atomends = self.specials + self.LWS + self.CR
- # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
- # is obsolete syntax. RFC 2822 requires that we recognize obsolete
- # syntax, so allow dots in phrases.
- self.phraseends = self.atomends.replace('.', '')
- self.field = field
- self.commentlist = []
-
- def gotonext(self):
- """Skip white space and extract comments."""
- wslist = []
- while self.pos < len(self.field):
- if self.field[self.pos] in self.LWS + '\n\r':
- if self.field[self.pos] not in '\n\r':
- wslist.append(self.field[self.pos])
- self.pos += 1
- elif self.field[self.pos] == '(':
- self.commentlist.append(self.getcomment())
- else:
- break
- return EMPTYSTRING.join(wslist)
-
- def getaddrlist(self):
- """Parse all addresses.
-
- Returns a list containing all of the addresses.
- """
- result = []
- while self.pos < len(self.field):
- ad = self.getaddress()
- if ad:
- result += ad
- else:
- result.append(('', ''))
- return result
-
- def getaddress(self):
- """Parse the next address."""
- self.commentlist = []
- self.gotonext()
-
- oldpos = self.pos
- oldcl = self.commentlist
- plist = self.getphraselist()
-
- self.gotonext()
- returnlist = []
-
- if self.pos >= len(self.field):
- # Bad email address technically, no domain.
- if plist:
- returnlist = [(SPACE.join(self.commentlist), plist[0])]
-
- elif self.field[self.pos] in '.@':
- # email address is just an addrspec
- # this isn't very efficient since we start over
- self.pos = oldpos
- self.commentlist = oldcl
- addrspec = self.getaddrspec()
- returnlist = [(SPACE.join(self.commentlist), addrspec)]
-
- elif self.field[self.pos] == ':':
- # address is a group
- returnlist = []
-
- fieldlen = len(self.field)
- self.pos += 1
- while self.pos < len(self.field):
- self.gotonext()
- if self.pos < fieldlen and self.field[self.pos] == ';':
- self.pos += 1
- break
- returnlist = returnlist + self.getaddress()
-
- elif self.field[self.pos] == '<':
- # Address is a phrase then a route addr
- routeaddr = self.getrouteaddr()
-
- if self.commentlist:
- returnlist = [(SPACE.join(plist) + ' (' +
- ' '.join(self.commentlist) + ')', routeaddr)]
- else:
- returnlist = [(SPACE.join(plist), routeaddr)]
-
- else:
- if plist:
- returnlist = [(SPACE.join(self.commentlist), plist[0])]
- elif self.field[self.pos] in self.specials:
- self.pos += 1
-
- self.gotonext()
- if self.pos < len(self.field) and self.field[self.pos] == ',':
- self.pos += 1
- return returnlist
-
- def getrouteaddr(self):
- """Parse a route address (Return-path value).
-
- This method just skips all the route stuff and returns the addrspec.
- """
- if self.field[self.pos] != '<':
- return
-
- expectroute = False
- self.pos += 1
- self.gotonext()
- adlist = ''
- while self.pos < len(self.field):
- if expectroute:
- self.getdomain()
- expectroute = False
- elif self.field[self.pos] == '>':
- self.pos += 1
- break
- elif self.field[self.pos] == '@':
- self.pos += 1
- expectroute = True
- elif self.field[self.pos] == ':':
- self.pos += 1
- else:
- adlist = self.getaddrspec()
- self.pos += 1
- break
- self.gotonext()
-
- return adlist
-
- def getaddrspec(self):
- """Parse an RFC 2822 addr-spec."""
- aslist = []
-
- self.gotonext()
- while self.pos < len(self.field):
- preserve_ws = True
- if self.field[self.pos] == '.':
- if aslist and not aslist[-1].strip():
- aslist.pop()
- aslist.append('.')
- self.pos += 1
- preserve_ws = False
- elif self.field[self.pos] == '"':
- aslist.append('"%s"' % quote(self.getquote()))
- elif self.field[self.pos] in self.atomends:
- if aslist and not aslist[-1].strip():
- aslist.pop()
- break
- else:
- aslist.append(self.getatom())
- ws = self.gotonext()
- if preserve_ws and ws:
- aslist.append(ws)
-
- if self.pos >= len(self.field) or self.field[self.pos] != '@':
- return EMPTYSTRING.join(aslist)
-
- aslist.append('@')
- self.pos += 1
- self.gotonext()
- return EMPTYSTRING.join(aslist) + self.getdomain()
-
- def getdomain(self):
- """Get the complete domain name from an address."""
- sdlist = []
- while self.pos < len(self.field):
- if self.field[self.pos] in self.LWS:
- self.pos += 1
- elif self.field[self.pos] == '(':
- self.commentlist.append(self.getcomment())
- elif self.field[self.pos] == '[':
- sdlist.append(self.getdomainliteral())
- elif self.field[self.pos] == '.':
- self.pos += 1
- sdlist.append('.')
- elif self.field[self.pos] in self.atomends:
- break
- else:
- sdlist.append(self.getatom())
- return EMPTYSTRING.join(sdlist)
-
- def getdelimited(self, beginchar, endchars, allowcomments=True):
- """Parse a header fragment delimited by special characters.
-
- `beginchar' is the start character for the fragment.
- If self is not looking at an instance of `beginchar' then
- getdelimited returns the empty string.
-
- `endchars' is a sequence of allowable end-delimiting characters.
- Parsing stops when one of these is encountered.
-
- If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
- within the parsed fragment.
- """
- if self.field[self.pos] != beginchar:
- return ''
-
- slist = ['']
- quote = False
- self.pos += 1
- while self.pos < len(self.field):
- if quote:
- slist.append(self.field[self.pos])
- quote = False
- elif self.field[self.pos] in endchars:
- self.pos += 1
- break
- elif allowcomments and self.field[self.pos] == '(':
- slist.append(self.getcomment())
- continue # have already advanced pos from getcomment
- elif self.field[self.pos] == '\\':
- quote = True
- else:
- slist.append(self.field[self.pos])
- self.pos += 1
-
- return EMPTYSTRING.join(slist)
-
- def getquote(self):
- """Get a quote-delimited fragment from self's field."""
- return self.getdelimited('"', '"\r', False)
-
- def getcomment(self):
- """Get a parenthesis-delimited fragment from self's field."""
- return self.getdelimited('(', ')\r', True)
-
- def getdomainliteral(self):
- """Parse an RFC 2822 domain-literal."""
- return '[%s]' % self.getdelimited('[', ']\r', False)
-
- def getatom(self, atomends=None):
- """Parse an RFC 2822 atom.
-
- Optional atomends specifies a different set of end token delimiters
- (the default is to use self.atomends). This is used e.g. in
- getphraselist() since phrase endings must not include the `.' (which
- is legal in phrases)."""
- atomlist = ['']
- if atomends is None:
- atomends = self.atomends
-
- while self.pos < len(self.field):
- if self.field[self.pos] in atomends:
- break
- else:
- atomlist.append(self.field[self.pos])
- self.pos += 1
-
- return EMPTYSTRING.join(atomlist)
-
- def getphraselist(self):
- """Parse a sequence of RFC 2822 phrases.
-
- A phrase is a sequence of words, which are in turn either RFC 2822
- atoms or quoted-strings. Phrases are canonicalized by squeezing all
- runs of continuous whitespace into one space.
- """
- plist = []
-
- while self.pos < len(self.field):
- if self.field[self.pos] in self.FWS:
- self.pos += 1
- elif self.field[self.pos] == '"':
- plist.append(self.getquote())
- elif self.field[self.pos] == '(':
- self.commentlist.append(self.getcomment())
- elif self.field[self.pos] in self.phraseends:
- break
- else:
- plist.append(self.getatom(self.phraseends))
-
- return plist
-
-class AddressList(AddrlistClass):
- """An AddressList encapsulates a list of parsed RFC 2822 addresses."""
- def __init__(self, field):
- AddrlistClass.__init__(self, field)
- if field:
- self.addresslist = self.getaddrlist()
- else:
- self.addresslist = []
-
- def __len__(self):
- return len(self.addresslist)
-
- def __add__(self, other):
- # Set union
- newaddr = AddressList(None)
- newaddr.addresslist = self.addresslist[:]
- for x in other.addresslist:
- if not x in self.addresslist:
- newaddr.addresslist.append(x)
- return newaddr
-
- def __iadd__(self, other):
- # Set union, in-place
- for x in other.addresslist:
- if not x in self.addresslist:
- self.addresslist.append(x)
- return self
-
- def __sub__(self, other):
- # Set difference
- newaddr = AddressList(None)
- for x in self.addresslist:
- if not x in other.addresslist:
- newaddr.addresslist.append(x)
- return newaddr
-
- def __isub__(self, other):
- # Set difference, in-place
- for x in other.addresslist:
- if x in self.addresslist:
- self.addresslist.remove(x)
- return self
-
- def __getitem__(self, index):
- # Make indexing, slices, and 'in' work
- return self.addresslist[index]
+# Copyright (C) 2002-2007 Python Software Foundation
+# Contact: email-sig@python.org
+
+"""Email address parsing code.
+
+Lifted directly from rfc822.py. This should eventually be rewritten.
+"""
+
+from __future__ import unicode_literals
+from __future__ import print_function
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import int
+
+__all__ = [
+ 'mktime_tz',
+ 'parsedate',
+ 'parsedate_tz',
+ 'quote',
+ ]
+
+import time, calendar
+
+SPACE = ' '
+EMPTYSTRING = ''
+COMMASPACE = ', '
+
+# Parse a date field
+_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
+ 'aug', 'sep', 'oct', 'nov', 'dec',
+ 'january', 'february', 'march', 'april', 'may', 'june', 'july',
+ 'august', 'september', 'october', 'november', 'december']
+
+_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
+
+# The timezone table does not include the military time zones defined
+# in RFC822, other than Z. According to RFC1123, the description in
+# RFC822 gets the signs wrong, so we can't rely on any such time
+# zones. RFC1123 recommends that numeric timezone indicators be used
+# instead of timezone names.
+
+_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
+ 'AST': -400, 'ADT': -300, # Atlantic (used in Canada)
+ 'EST': -500, 'EDT': -400, # Eastern
+ 'CST': -600, 'CDT': -500, # Central
+ 'MST': -700, 'MDT': -600, # Mountain
+ 'PST': -800, 'PDT': -700 # Pacific
+ }
+
+
def parsedate_tz(data):
    """Parse a date string into a 10-item time tuple.

    The work is delegated to ``_parsedate_tz()``.  The first nine items of
    the result are the date/time fields it produced and the tenth is the
    timezone offset; an offset reported as ``None`` is replaced by 0.

    Returns ``None`` when ``_parsedate_tz()`` produces no result.
    """
    fields = _parsedate_tz(data)
    if fields:
        offset = 0 if fields[9] is None else fields[9]
        return tuple(fields[:9]) + (offset,)
    return None
+
def _parsedate_tz(data):
    """Convert date to extended time tuple.

    The last (additional) element is the time zone offset in seconds, except
    if the timezone was specified as -0000.  In that case the last element is
    None.  This indicates a UTC timestamp that explicitly declaims knowledge
    of the source timezone, as opposed to a +0000 timestamp that indicates
    the source timezone really was UTC.

    Returns a 10-element list
    ``[year, month, day, hour, minute, second, 0, 1, -1, tzoffset]`` or
    ``None`` when `data` is empty or cannot be parsed.  Malformed input never
    raises; it always yields ``None``.
    """
    if not data:
        return None
    data = data.split()
    if not data:
        # Whitespace-only input splits into nothing; indexing data[0] below
        # would raise IndexError.
        return None
    # The FWS after the comma after the day-of-week is optional, so search and
    # adjust for this.
    if data[0].endswith(',') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    else:
        i = data[0].rfind(',')
        if i >= 0:
            data[0] = data[0][i+1:]
    if len(data) == 3:  # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued to the time ("10:00:00+0100"); split it off.
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('')  # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    if not (dd and mm and yy):
        # An RFC 850 date such as "-Jan-2020" produces empty components;
        # the [-1] / [0] indexing below would raise IndexError on them.
        return None
    mm = mm.lower()
    if mm not in _monthnames:
        # Tolerate "Jan 1" ordering by swapping day and month.
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        # Full month names occupy the second half of the table.
        mm -= 12
    if dd[-1] == ',':
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # Year and time are in swapped positions.
        yy, tm = tm, yy
    if yy[-1] == ',':
        yy = yy[:-1]
        if not yy:
            # The year field was a lone comma.
            return None
    if not yy[0].isdigit():
        yy, tz = tz, yy
    if tm[-1] == ',':
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    elif len(tm) == 1 and '.' in tm[0]:
        # Some non-compliant MUAs use '.' to separate time elements.
        tm = tm[0].split('.')
        if len(tm) == 2:
            [thh, tmm] = tm
            tss = '0'
        elif len(tm) == 3:
            [thh, tmm, tss] = tm
        else:
            # Wrong number of dotted parts; without this branch thh/tmm/tss
            # would be unbound below.
            return None
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    # Check for a yy specified in two-digit format, then convert it to the
    # appropriate four-digit format, according to the POSIX standard. RFC 822
    # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
    # mandates a 4-digit yy. For more information, see the documentation for
    # the time module.
    if yy < 100:
        # The year is between 1969 and 1999 (inclusive).
        if yy > 68:
            yy += 1900
        # The year is between 2000 and 2068 (inclusive).
        else:
            yy += 2000
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
        if tzoffset == 0 and tz.startswith('-'):
            # "-0000" means "UTC, source zone unknown".
            tzoffset = None
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ((tzoffset // 100) * 3600 + (tzoffset % 100) * 60)
    # Daylight Saving Time flag is set to -1, since DST is unknown.
    return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset]
+
+
def parsedate(data):
    """Parse a time string and return its first nine time-tuple fields.

    The string is handed to ``parsedate_tz()``; when that yields a tuple the
    trailing timezone item is dropped, otherwise its result is passed
    through unchanged.
    """
    parsed = parsedate_tz(data)
    return parsed[:9] if isinstance(parsed, tuple) else parsed
+
+
def mktime_tz(data):
    """Convert a 10-tuple from parsedate_tz() into a POSIX timestamp.

    When the tenth item (the zone offset in seconds) is ``None`` the fields
    are interpreted as local time; otherwise they are interpreted as UTC and
    the offset is subtracted.
    """
    offset = data[9]
    if offset is not None:
        return calendar.timegm(data) - offset
    # Without zone information, local time is a better guess than GMT;
    # the trailing -1 lets mktime() decide whether DST applies.
    local_fields = data[:8] + (-1,)
    return time.mktime(local_fields)
+
+
def quote(str):
    """Escape a string for use inside a quoted string.

    Backslashes and double quotes are each prefixed with a backslash; no
    other characters are changed and no surrounding quotes are added.
    """
    # Backslashes must be doubled first so the ones added for the quotes
    # are not escaped again.
    escaped_backslashes = str.replace('\\', '\\\\')
    return escaped_backslashes.replace('"', '\\"')
+
+
class AddrlistClass(object):
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of RFC 2822 in
    front of you.

    The parser is a hand-written scanner: ``self.field`` holds the header
    text, ``self.pos`` is the current index into it and ``self.commentlist``
    collects the comments seen while parsing the current address.

    Note: this class interface is deprecated and may be removed in the future.
    Use email.utils.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.FWS = self.LWS + self.CR
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Skip white space and extract comments.

        Returns the skipped blanks and tabs as a string (line breaks are
        skipped but not returned).
        """
        wslist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                if self.field[self.pos] not in '\n\r':
                    wslist.append(self.field[self.pos])
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break
        return ''.join(wslist)

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses, each as a
        ``(name, address)`` pair.  A position that yields no address
        contributes ``('', '')``.
        """
        result = []
        while self.pos < len(self.field):
            ad = self.getaddress()
            if ad:
                result += ad
            else:
                result.append(('', ''))
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of ``(name, address)`` pairs: one entry for a single
        mailbox, several for a group, or an empty list when nothing usable
        was found.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        oldcl = self.commentlist
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Skip a stray special so the caller's loop makes progress.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None when the current character is not '<'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = False
        self.pos += 1
        self.gotonext()
        adlist = ''
        while self.pos < len(self.field):
            if expectroute:
                # The route domain is parsed only to be skipped.
                self.getdomain()
                expectroute = False
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = True
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec.

        Returns the address as a string.  When an '@' is present but the
        domain after it is missing or invalid (for example it contains a
        second '@'), the empty string is returned instead of a partial
        address.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            preserve_ws = True
            if self.field[self.pos] == '.':
                # Drop whitespace collected just before the dot.
                if aslist and not aslist[-1].strip():
                    aslist.pop()
                aslist.append('.')
                self.pos += 1
                preserve_ws = False
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % quote(self.getquote()))
            elif self.field[self.pos] in self.atomends:
                if aslist and not aslist[-1].strip():
                    aslist.pop()
                break
            else:
                aslist.append(self.getatom())
            ws = self.gotonext()
            if preserve_ws and ws:
                aslist.append(ws)

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return ''.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        domain = self.getdomain()
        if not domain:
            # Invalid domain: return an empty address rather than a bare
            # local part, so callers can tell that parsing failed.
            return ''
        return ''.join(aslist) + domain

    def getdomain(self):
        """Get the complete domain name from an address.

        Returns the empty string when a second '@' is met, so that input
        such as ``a@malicious.org@important.com`` is rejected instead of
        being parsed as ``a@malicious.org``.
        """
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] == '@':
                # Don't accept domains followed by another `@'.
                return ''
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments=True):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        quote = False  # True right after a backslash: take next char as-is
        self.pos += 1
        while self.pos < len(self.field):
            if quote:
                slist.append(self.field[self.pos])
                quote = False
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                continue        # have already advanced pos from getcomment
            elif self.field[self.pos] == '\\':
                quote = True
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', False)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', True)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', False)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
        runs of continuous whitespace into one space.

        Returns the list of words; comments met on the way are appended to
        self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.FWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist
+
class AddressList(AddrlistClass):
    """A parsed list of RFC 2822 addresses with set-like operators.

    The parsed ``(name, address)`` pairs are kept in ``self.addresslist``.
    """

    def __init__(self, field):
        AddrlistClass.__init__(self, field)
        # A false field (None or '') yields an empty list without parsing.
        self.addresslist = self.getaddrlist() if field else []

    def __len__(self):
        return len(self.addresslist)

    def __add__(self, other):
        # Set union: our entries, then other's entries we do not have.
        union = AddressList(None)
        union.addresslist = self.addresslist[:] + [
            entry for entry in other.addresslist
            if entry not in self.addresslist]
        return union

    def __iadd__(self, other):
        # Set union, in-place
        for entry in other.addresslist:
            if entry in self.addresslist:
                continue
            self.addresslist.append(entry)
        return self

    def __sub__(self, other):
        # Set difference: our entries that other does not have.
        difference = AddressList(None)
        difference.addresslist.extend(
            [entry for entry in self.addresslist
             if entry not in other.addresslist])
        return difference

    def __isub__(self, other):
        # Set difference, in-place
        for entry in other.addresslist:
            if entry not in self.addresslist:
                continue
            self.addresslist.remove(entry)
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
diff --git a/contrib/python/future/future/backports/email/_policybase.py b/contrib/python/future/future/backports/email/_policybase.py
index c3bcdfc00d..c66aea9002 100644
--- a/contrib/python/future/future/backports/email/_policybase.py
+++ b/contrib/python/future/future/backports/email/_policybase.py
@@ -1,365 +1,365 @@
-"""Policy framework for the email package.
-
-Allows fine grained feature control of how the package parses and emits data.
-"""
-from __future__ import unicode_literals
-from __future__ import print_function
-from __future__ import division
-from __future__ import absolute_import
-from future.builtins import super
-from future.builtins import str
-from future.utils import with_metaclass
-
-import abc
-from future.backports.email import header
-from future.backports.email import charset as _charset
-from future.backports.email.utils import _has_surrogates
-
-__all__ = [
- 'Policy',
- 'Compat32',
- 'compat32',
- ]
-
-
-class _PolicyBase(object):
-
- """Policy Object basic framework.
-
- This class is useless unless subclassed. A subclass should define
- class attributes with defaults for any values that are to be
- managed by the Policy object. The constructor will then allow
- non-default values to be set for these attributes at instance
- creation time. The instance will be callable, taking these same
- attributes keyword arguments, and returning a new instance
- identical to the called instance except for those values changed
- by the keyword arguments. Instances may be added, yielding new
- instances with any non-default values from the right hand
- operand overriding those in the left hand operand. That is,
-
- A + B == A(<non-default values of B>)
-
- The repr of an instance can be used to reconstruct the object
- if and only if the repr of the values can be used to reconstruct
- those values.
-
- """
-
- def __init__(self, **kw):
- """Create new Policy, possibly overriding some defaults.
-
- See class docstring for a list of overridable attributes.
-
- """
- for name, value in kw.items():
- if hasattr(self, name):
- super(_PolicyBase,self).__setattr__(name, value)
- else:
- raise TypeError(
- "{!r} is an invalid keyword argument for {}".format(
- name, self.__class__.__name__))
-
- def __repr__(self):
- args = [ "{}={!r}".format(name, value)
- for name, value in self.__dict__.items() ]
- return "{}({})".format(self.__class__.__name__, ', '.join(args))
-
- def clone(self, **kw):
- """Return a new instance with specified attributes changed.
-
- The new instance has the same attribute values as the current object,
- except for the changes passed in as keyword arguments.
-
- """
- newpolicy = self.__class__.__new__(self.__class__)
- for attr, value in self.__dict__.items():
- object.__setattr__(newpolicy, attr, value)
- for attr, value in kw.items():
- if not hasattr(self, attr):
- raise TypeError(
- "{!r} is an invalid keyword argument for {}".format(
- attr, self.__class__.__name__))
- object.__setattr__(newpolicy, attr, value)
- return newpolicy
-
- def __setattr__(self, name, value):
- if hasattr(self, name):
- msg = "{!r} object attribute {!r} is read-only"
- else:
- msg = "{!r} object has no attribute {!r}"
- raise AttributeError(msg.format(self.__class__.__name__, name))
-
- def __add__(self, other):
- """Non-default values from right operand override those from left.
-
- The object returned is a new instance of the subclass.
-
- """
- return self.clone(**other.__dict__)
-
-
-def _append_doc(doc, added_doc):
- doc = doc.rsplit('\n', 1)[0]
- added_doc = added_doc.split('\n', 1)[1]
- return doc + '\n' + added_doc
-
-def _extend_docstrings(cls):
- if cls.__doc__ and cls.__doc__.startswith('+'):
- cls.__doc__ = _append_doc(cls.__bases__[0].__doc__, cls.__doc__)
- for name, attr in cls.__dict__.items():
- if attr.__doc__ and attr.__doc__.startswith('+'):
- for c in (c for base in cls.__bases__ for c in base.mro()):
- doc = getattr(getattr(c, name), '__doc__')
- if doc:
- attr.__doc__ = _append_doc(doc, attr.__doc__)
- break
- return cls
-
-
-class Policy(with_metaclass(abc.ABCMeta, _PolicyBase)):
-
- r"""Controls for how messages are interpreted and formatted.
-
- Most of the classes and many of the methods in the email package accept
- Policy objects as parameters. A Policy object contains a set of values and
- functions that control how input is interpreted and how output is rendered.
- For example, the parameter 'raise_on_defect' controls whether or not an RFC
- violation results in an error being raised or not, while 'max_line_length'
- controls the maximum length of output lines when a Message is serialized.
-
- Any valid attribute may be overridden when a Policy is created by passing
- it as a keyword argument to the constructor. Policy objects are immutable,
- but a new Policy object can be created with only certain values changed by
- calling the Policy instance with keyword arguments. Policy objects can
- also be added, producing a new Policy object in which the non-default
- attributes set in the right hand operand overwrite those specified in the
- left operand.
-
- Settable attributes:
-
- raise_on_defect -- If true, then defects should be raised as errors.
- Default: False.
-
- linesep -- string containing the value to use as separation
- between output lines. Default '\n'.
-
- cte_type -- Type of allowed content transfer encodings
-
- 7bit -- ASCII only
- 8bit -- Content-Transfer-Encoding: 8bit is allowed
-
- Default: 8bit. Also controls the disposition of
- (RFC invalid) binary data in headers; see the
- documentation of the binary_fold method.
-
- max_line_length -- maximum length of lines, excluding 'linesep',
- during serialization. None or 0 means no line
- wrapping is done. Default is 78.
-
- """
-
- raise_on_defect = False
- linesep = '\n'
- cte_type = '8bit'
- max_line_length = 78
-
- def handle_defect(self, obj, defect):
- """Based on policy, either raise defect or call register_defect.
-
- handle_defect(obj, defect)
-
- defect should be a Defect subclass, but in any case must be an
- Exception subclass. obj is the object on which the defect should be
- registered if it is not raised. If the raise_on_defect is True, the
- defect is raised as an error, otherwise the object and the defect are
- passed to register_defect.
-
- This method is intended to be called by parsers that discover defects.
- The email package parsers always call it with Defect instances.
-
- """
- if self.raise_on_defect:
- raise defect
- self.register_defect(obj, defect)
-
- def register_defect(self, obj, defect):
- """Record 'defect' on 'obj'.
-
- Called by handle_defect if raise_on_defect is False. This method is
- part of the Policy API so that Policy subclasses can implement custom
- defect handling. The default implementation calls the append method of
- the defects attribute of obj. The objects used by the email package by
- default that get passed to this method will always have a defects
- attribute with an append method.
-
- """
- obj.defects.append(defect)
-
- def header_max_count(self, name):
- """Return the maximum allowed number of headers named 'name'.
-
- Called when a header is added to a Message object. If the returned
- value is not 0 or None, and there are already a number of headers with
- the name 'name' equal to the value returned, a ValueError is raised.
-
- Because the default behavior of Message's __setitem__ is to append the
- value to the list of headers, it is easy to create duplicate headers
- without realizing it. This method allows certain headers to be limited
- in the number of instances of that header that may be added to a
- Message programmatically. (The limit is not observed by the parser,
- which will faithfully produce as many headers as exist in the message
- being parsed.)
-
- The default implementation returns None for all header names.
- """
- return None
-
- @abc.abstractmethod
- def header_source_parse(self, sourcelines):
- """Given a list of linesep terminated strings constituting the lines of
- a single header, return the (name, value) tuple that should be stored
- in the model. The input lines should retain their terminating linesep
- characters. The lines passed in by the email package may contain
- surrogateescaped binary data.
- """
- raise NotImplementedError
-
- @abc.abstractmethod
- def header_store_parse(self, name, value):
- """Given the header name and the value provided by the application
- program, return the (name, value) that should be stored in the model.
- """
- raise NotImplementedError
-
- @abc.abstractmethod
- def header_fetch_parse(self, name, value):
- """Given the header name and the value from the model, return the value
- to be returned to the application program that is requesting that
- header. The value passed in by the email package may contain
- surrogateescaped binary data if the lines were parsed by a BytesParser.
- The returned value should not contain any surrogateescaped data.
-
- """
- raise NotImplementedError
-
- @abc.abstractmethod
- def fold(self, name, value):
- """Given the header name and the value from the model, return a string
- containing linesep characters that implement the folding of the header
- according to the policy controls. The value passed in by the email
- package may contain surrogateescaped binary data if the lines were
- parsed by a BytesParser. The returned value should not contain any
- surrogateescaped data.
-
- """
- raise NotImplementedError
-
- @abc.abstractmethod
- def fold_binary(self, name, value):
- """Given the header name and the value from the model, return binary
- data containing linesep characters that implement the folding of the
- header according to the policy controls. The value passed in by the
- email package may contain surrogateescaped binary data.
-
- """
- raise NotImplementedError
-
-
-@_extend_docstrings
-class Compat32(Policy):
-
- """+
- This particular policy is the backward compatibility Policy. It
- replicates the behavior of the email package version 5.1.
- """
-
- def _sanitize_header(self, name, value):
- # If the header value contains surrogates, return a Header using
- # the unknown-8bit charset to encode the bytes as encoded words.
- if not isinstance(value, str):
- # Assume it is already a header object
- return value
- if _has_surrogates(value):
- return header.Header(value, charset=_charset.UNKNOWN8BIT,
- header_name=name)
- else:
- return value
-
- def header_source_parse(self, sourcelines):
- """+
- The name is parsed as everything up to the ':' and returned unmodified.
- The value is determined by stripping leading whitespace off the
- remainder of the first line, joining all subsequent lines together, and
- stripping any trailing carriage return or linefeed characters.
-
- """
- name, value = sourcelines[0].split(':', 1)
- value = value.lstrip(' \t') + ''.join(sourcelines[1:])
- return (name, value.rstrip('\r\n'))
-
- def header_store_parse(self, name, value):
- """+
- The name and value are returned unmodified.
- """
- return (name, value)
-
- def header_fetch_parse(self, name, value):
- """+
- If the value contains binary data, it is converted into a Header object
- using the unknown-8bit charset. Otherwise it is returned unmodified.
- """
- return self._sanitize_header(name, value)
-
- def fold(self, name, value):
- """+
- Headers are folded using the Header folding algorithm, which preserves
- existing line breaks in the value, and wraps each resulting line to the
- max_line_length. Non-ASCII binary data are CTE encoded using the
- unknown-8bit charset.
-
- """
- return self._fold(name, value, sanitize=True)
-
- def fold_binary(self, name, value):
- """+
- Headers are folded using the Header folding algorithm, which preserves
- existing line breaks in the value, and wraps each resulting line to the
- max_line_length. If cte_type is 7bit, non-ascii binary data is CTE
- encoded using the unknown-8bit charset. Otherwise the original source
- header is used, with its existing line breaks and/or binary data.
-
- """
- folded = self._fold(name, value, sanitize=self.cte_type=='7bit')
- return folded.encode('ascii', 'surrogateescape')
-
- def _fold(self, name, value, sanitize):
- parts = []
- parts.append('%s: ' % name)
- if isinstance(value, str):
- if _has_surrogates(value):
- if sanitize:
- h = header.Header(value,
- charset=_charset.UNKNOWN8BIT,
- header_name=name)
- else:
- # If we have raw 8bit data in a byte string, we have no idea
- # what the encoding is. There is no safe way to split this
- # string. If it's ascii-subset, then we could do a normal
- # ascii split, but if it's multibyte then we could break the
- # string. There's no way to know so the least harm seems to
- # be to not split the string and risk it being too long.
- parts.append(value)
- h = None
- else:
- h = header.Header(value, header_name=name)
- else:
- # Assume it is a Header-like object.
- h = value
- if h is not None:
- parts.append(h.encode(linesep=self.linesep,
- maxlinelen=self.max_line_length))
- parts.append(self.linesep)
- return ''.join(parts)
-
-
-compat32 = Compat32()
+"""Policy framework for the email package.
+
+Allows fine grained feature control of how the package parses and emits data.
+"""
+from __future__ import unicode_literals
+from __future__ import print_function
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import super
+from future.builtins import str
+from future.utils import with_metaclass
+
+import abc
+from future.backports.email import header
+from future.backports.email import charset as _charset
+from future.backports.email.utils import _has_surrogates
+
+__all__ = [
+ 'Policy',
+ 'Compat32',
+ 'compat32',
+ ]
+
+
class _PolicyBase(object):

    """Basic framework for immutable Policy objects.

    On its own this class does nothing useful.  A subclass declares class
    attributes holding the default for every value the policy manages.
    Keyword arguments to the constructor override those defaults for one
    instance; names that are not existing attributes are rejected.

    After construction, ordinary attribute assignment always raises
    AttributeError.  Use clone() to obtain a changed copy, or add two
    instances: the values stored on the right hand operand override
    those of the left hand operand in the new instance.

    The repr of an instance can be used to reconstruct the object
    if and only if the repr of the values can be used to reconstruct
    those values.

    """

    def __init__(self, **kw):
        """Create a new policy, overriding the defaults named in `kw`.

        Raises TypeError for a keyword that is not an existing attribute.

        """
        for key, val in kw.items():
            if not hasattr(self, key):
                raise TypeError(
                    "{!r} is an invalid keyword argument for {}".format(
                        key, self.__class__.__name__))
            # Go around our own read-only __setattr__.
            super(_PolicyBase, self).__setattr__(key, val)

    def __repr__(self):
        rendered = ', '.join(
            ["{}={!r}".format(key, val)
             for key, val in self.__dict__.items()])
        return "{}({})".format(self.__class__.__name__, rendered)

    def clone(self, **kw):
        """Return a copy of this policy with the given attributes changed.

        The copy carries every value stored on this instance, updated with
        the keyword arguments.  Raises TypeError for a keyword that is not
        an existing attribute.

        """
        klass = self.__class__
        duplicate = klass.__new__(klass)
        for attr, value in self.__dict__.items():
            object.__setattr__(duplicate, attr, value)
        for attr, value in kw.items():
            if hasattr(self, attr):
                object.__setattr__(duplicate, attr, value)
                continue
            raise TypeError(
                "{!r} is an invalid keyword argument for {}".format(
                    attr, klass.__name__))
        return duplicate

    def __setattr__(self, name, value):
        # Instances are read-only; only the message depends on the name.
        template = ("{!r} object attribute {!r} is read-only"
                    if hasattr(self, name)
                    else "{!r} object has no attribute {!r}")
        raise AttributeError(template.format(self.__class__.__name__, name))

    def __add__(self, other):
        """Return a new policy: this one overridden by `other`'s values.

        Only the values stored on the `other` instance are applied.

        """
        overrides = other.__dict__
        return self.clone(**overrides)
+
+
def _append_doc(doc, added_doc):
    # Drop everything after the last newline of `doc` and everything up to
    # the first newline of `added_doc`, then join the two with a newline.
    head = doc.rsplit('\n', 1)[0]
    tail = added_doc.split('\n', 1)[1]
    return '\n'.join((head, tail))
+
def _extend_docstrings(cls):
    # Class decorator: a docstring starting with '+' is treated as a
    # continuation and is appended to an inherited docstring.
    own_doc = cls.__doc__
    if own_doc and own_doc.startswith('+'):
        cls.__doc__ = _append_doc(cls.__bases__[0].__doc__, own_doc)
    for name, attr in cls.__dict__.items():
        if not (attr.__doc__ and attr.__doc__.startswith('+')):
            continue
        # Walk the MRO of each base in turn and use the first non-empty
        # docstring found for the attribute of the same name.
        extended = False
        for base in cls.__bases__:
            for ancestor in base.mro():
                inherited = getattr(ancestor, name).__doc__
                if inherited:
                    attr.__doc__ = _append_doc(inherited, attr.__doc__)
                    extended = True
                    break
            if extended:
                break
    return cls
+
+
+class Policy(with_metaclass(abc.ABCMeta, _PolicyBase)):
+
+ r"""Controls for how messages are interpreted and formatted.
+
+ Most of the classes and many of the methods in the email package accept
+ Policy objects as parameters. A Policy object contains a set of values and
+ functions that control how input is interpreted and how output is rendered.
+ For example, the parameter 'raise_on_defect' controls whether or not an RFC
+ violation results in an error being raised or not, while 'max_line_length'
+ controls the maximum length of output lines when a Message is serialized.
+
+ Any valid attribute may be overridden when a Policy is created by passing
+ it as a keyword argument to the constructor. Policy objects are immutable,
+ but a new Policy object can be created with only certain values changed by
+ calling the Policy instance with keyword arguments. Policy objects can
+ also be added, producing a new Policy object in which the non-default
+ attributes set in the right hand operand overwrite those specified in the
+ left operand.
+
+ Settable attributes:
+
+ raise_on_defect -- If true, then defects should be raised as errors.
+ Default: False.
+
+ linesep -- string containing the value to use as separation
+ between output lines. Default '\n'.
+
+ cte_type -- Type of allowed content transfer encodings
+
+ 7bit -- ASCII only
+ 8bit -- Content-Transfer-Encoding: 8bit is allowed
+
+ Default: 8bit. Also controls the disposition of
+ (RFC invalid) binary data in headers; see the
+ documentation of the binary_fold method.
+
+ max_line_length -- maximum length of lines, excluding 'linesep',
+ during serialization. None or 0 means no line
+ wrapping is done. Default is 78.
+
+ """
+
+ raise_on_defect = False
+ linesep = '\n'
+ cte_type = '8bit'
+ max_line_length = 78
+
+ def handle_defect(self, obj, defect):
+ """Based on policy, either raise defect or call register_defect.
+
+ handle_defect(obj, defect)
+
+ defect should be a Defect subclass, but in any case must be an
+ Exception subclass. obj is the object on which the defect should be
+ registered if it is not raised. If the raise_on_defect is True, the
+ defect is raised as an error, otherwise the object and the defect are
+ passed to register_defect.
+
+ This method is intended to be called by parsers that discover defects.
+ The email package parsers always call it with Defect instances.
+
+ """
+ if self.raise_on_defect:
+ raise defect
+ self.register_defect(obj, defect)
+
+ def register_defect(self, obj, defect):
+ """Record 'defect' on 'obj'.
+
+ Called by handle_defect if raise_on_defect is False. This method is
+ part of the Policy API so that Policy subclasses can implement custom
+ defect handling. The default implementation calls the append method of
+ the defects attribute of obj. The objects used by the email package by
+ default that get passed to this method will always have a defects
+ attribute with an append method.
+
+ """
+ obj.defects.append(defect)
+
+ def header_max_count(self, name):
+ """Return the maximum allowed number of headers named 'name'.
+
+ Called when a header is added to a Message object. If the returned
+ value is not 0 or None, and there are already a number of headers with
+ the name 'name' equal to the value returned, a ValueError is raised.
+
+ Because the default behavior of Message's __setitem__ is to append the
+ value to the list of headers, it is easy to create duplicate headers
+ without realizing it. This method allows certain headers to be limited
+ in the number of instances of that header that may be added to a
+ Message programmatically. (The limit is not observed by the parser,
+ which will faithfully produce as many headers as exist in the message
+ being parsed.)
+
+ The default implementation returns None for all header names.
+ """
+ return None
+
+ @abc.abstractmethod
+ def header_source_parse(self, sourcelines):
+ """Given a list of linesep terminated strings constituting the lines of
+ a single header, return the (name, value) tuple that should be stored
+ in the model. The input lines should retain their terminating linesep
+ characters. The lines passed in by the email package may contain
+ surrogateescaped binary data.
+ """
+ raise NotImplementedError
+
+ @abc.abstractmethod
+ def header_store_parse(self, name, value):
+ """Given the header name and the value provided by the application
+ program, return the (name, value) that should be stored in the model.
+ """
+ raise NotImplementedError
+
+ @abc.abstractmethod
+ def header_fetch_parse(self, name, value):
+ """Given the header name and the value from the model, return the value
+ to be returned to the application program that is requesting that
+ header. The value passed in by the email package may contain
+ surrogateescaped binary data if the lines were parsed by a BytesParser.
+ The returned value should not contain any surrogateescaped data.
+
+ """
+ raise NotImplementedError
+
+ @abc.abstractmethod
+ def fold(self, name, value):
+ """Given the header name and the value from the model, return a string
+ containing linesep characters that implement the folding of the header
+ according to the policy controls. The value passed in by the email
+ package may contain surrogateescaped binary data if the lines were
+ parsed by a BytesParser. The returned value should not contain any
+ surrogateescaped data.
+
+ """
+ raise NotImplementedError
+
+ @abc.abstractmethod
+ def fold_binary(self, name, value):
+ """Given the header name and the value from the model, return binary
+ data containing linesep characters that implement the folding of the
+ header according to the policy controls. The value passed in by the
+ email package may contain surrogateescaped binary data.
+
+ """
+ raise NotImplementedError
+
+
+@_extend_docstrings
+class Compat32(Policy):
+
+ """+
+ This particular policy is the backward compatibility Policy. It
+ replicates the behavior of the email package version 5.1.
+ """
+
+ def _sanitize_header(self, name, value):
+ # If the header value contains surrogates, return a Header using
+ # the unknown-8bit charset to encode the bytes as encoded words.
+ if not isinstance(value, str):
+ # Assume it is already a header object
+ return value
+ if _has_surrogates(value):
+ return header.Header(value, charset=_charset.UNKNOWN8BIT,
+ header_name=name)
+ else:
+ return value
+
+ def header_source_parse(self, sourcelines):
+ """+
+ The name is parsed as everything up to the ':' and returned unmodified.
+ The value is determined by stripping leading whitespace off the
+ remainder of the first line, joining all subsequent lines together, and
+ stripping any trailing carriage return or linefeed characters.
+
+ """
+ name, value = sourcelines[0].split(':', 1)
+ value = value.lstrip(' \t') + ''.join(sourcelines[1:])
+ return (name, value.rstrip('\r\n'))
+
+ def header_store_parse(self, name, value):
+ """+
+ The name and value are returned unmodified.
+ """
+ return (name, value)
+
+ def header_fetch_parse(self, name, value):
+ """+
+ If the value contains binary data, it is converted into a Header object
+ using the unknown-8bit charset. Otherwise it is returned unmodified.
+ """
+ return self._sanitize_header(name, value)
+
+ def fold(self, name, value):
+ """+
+ Headers are folded using the Header folding algorithm, which preserves
+ existing line breaks in the value, and wraps each resulting line to the
+ max_line_length. Non-ASCII binary data are CTE encoded using the
+ unknown-8bit charset.
+
+ """
+ return self._fold(name, value, sanitize=True)
+
+ def fold_binary(self, name, value):
+ """+
+ Headers are folded using the Header folding algorithm, which preserves
+ existing line breaks in the value, and wraps each resulting line to the
+ max_line_length. If cte_type is 7bit, non-ascii binary data is CTE
+ encoded using the unknown-8bit charset. Otherwise the original source
+ header is used, with its existing line breaks and/or binary data.
+
+ """
+ folded = self._fold(name, value, sanitize=self.cte_type=='7bit')
+ return folded.encode('ascii', 'surrogateescape')
+
+ def _fold(self, name, value, sanitize):
+ parts = []
+ parts.append('%s: ' % name)
+ if isinstance(value, str):
+ if _has_surrogates(value):
+ if sanitize:
+ h = header.Header(value,
+ charset=_charset.UNKNOWN8BIT,
+ header_name=name)
+ else:
+ # If we have raw 8bit data in a byte string, we have no idea
+ # what the encoding is. There is no safe way to split this
+ # string. If it's ascii-subset, then we could do a normal
+ # ascii split, but if it's multibyte then we could break the
+ # string. There's no way to know so the least harm seems to
+ # be to not split the string and risk it being too long.
+ parts.append(value)
+ h = None
+ else:
+ h = header.Header(value, header_name=name)
+ else:
+ # Assume it is a Header-like object.
+ h = value
+ if h is not None:
+ parts.append(h.encode(linesep=self.linesep,
+ maxlinelen=self.max_line_length))
+ parts.append(self.linesep)
+ return ''.join(parts)
+
+
+compat32 = Compat32()
diff --git a/contrib/python/future/future/backports/email/base64mime.py b/contrib/python/future/future/backports/email/base64mime.py
index 4438ba001a..416d612e01 100644
--- a/contrib/python/future/future/backports/email/base64mime.py
+++ b/contrib/python/future/future/backports/email/base64mime.py
@@ -1,120 +1,120 @@
-# Copyright (C) 2002-2007 Python Software Foundation
-# Author: Ben Gertzfield
-# Contact: email-sig@python.org
-
-"""Base64 content transfer encoding per RFCs 2045-2047.
-
-This module handles the content transfer encoding method defined in RFC 2045
-to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
-characters encoding known as Base64.
-
-It is used in the MIME standards for email to attach images, audio, and text
-using some 8-bit character sets to messages.
-
-This module provides an interface to encode and decode both headers and bodies
-with Base64 encoding.
-
-RFC 2045 defines a method for including character set information in an
-`encoded-word' in a header. This method is commonly used for 8-bit real names
-in To:, From:, Cc:, etc. fields, as well as Subject: lines.
-
-This module does not do the line wrapping or end-of-line character conversion
-necessary for proper internationalized headers; it only does dumb encoding and
-decoding. To deal with the various line wrapping issues, use the email.header
-module.
-"""
-from __future__ import unicode_literals
-from __future__ import division
-from __future__ import absolute_import
-from future.builtins import range
-from future.builtins import bytes
-
-__all__ = [
- 'body_decode',
- 'body_encode',
- 'decode',
- 'decodestring',
- 'header_encode',
- 'header_length',
- ]
-
-
-from base64 import b64encode
-from binascii import b2a_base64, a2b_base64
-
-CRLF = '\r\n'
-NL = '\n'
-EMPTYSTRING = ''
-
-# See also Charset.py
-MISC_LEN = 7
-
-
-# Helpers
-def header_length(bytearray):
- """Return the length of s when it is encoded with base64."""
- groups_of_3, leftover = divmod(len(bytearray), 3)
- # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
- n = groups_of_3 * 4
- if leftover:
- n += 4
- return n
-
-
-def header_encode(header_bytes, charset='iso-8859-1'):
- """Encode a single header line with Base64 encoding in a given charset.
-
- charset names the character set to use to encode the header. It defaults
- to iso-8859-1. Base64 encoding is defined in RFC 2045.
- """
- if not header_bytes:
- return ""
- if isinstance(header_bytes, str):
- header_bytes = header_bytes.encode(charset)
- encoded = b64encode(header_bytes).decode("ascii")
- return '=?%s?b?%s?=' % (charset, encoded)
-
-
-def body_encode(s, maxlinelen=76, eol=NL):
- r"""Encode a string with base64.
-
- Each line will be wrapped at, at most, maxlinelen characters (defaults to
- 76 characters).
-
- Each line of encoded text will end with eol, which defaults to "\n". Set
- this to "\r\n" if you will be using the result of this function directly
- in an email.
- """
- if not s:
- return s
-
- encvec = []
- max_unencoded = maxlinelen * 3 // 4
- for i in range(0, len(s), max_unencoded):
- # BAW: should encode() inherit b2a_base64()'s dubious behavior in
- # adding a newline to the encoded string?
- enc = b2a_base64(s[i:i + max_unencoded]).decode("ascii")
- if enc.endswith(NL) and eol != NL:
- enc = enc[:-1] + eol
- encvec.append(enc)
- return EMPTYSTRING.join(encvec)
-
-
-def decode(string):
- """Decode a raw base64 string, returning a bytes object.
-
- This function does not parse a full MIME header value encoded with
- base64 (like =?iso-8895-1?b?bmloISBuaWgh?=) -- please use the high
- level email.header class for that functionality.
- """
- if not string:
- return bytes()
- elif isinstance(string, str):
- return a2b_base64(string.encode('raw-unicode-escape'))
- else:
- return a2b_base64(string)
-
-
-# For convenience and backwards compatibility w/ standard base64 module
-body_decode = decode
-decodestring = decode
+# Copyright (C) 2002-2007 Python Software Foundation
+# Author: Ben Gertzfield
+# Contact: email-sig@python.org
+
+"""Base64 content transfer encoding per RFCs 2045-2047.
+
+This module handles the content transfer encoding method defined in RFC 2045
+to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
+characters encoding known as Base64.
+
+It is used in the MIME standards for email to attach images, audio, and text
+using some 8-bit character sets to messages.
+
+This module provides an interface to encode and decode both headers and bodies
+with Base64 encoding.
+
+RFC 2045 defines a method for including character set information in an
+`encoded-word' in a header. This method is commonly used for 8-bit real names
+in To:, From:, Cc:, etc. fields, as well as Subject: lines.
+
+This module does not do the line wrapping or end-of-line character conversion
+necessary for proper internationalized headers; it only does dumb encoding and
+decoding. To deal with the various line wrapping issues, use the email.header
+module.
+"""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import range
+from future.builtins import bytes
+
+__all__ = [
+ 'body_decode',
+ 'body_encode',
+ 'decode',
+ 'decodestring',
+ 'header_encode',
+ 'header_length',
+ ]
+
+
+from base64 import b64encode
+from binascii import b2a_base64, a2b_base64
+
+CRLF = '\r\n'
+NL = '\n'
+EMPTYSTRING = ''
+
+# See also Charset.py
+MISC_LEN = 7
+
+
+# Helpers
+def header_length(bytearray):
+ """Return the length of s when it is encoded with base64."""
+ groups_of_3, leftover = divmod(len(bytearray), 3)
+ # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
+ n = groups_of_3 * 4
+ if leftover:
+ n += 4
+ return n
+
+
+def header_encode(header_bytes, charset='iso-8859-1'):
+ """Encode a single header line with Base64 encoding in a given charset.
+
+ charset names the character set to use to encode the header. It defaults
+ to iso-8859-1. Base64 encoding is defined in RFC 2045.
+ """
+ if not header_bytes:
+ return ""
+ if isinstance(header_bytes, str):
+ header_bytes = header_bytes.encode(charset)
+ encoded = b64encode(header_bytes).decode("ascii")
+ return '=?%s?b?%s?=' % (charset, encoded)
+
+
+def body_encode(s, maxlinelen=76, eol=NL):
+ r"""Encode a string with base64.
+
+ Each line will be wrapped at, at most, maxlinelen characters (defaults to
+ 76 characters).
+
+ Each line of encoded text will end with eol, which defaults to "\n". Set
+ this to "\r\n" if you will be using the result of this function directly
+ in an email.
+ """
+ if not s:
+ return s
+
+ encvec = []
+ max_unencoded = maxlinelen * 3 // 4
+ for i in range(0, len(s), max_unencoded):
+ # BAW: should encode() inherit b2a_base64()'s dubious behavior in
+ # adding a newline to the encoded string?
+ enc = b2a_base64(s[i:i + max_unencoded]).decode("ascii")
+ if enc.endswith(NL) and eol != NL:
+ enc = enc[:-1] + eol
+ encvec.append(enc)
+ return EMPTYSTRING.join(encvec)
+
+
+def decode(string):
+ """Decode a raw base64 string, returning a bytes object.
+
+ This function does not parse a full MIME header value encoded with
+ base64 (like =?iso-8895-1?b?bmloISBuaWgh?=) -- please use the high
+ level email.header class for that functionality.
+ """
+ if not string:
+ return bytes()
+ elif isinstance(string, str):
+ return a2b_base64(string.encode('raw-unicode-escape'))
+ else:
+ return a2b_base64(string)
+
+
+# For convenience and backwards compatibility w/ standard base64 module
+body_decode = decode
+decodestring = decode
diff --git a/contrib/python/future/future/backports/email/charset.py b/contrib/python/future/future/backports/email/charset.py
index d9a755acc1..2385ce68f3 100644
--- a/contrib/python/future/future/backports/email/charset.py
+++ b/contrib/python/future/future/backports/email/charset.py
@@ -1,409 +1,409 @@
-from __future__ import unicode_literals
-from __future__ import division
-from __future__ import absolute_import
-from future.builtins import str
-from future.builtins import next
-
-# Copyright (C) 2001-2007 Python Software Foundation
-# Author: Ben Gertzfield, Barry Warsaw
-# Contact: email-sig@python.org
-
-__all__ = [
- 'Charset',
- 'add_alias',
- 'add_charset',
- 'add_codec',
- ]
-
-from functools import partial
-
-from future.backports import email
-from future.backports.email import errors
-from future.backports.email.encoders import encode_7or8bit
-
-
-# Flags for types of header encodings
-QP = 1 # Quoted-Printable
-BASE64 = 2 # Base64
-SHORTEST = 3 # the shorter of QP and base64, but only for headers
-
-# In "=?charset?q?hello_world?=", the =?, ?q?, and ?= add up to 7
-RFC2047_CHROME_LEN = 7
-
-DEFAULT_CHARSET = 'us-ascii'
-UNKNOWN8BIT = 'unknown-8bit'
-EMPTYSTRING = ''
-
-
-# Defaults
-CHARSETS = {
- # input header enc body enc output conv
- 'iso-8859-1': (QP, QP, None),
- 'iso-8859-2': (QP, QP, None),
- 'iso-8859-3': (QP, QP, None),
- 'iso-8859-4': (QP, QP, None),
- # iso-8859-5 is Cyrillic, and not especially used
- # iso-8859-6 is Arabic, also not particularly used
- # iso-8859-7 is Greek, QP will not make it readable
- # iso-8859-8 is Hebrew, QP will not make it readable
- 'iso-8859-9': (QP, QP, None),
- 'iso-8859-10': (QP, QP, None),
- # iso-8859-11 is Thai, QP will not make it readable
- 'iso-8859-13': (QP, QP, None),
- 'iso-8859-14': (QP, QP, None),
- 'iso-8859-15': (QP, QP, None),
- 'iso-8859-16': (QP, QP, None),
- 'windows-1252':(QP, QP, None),
- 'viscii': (QP, QP, None),
- 'us-ascii': (None, None, None),
- 'big5': (BASE64, BASE64, None),
- 'gb2312': (BASE64, BASE64, None),
- 'euc-jp': (BASE64, None, 'iso-2022-jp'),
- 'shift_jis': (BASE64, None, 'iso-2022-jp'),
- 'iso-2022-jp': (BASE64, None, None),
- 'koi8-r': (BASE64, BASE64, None),
- 'utf-8': (SHORTEST, BASE64, 'utf-8'),
- }
-
-# Aliases for other commonly-used names for character sets. Map
-# them to the real ones used in email.
-ALIASES = {
- 'latin_1': 'iso-8859-1',
- 'latin-1': 'iso-8859-1',
- 'latin_2': 'iso-8859-2',
- 'latin-2': 'iso-8859-2',
- 'latin_3': 'iso-8859-3',
- 'latin-3': 'iso-8859-3',
- 'latin_4': 'iso-8859-4',
- 'latin-4': 'iso-8859-4',
- 'latin_5': 'iso-8859-9',
- 'latin-5': 'iso-8859-9',
- 'latin_6': 'iso-8859-10',
- 'latin-6': 'iso-8859-10',
- 'latin_7': 'iso-8859-13',
- 'latin-7': 'iso-8859-13',
- 'latin_8': 'iso-8859-14',
- 'latin-8': 'iso-8859-14',
- 'latin_9': 'iso-8859-15',
- 'latin-9': 'iso-8859-15',
- 'latin_10':'iso-8859-16',
- 'latin-10':'iso-8859-16',
- 'cp949': 'ks_c_5601-1987',
- 'euc_jp': 'euc-jp',
- 'euc_kr': 'euc-kr',
- 'ascii': 'us-ascii',
- }
-
-
-# Map charsets to their Unicode codec strings.
-CODEC_MAP = {
- 'gb2312': 'eucgb2312_cn',
- 'big5': 'big5_tw',
- # Hack: We don't want *any* conversion for stuff marked us-ascii, as all
- # sorts of garbage might be sent to us in the guise of 7-bit us-ascii.
- # Let that stuff pass through without conversion to/from Unicode.
- 'us-ascii': None,
- }
-
-
-# Convenience functions for extending the above mappings
-def add_charset(charset, header_enc=None, body_enc=None, output_charset=None):
- """Add character set properties to the global registry.
-
- charset is the input character set, and must be the canonical name of a
- character set.
-
- Optional header_enc and body_enc is either Charset.QP for
- quoted-printable, Charset.BASE64 for base64 encoding, Charset.SHORTEST for
- the shortest of qp or base64 encoding, or None for no encoding. SHORTEST
- is only valid for header_enc. It describes how message headers and
- message bodies in the input charset are to be encoded. Default is no
- encoding.
-
- Optional output_charset is the character set that the output should be
- in. Conversions will proceed from input charset, to Unicode, to the
- output charset when the method Charset.convert() is called. The default
- is to output in the same character set as the input.
-
- Both input_charset and output_charset must have Unicode codec entries in
- the module's charset-to-codec mapping; use add_codec(charset, codecname)
- to add codecs the module does not know about. See the codecs module's
- documentation for more information.
- """
- if body_enc == SHORTEST:
- raise ValueError('SHORTEST not allowed for body_enc')
- CHARSETS[charset] = (header_enc, body_enc, output_charset)
-
-
-def add_alias(alias, canonical):
- """Add a character set alias.
-
- alias is the alias name, e.g. latin-1
- canonical is the character set's canonical name, e.g. iso-8859-1
- """
- ALIASES[alias] = canonical
-
-
-def add_codec(charset, codecname):
- """Add a codec that map characters in the given charset to/from Unicode.
-
- charset is the canonical name of a character set. codecname is the name
- of a Python codec, as appropriate for the second argument to the unicode()
- built-in, or to the encode() method of a Unicode string.
- """
- CODEC_MAP[charset] = codecname
-
-
-# Convenience function for encoding strings, taking into account
-# that they might be unknown-8bit (ie: have surrogate-escaped bytes)
-def _encode(string, codec):
- string = str(string)
- if codec == UNKNOWN8BIT:
- return string.encode('ascii', 'surrogateescape')
- else:
- return string.encode(codec)
-
-
-class Charset(object):
- """Map character sets to their email properties.
-
- This class provides information about the requirements imposed on email
- for a specific character set. It also provides convenience routines for
- converting between character sets, given the availability of the
- applicable codecs. Given a character set, it will do its best to provide
- information on how to use that character set in an email in an
- RFC-compliant way.
-
- Certain character sets must be encoded with quoted-printable or base64
- when used in email headers or bodies. Certain character sets must be
- converted outright, and are not allowed in email. Instances of this
- module expose the following information about a character set:
-
- input_charset: The initial character set specified. Common aliases
- are converted to their `official' email names (e.g. latin_1
- is converted to iso-8859-1). Defaults to 7-bit us-ascii.
-
- header_encoding: If the character set must be encoded before it can be
- used in an email header, this attribute will be set to
- Charset.QP (for quoted-printable), Charset.BASE64 (for
- base64 encoding), or Charset.SHORTEST for the shortest of
- QP or BASE64 encoding. Otherwise, it will be None.
-
- body_encoding: Same as header_encoding, but describes the encoding for the
- mail message's body, which indeed may be different than the
- header encoding. Charset.SHORTEST is not allowed for
- body_encoding.
-
- output_charset: Some character sets must be converted before they can be
- used in email headers or bodies. If the input_charset is
- one of them, this attribute will contain the name of the
- charset output will be converted to. Otherwise, it will
- be None.
-
- input_codec: The name of the Python codec used to convert the
- input_charset to Unicode. If no conversion codec is
- necessary, this attribute will be None.
-
- output_codec: The name of the Python codec used to convert Unicode
- to the output_charset. If no conversion codec is necessary,
- this attribute will have the same value as the input_codec.
- """
- def __init__(self, input_charset=DEFAULT_CHARSET):
- # RFC 2046, $4.1.2 says charsets are not case sensitive. We coerce to
- # unicode because its .lower() is locale insensitive. If the argument
- # is already a unicode, we leave it at that, but ensure that the
- # charset is ASCII, as the standard (RFC XXX) requires.
- try:
- if isinstance(input_charset, str):
- input_charset.encode('ascii')
- else:
- input_charset = str(input_charset, 'ascii')
- except UnicodeError:
- raise errors.CharsetError(input_charset)
- input_charset = input_charset.lower()
- # Set the input charset after filtering through the aliases
- self.input_charset = ALIASES.get(input_charset, input_charset)
- # We can try to guess which encoding and conversion to use by the
- # charset_map dictionary. Try that first, but let the user override
- # it.
- henc, benc, conv = CHARSETS.get(self.input_charset,
- (SHORTEST, BASE64, None))
- if not conv:
- conv = self.input_charset
- # Set the attributes, allowing the arguments to override the default.
- self.header_encoding = henc
- self.body_encoding = benc
- self.output_charset = ALIASES.get(conv, conv)
- # Now set the codecs. If one isn't defined for input_charset,
- # guess and try a Unicode codec with the same name as input_codec.
- self.input_codec = CODEC_MAP.get(self.input_charset,
- self.input_charset)
- self.output_codec = CODEC_MAP.get(self.output_charset,
- self.output_charset)
-
- def __str__(self):
- return self.input_charset.lower()
-
- __repr__ = __str__
-
- def __eq__(self, other):
- return str(self) == str(other).lower()
-
- def __ne__(self, other):
- return not self.__eq__(other)
-
- def get_body_encoding(self):
- """Return the content-transfer-encoding used for body encoding.
-
- This is either the string `quoted-printable' or `base64' depending on
- the encoding used, or it is a function in which case you should call
- the function with a single argument, the Message object being
- encoded. The function should then set the Content-Transfer-Encoding
- header itself to whatever is appropriate.
-
- Returns "quoted-printable" if self.body_encoding is QP.
- Returns "base64" if self.body_encoding is BASE64.
- Returns conversion function otherwise.
- """
- assert self.body_encoding != SHORTEST
- if self.body_encoding == QP:
- return 'quoted-printable'
- elif self.body_encoding == BASE64:
- return 'base64'
- else:
- return encode_7or8bit
-
- def get_output_charset(self):
- """Return the output character set.
-
- This is self.output_charset if that is not None, otherwise it is
- self.input_charset.
- """
- return self.output_charset or self.input_charset
-
- def header_encode(self, string):
- """Header-encode a string by converting it first to bytes.
-
- The type of encoding (base64 or quoted-printable) will be based on
- this charset's `header_encoding`.
-
- :param string: A unicode string for the header. It must be possible
- to encode this string to bytes using the character set's
- output codec.
- :return: The encoded string, with RFC 2047 chrome.
- """
- codec = self.output_codec or 'us-ascii'
- header_bytes = _encode(string, codec)
- # 7bit/8bit encodings return the string unchanged (modulo conversions)
- encoder_module = self._get_encoder(header_bytes)
- if encoder_module is None:
- return string
- return encoder_module.header_encode(header_bytes, codec)
-
- def header_encode_lines(self, string, maxlengths):
- """Header-encode a string by converting it first to bytes.
-
- This is similar to `header_encode()` except that the string is fit
- into maximum line lengths as given by the argument.
-
- :param string: A unicode string for the header. It must be possible
- to encode this string to bytes using the character set's
- output codec.
- :param maxlengths: Maximum line length iterator. Each element
- returned from this iterator will provide the next maximum line
- length. This parameter is used as an argument to built-in next()
- and should never be exhausted. The maximum line lengths should
- not count the RFC 2047 chrome. These line lengths are only a
- hint; the splitter does the best it can.
- :return: Lines of encoded strings, each with RFC 2047 chrome.
- """
- # See which encoding we should use.
- codec = self.output_codec or 'us-ascii'
- header_bytes = _encode(string, codec)
- encoder_module = self._get_encoder(header_bytes)
- encoder = partial(encoder_module.header_encode, charset=codec)
- # Calculate the number of characters that the RFC 2047 chrome will
- # contribute to each line.
- charset = self.get_output_charset()
- extra = len(charset) + RFC2047_CHROME_LEN
- # Now comes the hard part. We must encode bytes but we can't split on
- # bytes because some character sets are variable length and each
- # encoded word must stand on its own. So the problem is you have to
- # encode to bytes to figure out this word's length, but you must split
- # on characters. This causes two problems: first, we don't know how
- # many octets a specific substring of unicode characters will get
- # encoded to, and second, we don't know how many ASCII characters
- # those octets will get encoded to. Unless we try it. Which seems
- # inefficient. In the interest of being correct rather than fast (and
- # in the hope that there will be few encoded headers in any such
- # message), brute force it. :(
- lines = []
- current_line = []
- maxlen = next(maxlengths) - extra
- for character in string:
- current_line.append(character)
- this_line = EMPTYSTRING.join(current_line)
- length = encoder_module.header_length(_encode(this_line, charset))
- if length > maxlen:
- # This last character doesn't fit so pop it off.
- current_line.pop()
- # Does nothing fit on the first line?
- if not lines and not current_line:
- lines.append(None)
- else:
- separator = (' ' if lines else '')
- joined_line = EMPTYSTRING.join(current_line)
- header_bytes = _encode(joined_line, codec)
- lines.append(encoder(header_bytes))
- current_line = [character]
- maxlen = next(maxlengths) - extra
- joined_line = EMPTYSTRING.join(current_line)
- header_bytes = _encode(joined_line, codec)
- lines.append(encoder(header_bytes))
- return lines
-
- def _get_encoder(self, header_bytes):
- if self.header_encoding == BASE64:
- return email.base64mime
- elif self.header_encoding == QP:
- return email.quoprimime
- elif self.header_encoding == SHORTEST:
- len64 = email.base64mime.header_length(header_bytes)
- lenqp = email.quoprimime.header_length(header_bytes)
- if len64 < lenqp:
- return email.base64mime
- else:
- return email.quoprimime
- else:
- return None
-
- def body_encode(self, string):
- """Body-encode a string by converting it first to bytes.
-
- The type of encoding (base64 or quoted-printable) will be based on
- self.body_encoding. If body_encoding is None, we assume the
- output charset is a 7bit encoding, so re-encoding the decoded
- string using the ascii codec produces the correct string version
- of the content.
- """
- if not string:
- return string
- if self.body_encoding is BASE64:
- if isinstance(string, str):
- string = string.encode(self.output_charset)
- return email.base64mime.body_encode(string)
- elif self.body_encoding is QP:
- # quopromime.body_encode takes a string, but operates on it as if
- # it were a list of byte codes. For a (minimal) history on why
- # this is so, see changeset 0cf700464177. To correctly encode a
- # character set, then, we must turn it into pseudo bytes via the
- # latin1 charset, which will encode any byte as a single code point
- # between 0 and 255, which is what body_encode is expecting.
- if isinstance(string, str):
- string = string.encode(self.output_charset)
- string = string.decode('latin1')
- return email.quoprimime.body_encode(string)
- else:
- if isinstance(string, str):
- string = string.encode(self.output_charset).decode('ascii')
- return string
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import str
+from future.builtins import next
+
+# Copyright (C) 2001-2007 Python Software Foundation
+# Author: Ben Gertzfield, Barry Warsaw
+# Contact: email-sig@python.org
+
+__all__ = [
+ 'Charset',
+ 'add_alias',
+ 'add_charset',
+ 'add_codec',
+ ]
+
+from functools import partial
+
+from future.backports import email
+from future.backports.email import errors
+from future.backports.email.encoders import encode_7or8bit
+
+
# Selector flags describing how a charset's headers/bodies are encoded
QP = 1          # Quoted-Printable
BASE64 = 2      # Base64
SHORTEST = 3    # the shorter of QP and base64, but only for headers

# In "=?charset?q?hello_world?=", the "=?", "?q?" and "?=" add up to 7
RFC2047_CHROME_LEN = 7

DEFAULT_CHARSET = 'us-ascii'
UNKNOWN8BIT = 'unknown-8bit'
EMPTYSTRING = ''


# Defaults.  Every entry maps an input charset to the 3-tuple
# (header encoding, body encoding, output charset).
#
# The first group is the charsets that use quoted-printable for both
# headers and bodies and need no output conversion.
CHARSETS = dict.fromkeys(
    (
        'iso-8859-1',
        'iso-8859-2',
        'iso-8859-3',
        'iso-8859-4',
        # iso-8859-5 is Cyrillic, and not especially used
        # iso-8859-6 is Arabic, also not particularly used
        # iso-8859-7 is Greek, QP will not make it readable
        # iso-8859-8 is Hebrew, QP will not make it readable
        'iso-8859-9',
        'iso-8859-10',
        # iso-8859-11 is Thai, QP will not make it readable
        'iso-8859-13',
        'iso-8859-14',
        'iso-8859-15',
        'iso-8859-16',
        'windows-1252',
        'viscii',
    ),
    (QP, QP, None),
)
CHARSETS.update({
    'us-ascii': (None, None, None),
    'big5': (BASE64, BASE64, None),
    'gb2312': (BASE64, BASE64, None),
    'euc-jp': (BASE64, None, 'iso-2022-jp'),
    'shift_jis': (BASE64, None, 'iso-2022-jp'),
    'iso-2022-jp': (BASE64, None, None),
    'koi8-r': (BASE64, BASE64, None),
    'utf-8': (SHORTEST, BASE64, 'utf-8'),
})

# Aliases for other commonly-used names for character sets.  Map
# them to the real ones used in email.  Each "latin" number is accepted
# with either an underscore or a hyphen.
ALIASES = {
    'latin' + joiner + number: 'iso-8859-' + part
    for number, part in (
        ('1', '1'),
        ('2', '2'),
        ('3', '3'),
        ('4', '4'),
        ('5', '9'),
        ('6', '10'),
        ('7', '13'),
        ('8', '14'),
        ('9', '15'),
        ('10', '16'),
    )
    for joiner in ('_', '-')
}
ALIASES.update({
    'cp949': 'ks_c_5601-1987',
    'euc_jp': 'euc-jp',
    'euc_kr': 'euc-kr',
    'ascii': 'us-ascii',
})


# Map charsets to their Unicode codec strings.
CODEC_MAP = {
    'gb2312': 'eucgb2312_cn',
    'big5': 'big5_tw',
    # Hack: We don't want *any* conversion for stuff marked us-ascii, as all
    # sorts of garbage might be sent to us in the guise of 7-bit us-ascii.
    # Let that stuff pass through without conversion to/from Unicode.
    'us-ascii': None,
}
+
+
+# Convenience functions for extending the above mappings
def add_charset(charset, header_enc=None, body_enc=None, output_charset=None):
    """Register (or replace) a character set in the global registry.

    charset is the canonical name of the input character set; it is used
    as the key of the module-level CHARSETS mapping.

    header_enc and body_enc say how message headers and bodies in this
    charset are to be encoded: QP for quoted-printable, BASE64 for base64,
    SHORTEST for whichever of the two is shorter, or None (the default) for
    no encoding.  SHORTEST is accepted for header_enc only.

    output_charset names the character set the output should be in.  The
    default, None, leaves the output in the same character set as the
    input.

    Charsets whose name is not itself a Python codec name also need a
    codec entry; use add_codec(charset, codecname) to supply one.

    Raises ValueError if body_enc is SHORTEST.
    """
    if body_enc == SHORTEST:
        raise ValueError('SHORTEST not allowed for body_enc')
    properties = (header_enc, body_enc, output_charset)
    CHARSETS[charset] = properties
+
+
def add_alias(alias, canonical):
    """Register an alternative name for a character set.

    alias is the alternative spelling (e.g. latin-1) and canonical is the
    character set name it stands for (e.g. iso-8859-1).  An existing entry
    for the same alias is overwritten.
    """
    ALIASES[alias] = canonical
+
+
def add_codec(charset, codecname):
    """Register the Python codec that converts a charset to/from Unicode.

    charset is the canonical name of a character set and codecname is the
    name of a Python codec, in the form accepted by the encode() method of
    a string.  An existing entry for the same charset is overwritten.
    """
    CODEC_MAP[charset] = codecname
+
+
# Helper for turning text into bytes.  Strings tagged unknown-8bit may
# carry surrogate-escaped bytes, so they get special treatment.
def _encode(string, codec):
    """Return string encoded to bytes with codec.

    When codec is UNKNOWN8BIT the text is encoded as ASCII with the
    'surrogateescape' error handler; any other codec name is passed to
    encode() as is.
    """
    text = str(string)
    if codec != UNKNOWN8BIT:
        return text.encode(codec)
    return text.encode('ascii', 'surrogateescape')
+
+
class Charset(object):
    """Map character sets to their email properties.

    This class provides information about the requirements imposed on email
    for a specific character set.  It also provides convenience routines for
    encoding headers and bodies in that character set, given the
    availability of the applicable codecs.  Given a character set, it will
    do its best to provide information on how to use that character set in
    an email in an RFC-compliant way.

    Certain character sets must be encoded with quoted-printable or base64
    when used in email headers or bodies.  Certain character sets must be
    converted outright, and are not allowed in email.  Instances of this
    class expose the following information about a character set:

    input_charset: The initial character set specified, lower-cased.
                   Common aliases are converted to their `official' email
                   names (e.g. latin_1 is converted to iso-8859-1).
                   Defaults to 7-bit us-ascii.

    header_encoding: If the character set must be encoded before it can be
                     used in an email header, this attribute will be set to
                     QP (for quoted-printable), BASE64 (for base64
                     encoding), or SHORTEST for the shortest of QP or
                     BASE64 encoding.  Otherwise, it will be None.

    body_encoding: Same as header_encoding, but describes the encoding for
                   the mail message's body, which indeed may be different
                   than the header encoding.  SHORTEST is not allowed for
                   body_encoding.

    output_charset: The character set the output is converted to.  When the
                    registry names no conversion for input_charset, this is
                    the same as input_charset.

    input_codec: The name of the Python codec used to convert the
                 input_charset to Unicode, as registered in CODEC_MAP.
                 Without a registered entry it is the input_charset name
                 itself; a registered entry may be None.

    output_codec: The name of the Python codec used to convert Unicode
                  to the output_charset, looked up the same way as
                  input_codec.
    """

    def __init__(self, input_charset=DEFAULT_CHARSET):
        """Look up the email properties of input_charset.

        input_charset may be text or ASCII bytes.  Raises
        errors.CharsetError if it contains non-ASCII characters.
        """
        # RFC 2046, $4.1.2 says charsets are not case sensitive.  We coerce to
        # unicode because its .lower() is locale insensitive.  If the argument
        # is already a unicode, we leave it at that, but ensure that the
        # charset is ASCII.
        try:
            if isinstance(input_charset, str):
                input_charset.encode('ascii')
            else:
                input_charset = str(input_charset, 'ascii')
        except UnicodeError:
            raise errors.CharsetError(input_charset)
        input_charset = input_charset.lower()
        # Set the input charset after filtering through the aliases
        self.input_charset = ALIASES.get(input_charset, input_charset)
        # Guess which encodings and conversion to use from the CHARSETS
        # registry; unknown charsets fall back to SHORTEST headers and
        # BASE64 bodies with no conversion.
        henc, benc, conv = CHARSETS.get(self.input_charset,
                                        (SHORTEST, BASE64, None))
        if not conv:
            # No conversion registered: output in the input charset.
            conv = self.input_charset
        self.header_encoding = henc
        self.body_encoding = benc
        self.output_charset = ALIASES.get(conv, conv)
        # Now set the codecs.  If one isn't registered for a charset, guess
        # and try a Unicode codec with the same name as the charset.
        self.input_codec = CODEC_MAP.get(self.input_charset,
                                         self.input_charset)
        self.output_codec = CODEC_MAP.get(self.output_charset,
                                          self.output_charset)

    def __str__(self):
        """Return the lower-cased input charset name."""
        return self.input_charset.lower()

    __repr__ = __str__

    def __eq__(self, other):
        """Compare by charset name, ignoring the case of other."""
        return str(self) == str(other).lower()

    def __ne__(self, other):
        return not self.__eq__(other)

    def get_body_encoding(self):
        """Return the content-transfer-encoding used for body encoding.

        This is either the string `quoted-printable' or `base64' depending on
        the encoding used, or it is a function in which case you should call
        the function with a single argument, the Message object being
        encoded.  The function should then set the Content-Transfer-Encoding
        header itself to whatever is appropriate.

        Returns "quoted-printable" if self.body_encoding is QP.
        Returns "base64" if self.body_encoding is BASE64.
        Returns the encode_7or8bit function otherwise.
        """
        assert self.body_encoding != SHORTEST
        if self.body_encoding == QP:
            return 'quoted-printable'
        elif self.body_encoding == BASE64:
            return 'base64'
        else:
            return encode_7or8bit

    def get_output_charset(self):
        """Return the output character set.

        This is self.output_charset if that is set, otherwise it is
        self.input_charset.
        """
        return self.output_charset or self.input_charset

    def header_encode(self, string):
        """Header-encode a string by converting it first to bytes.

        The type of encoding (base64 or quoted-printable) will be based on
        this charset's `header_encoding`.

        :param string: A unicode string for the header.  It must be possible
            to encode this string to bytes using the character set's
            output codec.
        :return: The encoded string, with RFC 2047 chrome.  If this charset
            has no header encoding, `string` is returned unchanged.
        """
        codec = self.output_codec or 'us-ascii'
        header_bytes = _encode(string, codec)
        # 7bit/8bit encodings return the string unchanged (modulo conversions)
        encoder_module = self._get_encoder(header_bytes)
        if encoder_module is None:
            return string
        return encoder_module.header_encode(header_bytes, codec)

    def header_encode_lines(self, string, maxlengths):
        """Header-encode a string by converting it first to bytes.

        This is similar to `header_encode()` except that the string is fit
        into maximum line lengths as given by the argument.

        NOTE: unlike `header_encode()`, this method has no pass-through for
        charsets without a header encoding; it expects `_get_encoder()` to
        return an encoder module.

        :param string: A unicode string for the header.  It must be possible
            to encode this string to bytes using the character set's
            output codec.
        :param maxlengths: Maximum line length iterator.  Each element
            returned from this iterator will provide the next maximum line
            length.  This parameter is used as an argument to built-in next()
            and should never be exhausted.  The maximum line lengths should
            not count the RFC 2047 chrome.  These line lengths are only a
            hint; the splitter does the best it can.
        :return: Lines of encoded strings, each with RFC 2047 chrome.  The
            first element is None when not even one character fits on the
            first line.
        """
        # See which encoding we should use.
        codec = self.output_codec or 'us-ascii'
        header_bytes = _encode(string, codec)
        encoder_module = self._get_encoder(header_bytes)
        encoder = partial(encoder_module.header_encode, charset=codec)
        # Calculate the number of characters that the RFC 2047 chrome will
        # contribute to each line.
        charset = self.get_output_charset()
        extra = len(charset) + RFC2047_CHROME_LEN
        # Now comes the hard part.  We must encode bytes but we can't split on
        # bytes because some character sets are variable length and each
        # encoded word must stand on its own.  So the problem is you have to
        # encode to bytes to figure out this word's length, but you must split
        # on characters.  This causes two problems: first, we don't know how
        # many octets a specific substring of unicode characters will get
        # encoded to, and second, we don't know how many ASCII characters
        # those octets will get encoded to.  Unless we try it.  Which seems
        # inefficient.  In the interest of being correct rather than fast (and
        # in the hope that there will be few encoded headers in any such
        # message), brute force it. :(
        lines = []
        current_line = []
        maxlen = next(maxlengths) - extra
        for character in string:
            current_line.append(character)
            this_line = EMPTYSTRING.join(current_line)
            # Measure with the same codec that produces the emitted bytes;
            # the charset name is not necessarily a Python codec name.
            length = encoder_module.header_length(_encode(this_line, codec))
            if length > maxlen:
                # This last character doesn't fit so pop it off.
                current_line.pop()
                # Does nothing fit on the first line?
                if not lines and not current_line:
                    lines.append(None)
                else:
                    joined_line = EMPTYSTRING.join(current_line)
                    header_bytes = _encode(joined_line, codec)
                    lines.append(encoder(header_bytes))
                current_line = [character]
                maxlen = next(maxlengths) - extra
        joined_line = EMPTYSTRING.join(current_line)
        header_bytes = _encode(joined_line, codec)
        lines.append(encoder(header_bytes))
        return lines

    def _get_encoder(self, header_bytes):
        """Return the module that encodes headers for this charset.

        Returns email.base64mime or email.quoprimime according to
        self.header_encoding; for SHORTEST, whichever reports the smaller
        header_length() for header_bytes (quoted-printable on a tie).
        Returns None when the charset has no header encoding.
        """
        if self.header_encoding == BASE64:
            return email.base64mime
        elif self.header_encoding == QP:
            return email.quoprimime
        elif self.header_encoding == SHORTEST:
            len64 = email.base64mime.header_length(header_bytes)
            lenqp = email.quoprimime.header_length(header_bytes)
            if len64 < lenqp:
                return email.base64mime
            else:
                return email.quoprimime
        else:
            return None

    def body_encode(self, string):
        """Body-encode a string by converting it first to bytes.

        The type of encoding (base64 or quoted-printable) will be based on
        self.body_encoding.  If body_encoding is None, we assume the
        output charset is a 7bit encoding, so re-encoding the decoded
        string using the ascii codec produces the correct string version
        of the content.

        An empty or otherwise false `string` is returned unchanged.
        """
        if not string:
            return string
        # Compare the flags by value, like the other methods of this class.
        if self.body_encoding == BASE64:
            if isinstance(string, str):
                string = string.encode(self.output_charset)
            return email.base64mime.body_encode(string)
        elif self.body_encoding == QP:
            # quoprimime.body_encode takes a string, but operates on it as if
            # it were a list of byte codes.  For a (minimal) history on why
            # this is so, see changeset 0cf700464177.  To correctly encode a
            # character set, then, we must turn it into pseudo bytes via the
            # latin1 charset, which will encode any byte as a single code point
            # between 0 and 255, which is what body_encode is expecting.
            if isinstance(string, str):
                string = string.encode(self.output_charset)
            string = string.decode('latin1')
            return email.quoprimime.body_encode(string)
        else:
            if isinstance(string, str):
                string = string.encode(self.output_charset).decode('ascii')
            return string
diff --git a/contrib/python/future/future/backports/email/encoders.py b/contrib/python/future/future/backports/email/encoders.py
index 67627b8707..15d2eb4650 100644
--- a/contrib/python/future/future/backports/email/encoders.py
+++ b/contrib/python/future/future/backports/email/encoders.py
@@ -1,90 +1,90 @@
-# Copyright (C) 2001-2006 Python Software Foundation
-# Author: Barry Warsaw
-# Contact: email-sig@python.org
-
-"""Encodings and related functions."""
-from __future__ import unicode_literals
-from __future__ import division
-from __future__ import absolute_import
-from future.builtins import str
-
-__all__ = [
- 'encode_7or8bit',
- 'encode_base64',
- 'encode_noop',
- 'encode_quopri',
- ]
-
-
-try:
- from base64 import encodebytes as _bencode
-except ImportError:
- # Py2 compatibility. TODO: test this!
- from base64 import encodestring as _bencode
-from quopri import encodestring as _encodestring
-
-
-def _qencode(s):
- enc = _encodestring(s, quotetabs=True)
- # Must encode spaces, which quopri.encodestring() doesn't do
- return enc.replace(' ', '=20')
-
-
-def encode_base64(msg):
- """Encode the message's payload in Base64.
-
- Also, add an appropriate Content-Transfer-Encoding header.
- """
- orig = msg.get_payload()
- encdata = str(_bencode(orig), 'ascii')
- msg.set_payload(encdata)
- msg['Content-Transfer-Encoding'] = 'base64'
-
-
-def encode_quopri(msg):
- """Encode the message's payload in quoted-printable.
-
- Also, add an appropriate Content-Transfer-Encoding header.
- """
- orig = msg.get_payload()
- encdata = _qencode(orig)
- msg.set_payload(encdata)
- msg['Content-Transfer-Encoding'] = 'quoted-printable'
-
-
-def encode_7or8bit(msg):
- """Set the Content-Transfer-Encoding header to 7bit or 8bit."""
- orig = msg.get_payload()
- if orig is None:
- # There's no payload. For backwards compatibility we use 7bit
- msg['Content-Transfer-Encoding'] = '7bit'
- return
- # We play a trick to make this go fast. If encoding/decode to ASCII
- # succeeds, we know the data must be 7bit, otherwise treat it as 8bit.
- try:
- if isinstance(orig, str):
- orig.encode('ascii')
- else:
- orig.decode('ascii')
- except UnicodeError:
- charset = msg.get_charset()
- output_cset = charset and charset.output_charset
- # iso-2022-* is non-ASCII but encodes to a 7-bit representation
- if output_cset and output_cset.lower().startswith('iso-2022-'):
- msg['Content-Transfer-Encoding'] = '7bit'
- else:
- msg['Content-Transfer-Encoding'] = '8bit'
- else:
- msg['Content-Transfer-Encoding'] = '7bit'
- if not isinstance(orig, str):
- msg.set_payload(orig.decode('ascii', 'surrogateescape'))
-
-
-def encode_noop(msg):
- """Do nothing."""
- # Well, not quite *nothing*: in Python3 we have to turn bytes into a string
- # in our internal surrogateescaped form in order to keep the model
- # consistent.
- orig = msg.get_payload()
- if not isinstance(orig, str):
- msg.set_payload(orig.decode('ascii', 'surrogateescape'))
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Encodings and related functions."""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import str
+
+__all__ = [
+ 'encode_7or8bit',
+ 'encode_base64',
+ 'encode_noop',
+ 'encode_quopri',
+ ]
+
+
+try:
+ from base64 import encodebytes as _bencode
+except ImportError:
+ # Py2 compatibility. TODO: test this!
+ from base64 import encodestring as _bencode
+from quopri import encodestring as _encodestring
+
+
+def _qencode(s):
+ enc = _encodestring(s, quotetabs=True)
+ # Must encode spaces, which quopri.encodestring() doesn't do
+ return enc.replace(' ', '=20')
+
+
def encode_base64(msg):
    """Replace the message's payload with its Base64 encoding.

    The encoded payload is stored as ASCII text and the message's
    Content-Transfer-Encoding header is set to 'base64'.
    """
    raw = msg.get_payload()
    msg.set_payload(str(_bencode(raw), 'ascii'))
    msg['Content-Transfer-Encoding'] = 'base64'
+
+
def encode_quopri(msg):
    """Replace the message's payload with its quoted-printable encoding.

    The message's Content-Transfer-Encoding header is set to
    'quoted-printable'.
    """
    raw = msg.get_payload()
    msg.set_payload(_qencode(raw))
    msg['Content-Transfer-Encoding'] = 'quoted-printable'
+
+
def encode_7or8bit(msg):
    """Set the Content-Transfer-Encoding header to 7bit or 8bit.

    A missing payload counts as 7bit.  A bytes payload is additionally
    stored back on the message as text, decoded as ASCII with the
    'surrogateescape' error handler.
    """
    payload = msg.get_payload()
    if payload is None:
        # There's no payload.  For backwards compatibility we use 7bit
        msg['Content-Transfer-Encoding'] = '7bit'
        return
    is_text = isinstance(payload, str)
    # Cheap probe: if the payload round-trips through ASCII it is 7bit
    # data, otherwise it is treated as 8bit.
    try:
        if is_text:
            payload.encode('ascii')
        else:
            payload.decode('ascii')
    except UnicodeError:
        cset = msg.get_charset()
        out_name = cset and cset.output_charset
        # iso-2022-* is non-ASCII but encodes to a 7-bit representation
        seven_bit = bool(out_name) and out_name.lower().startswith('iso-2022-')
    else:
        seven_bit = True
    msg['Content-Transfer-Encoding'] = '7bit' if seven_bit else '8bit'
    if not is_text:
        msg.set_payload(payload.decode('ascii', 'surrogateescape'))
+
+
def encode_noop(msg):
    """Leave the payload unencoded and set no header.

    Text payloads are left untouched.  Any other payload is stored back
    as text in the internal surrogateescaped form (decoded as ASCII with
    the 'surrogateescape' handler) to keep the model consistent.
    """
    payload = msg.get_payload()
    if isinstance(payload, str):
        return
    msg.set_payload(payload.decode('ascii', 'surrogateescape'))
diff --git a/contrib/python/future/future/backports/email/errors.py b/contrib/python/future/future/backports/email/errors.py
index 7333368b52..0fe599cf0a 100644
--- a/contrib/python/future/future/backports/email/errors.py
+++ b/contrib/python/future/future/backports/email/errors.py
@@ -1,111 +1,111 @@
-# Copyright (C) 2001-2006 Python Software Foundation
-# Author: Barry Warsaw
-# Contact: email-sig@python.org
-
-"""email package exception classes."""
-from __future__ import unicode_literals
-from __future__ import division
-from __future__ import absolute_import
-from future.builtins import super
-
-
-class MessageError(Exception):
- """Base class for errors in the email package."""
-
-
-class MessageParseError(MessageError):
- """Base class for message parsing errors."""
-
-
-class HeaderParseError(MessageParseError):
- """Error while parsing headers."""
-
-
-class BoundaryError(MessageParseError):
- """Couldn't find terminating boundary."""
-
-
-class MultipartConversionError(MessageError, TypeError):
- """Conversion to a multipart is prohibited."""
-
-
-class CharsetError(MessageError):
- """An illegal charset was given."""
-
-
-# These are parsing defects which the parser was able to work around.
-class MessageDefect(ValueError):
- """Base class for a message defect."""
-
- def __init__(self, line=None):
- if line is not None:
- super().__init__(line)
- self.line = line
-
-class NoBoundaryInMultipartDefect(MessageDefect):
- """A message claimed to be a multipart but had no boundary parameter."""
-
-class StartBoundaryNotFoundDefect(MessageDefect):
- """The claimed start boundary was never found."""
-
-class CloseBoundaryNotFoundDefect(MessageDefect):
- """A start boundary was found, but not the corresponding close boundary."""
-
-class FirstHeaderLineIsContinuationDefect(MessageDefect):
- """A message had a continuation line as its first header line."""
-
-class MisplacedEnvelopeHeaderDefect(MessageDefect):
- """A 'Unix-from' header was found in the middle of a header block."""
-
-class MissingHeaderBodySeparatorDefect(MessageDefect):
- """Found line with no leading whitespace and no colon before blank line."""
-# XXX: backward compatibility, just in case (it was never emitted).
-MalformedHeaderDefect = MissingHeaderBodySeparatorDefect
-
-class MultipartInvariantViolationDefect(MessageDefect):
- """A message claimed to be a multipart but no subparts were found."""
-
-class InvalidMultipartContentTransferEncodingDefect(MessageDefect):
- """An invalid content transfer encoding was set on the multipart itself."""
-
-class UndecodableBytesDefect(MessageDefect):
- """Header contained bytes that could not be decoded"""
-
-class InvalidBase64PaddingDefect(MessageDefect):
- """base64 encoded sequence had an incorrect length"""
-
-class InvalidBase64CharactersDefect(MessageDefect):
- """base64 encoded sequence had characters not in base64 alphabet"""
-
-# These errors are specific to header parsing.
-
-class HeaderDefect(MessageDefect):
- """Base class for a header defect."""
-
- def __init__(self, *args, **kw):
- super().__init__(*args, **kw)
-
-class InvalidHeaderDefect(HeaderDefect):
- """Header is not valid, message gives details."""
-
-class HeaderMissingRequiredValue(HeaderDefect):
- """A header that must have a value had none"""
-
-class NonPrintableDefect(HeaderDefect):
- """ASCII characters outside the ascii-printable range found"""
-
- def __init__(self, non_printables):
- super().__init__(non_printables)
- self.non_printables = non_printables
-
- def __str__(self):
- return ("the following ASCII non-printables found in header: "
- "{}".format(self.non_printables))
-
-class ObsoleteHeaderDefect(HeaderDefect):
- """Header uses syntax declared obsolete by RFC 5322"""
-
-class NonASCIILocalPartDefect(HeaderDefect):
- """local_part contains non-ASCII characters"""
- # This defect only occurs during unicode parsing, not when
- # parsing messages decoded from binary.
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""email package exception classes."""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import super
+
+
class MessageError(Exception):
    """Root of the email package's exception hierarchy."""


class MessageParseError(MessageError):
    """Common base for errors found while parsing a message."""


class HeaderParseError(MessageParseError):
    """The headers of a message could not be parsed."""


class BoundaryError(MessageParseError):
    """The terminating boundary could not be found."""


class MultipartConversionError(MessageError, TypeError):
    """Converting the message to a multipart is not allowed."""


class CharsetError(MessageError):
    """The given charset is illegal."""
+
+
# Defects are problems the parser noticed but was able to work around.
class MessageDefect(ValueError):
    """Common base for message defects.

    The optional line is kept on the instance as .line.  It is forwarded
    to the ValueError initializer only when it is not None.
    """

    def __init__(self, line=None):
        self.line = line
        if line is None:
            return
        super().__init__(line)
+
class NoBoundaryInMultipartDefect(MessageDefect):
    """The message claims to be a multipart but has no boundary parameter."""


class StartBoundaryNotFoundDefect(MessageDefect):
    """The start boundary the message claims was never found."""


class CloseBoundaryNotFoundDefect(MessageDefect):
    """A start boundary was found without its matching close boundary."""


class FirstHeaderLineIsContinuationDefect(MessageDefect):
    """The first header line of the message is a continuation line."""


class MisplacedEnvelopeHeaderDefect(MessageDefect):
    """A 'Unix-from' header appeared in the middle of a header block."""


class MissingHeaderBodySeparatorDefect(MessageDefect):
    """A line with no leading whitespace and no colon came before the blank line."""


# XXX: backward compatibility, just in case (it was never emitted).
MalformedHeaderDefect = MissingHeaderBodySeparatorDefect


class MultipartInvariantViolationDefect(MessageDefect):
    """The message claims to be a multipart but no subparts were found."""


class InvalidMultipartContentTransferEncodingDefect(MessageDefect):
    """The multipart itself carries an invalid content transfer encoding."""


class UndecodableBytesDefect(MessageDefect):
    """The header contains bytes that could not be decoded."""


class InvalidBase64PaddingDefect(MessageDefect):
    """A base64 encoded sequence has an incorrect length."""


class InvalidBase64CharactersDefect(MessageDefect):
    """A base64 encoded sequence has characters outside the base64 alphabet."""
+
# The defects below apply to header parsing only.

class HeaderDefect(MessageDefect):
    """Common base for header defects."""

    def __init__(self, *args, **kw):
        # Pure pass-through to the MessageDefect initializer.
        super().__init__(*args, **kw)
+
class InvalidHeaderDefect(HeaderDefect):
    """The header is not valid; the defect's message gives the details."""


class HeaderMissingRequiredValue(HeaderDefect):
    """A header that requires a value has none."""
+
class NonPrintableDefect(HeaderDefect):
    """ASCII characters outside the ascii-printable range were found.

    The offending characters are kept on the instance as .non_printables
    and are included in the string form of the defect.
    """

    def __init__(self, non_printables):
        self.non_printables = non_printables
        super().__init__(non_printables)

    def __str__(self):
        template = "the following ASCII non-printables found in header: {}"
        return template.format(self.non_printables)
+
class ObsoleteHeaderDefect(HeaderDefect):
    """The header uses syntax that RFC 5322 declares obsolete."""


class NonASCIILocalPartDefect(HeaderDefect):
    """The local_part contains non-ASCII characters."""
    # Only unicode parsing produces this defect; parsing messages decoded
    # from binary does not.
diff --git a/contrib/python/future/future/backports/email/feedparser.py b/contrib/python/future/future/backports/email/feedparser.py
index b7777ef097..935c26e317 100644
--- a/contrib/python/future/future/backports/email/feedparser.py
+++ b/contrib/python/future/future/backports/email/feedparser.py
@@ -1,525 +1,525 @@
-# Copyright (C) 2004-2006 Python Software Foundation
-# Authors: Baxter, Wouters and Warsaw
-# Contact: email-sig@python.org
-
-"""FeedParser - An email feed parser.
-
-The feed parser implements an interface for incrementally parsing an email
-message, line by line. This has advantages for certain applications, such as
-those reading email messages off a socket.
-
-FeedParser.feed() is the primary interface for pushing new data into the
-parser. It returns when there's nothing more it can do with the available
-data. When you have no more data to push into the parser, call .close().
-This completes the parsing and returns the root message object.
-
-The other advantage of this parser is that it will never raise a parsing
-exception. Instead, when it finds something unexpected, it adds a 'defect' to
-the current message. Defects are just instances that live on the message
-object's .defects attribute.
-"""
-from __future__ import unicode_literals
-from __future__ import division
-from __future__ import absolute_import
-from future.builtins import object, range, super
-from future.utils import implements_iterator, PY3
-
-__all__ = ['FeedParser', 'BytesFeedParser']
-
-import re
-
-from future.backports.email import errors
-from future.backports.email import message
-from future.backports.email._policybase import compat32
-
-NLCRE = re.compile('\r\n|\r|\n')
-NLCRE_bol = re.compile('(\r\n|\r|\n)')
-NLCRE_eol = re.compile('(\r\n|\r|\n)\Z')
-NLCRE_crack = re.compile('(\r\n|\r|\n)')
-# RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character
-# except controls, SP, and ":".
-headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:|[\t ])')
-EMPTYSTRING = ''
-NL = '\n'
-
-NeedMoreData = object()
-
-
-# @implements_iterator
-class BufferedSubFile(object):
- """A file-ish object that can have new data loaded into it.
-
- You can also push and pop line-matching predicates onto a stack. When the
- current predicate matches the current line, a false EOF response
- (i.e. empty string) is returned instead. This lets the parser adhere to a
- simple abstraction -- it parses until EOF closes the current message.
- """
- def __init__(self):
- # The last partial line pushed into this object.
- self._partial = ''
- # The list of full, pushed lines, in reverse order
- self._lines = []
- # The stack of false-EOF checking predicates.
- self._eofstack = []
- # A flag indicating whether the file has been closed or not.
- self._closed = False
-
- def push_eof_matcher(self, pred):
- self._eofstack.append(pred)
-
- def pop_eof_matcher(self):
- return self._eofstack.pop()
-
- def close(self):
- # Don't forget any trailing partial line.
- self._lines.append(self._partial)
- self._partial = ''
- self._closed = True
-
- def readline(self):
- if not self._lines:
- if self._closed:
- return ''
- return NeedMoreData
- # Pop the line off the stack and see if it matches the current
- # false-EOF predicate.
- line = self._lines.pop()
- # RFC 2046, section 5.1.2 requires us to recognize outer level
- # boundaries at any level of inner nesting. Do this, but be sure it's
- # in the order of most to least nested.
- for ateof in self._eofstack[::-1]:
- if ateof(line):
- # We're at the false EOF. But push the last line back first.
- self._lines.append(line)
- return ''
- return line
-
- def unreadline(self, line):
- # Let the consumer push a line back into the buffer.
- assert line is not NeedMoreData
- self._lines.append(line)
-
- def push(self, data):
- """Push some new data into this object."""
- # Handle any previous leftovers
- data, self._partial = self._partial + data, ''
- # Crack into lines, but preserve the newlines on the end of each
- parts = NLCRE_crack.split(data)
- # The *ahem* interesting behaviour of re.split when supplied grouping
- # parentheses is that the last element of the resulting list is the
- # data after the final RE. In the case of a NL/CR terminated string,
- # this is the empty string.
- self._partial = parts.pop()
- #GAN 29Mar09 bugs 1555570, 1721862 Confusion at 8K boundary ending with \r:
- # is there a \n to follow later?
- if not self._partial and parts and parts[-1].endswith('\r'):
- self._partial = parts.pop(-2)+parts.pop()
- # parts is a list of strings, alternating between the line contents
- # and the eol character(s). Gather up a list of lines after
- # re-attaching the newlines.
- lines = []
- for i in range(len(parts) // 2):
- lines.append(parts[i*2] + parts[i*2+1])
- self.pushlines(lines)
-
- def pushlines(self, lines):
- # Reverse and insert at the front of the lines.
- self._lines[:0] = lines[::-1]
-
- def __iter__(self):
- return self
-
- def __next__(self):
- line = self.readline()
- if line == '':
- raise StopIteration
- return line
-
-
-class FeedParser(object):
- """A feed-style parser of email."""
-
- def __init__(self, _factory=message.Message, **_3to2kwargs):
- if 'policy' in _3to2kwargs: policy = _3to2kwargs['policy']; del _3to2kwargs['policy']
- else: policy = compat32
- """_factory is called with no arguments to create a new message obj
-
- The policy keyword specifies a policy object that controls a number of
- aspects of the parser's operation. The default policy maintains
- backward compatibility.
-
- """
- self._factory = _factory
- self.policy = policy
- try:
- _factory(policy=self.policy)
- self._factory_kwds = lambda: {'policy': self.policy}
- except TypeError:
- # Assume this is an old-style factory
- self._factory_kwds = lambda: {}
- self._input = BufferedSubFile()
- self._msgstack = []
- if PY3:
- self._parse = self._parsegen().__next__
- else:
- self._parse = self._parsegen().next
- self._cur = None
- self._last = None
- self._headersonly = False
-
- # Non-public interface for supporting Parser's headersonly flag
- def _set_headersonly(self):
- self._headersonly = True
-
- def feed(self, data):
- """Push more data into the parser."""
- self._input.push(data)
- self._call_parse()
-
- def _call_parse(self):
- try:
- self._parse()
- except StopIteration:
- pass
-
- def close(self):
- """Parse all remaining data and return the root message object."""
- self._input.close()
- self._call_parse()
- root = self._pop_message()
- assert not self._msgstack
- # Look for final set of defects
- if root.get_content_maintype() == 'multipart' \
- and not root.is_multipart():
- defect = errors.MultipartInvariantViolationDefect()
- self.policy.handle_defect(root, defect)
- return root
-
- def _new_message(self):
- msg = self._factory(**self._factory_kwds())
- if self._cur and self._cur.get_content_type() == 'multipart/digest':
- msg.set_default_type('message/rfc822')
- if self._msgstack:
- self._msgstack[-1].attach(msg)
- self._msgstack.append(msg)
- self._cur = msg
- self._last = msg
-
- def _pop_message(self):
- retval = self._msgstack.pop()
- if self._msgstack:
- self._cur = self._msgstack[-1]
- else:
- self._cur = None
- return retval
-
- def _parsegen(self):
- # Create a new message and start by parsing headers.
- self._new_message()
- headers = []
- # Collect the headers, searching for a line that doesn't match the RFC
- # 2822 header or continuation pattern (including an empty line).
- for line in self._input:
- if line is NeedMoreData:
- yield NeedMoreData
- continue
- if not headerRE.match(line):
- # If we saw the RFC defined header/body separator
- # (i.e. newline), just throw it away. Otherwise the line is
- # part of the body so push it back.
- if not NLCRE.match(line):
- defect = errors.MissingHeaderBodySeparatorDefect()
- self.policy.handle_defect(self._cur, defect)
- self._input.unreadline(line)
- break
- headers.append(line)
- # Done with the headers, so parse them and figure out what we're
- # supposed to see in the body of the message.
- self._parse_headers(headers)
- # Headers-only parsing is a backwards compatibility hack, which was
- # necessary in the older parser, which could raise errors. All
- # remaining lines in the input are thrown into the message body.
- if self._headersonly:
- lines = []
- while True:
- line = self._input.readline()
- if line is NeedMoreData:
- yield NeedMoreData
- continue
- if line == '':
- break
- lines.append(line)
- self._cur.set_payload(EMPTYSTRING.join(lines))
- return
- if self._cur.get_content_type() == 'message/delivery-status':
- # message/delivery-status contains blocks of headers separated by
- # a blank line. We'll represent each header block as a separate
- # nested message object, but the processing is a bit different
- # than standard message/* types because there is no body for the
- # nested messages. A blank line separates the subparts.
- while True:
- self._input.push_eof_matcher(NLCRE.match)
- for retval in self._parsegen():
- if retval is NeedMoreData:
- yield NeedMoreData
- continue
- break
- msg = self._pop_message()
- # We need to pop the EOF matcher in order to tell if we're at
- # the end of the current file, not the end of the last block
- # of message headers.
- self._input.pop_eof_matcher()
- # The input stream must be sitting at the newline or at the
- # EOF. We want to see if we're at the end of this subpart, so
- # first consume the blank line, then test the next line to see
- # if we're at this subpart's EOF.
- while True:
- line = self._input.readline()
- if line is NeedMoreData:
- yield NeedMoreData
- continue
- break
- while True:
- line = self._input.readline()
- if line is NeedMoreData:
- yield NeedMoreData
- continue
- break
- if line == '':
- break
- # Not at EOF so this is a line we're going to need.
- self._input.unreadline(line)
- return
- if self._cur.get_content_maintype() == 'message':
- # The message claims to be a message/* type, then what follows is
- # another RFC 2822 message.
- for retval in self._parsegen():
- if retval is NeedMoreData:
- yield NeedMoreData
- continue
- break
- self._pop_message()
- return
- if self._cur.get_content_maintype() == 'multipart':
- boundary = self._cur.get_boundary()
- if boundary is None:
- # The message /claims/ to be a multipart but it has not
- # defined a boundary. That's a problem which we'll handle by
- # reading everything until the EOF and marking the message as
- # defective.
- defect = errors.NoBoundaryInMultipartDefect()
- self.policy.handle_defect(self._cur, defect)
- lines = []
- for line in self._input:
- if line is NeedMoreData:
- yield NeedMoreData
- continue
- lines.append(line)
- self._cur.set_payload(EMPTYSTRING.join(lines))
- return
- # Make sure a valid content type was specified per RFC 2045:6.4.
- if (self._cur.get('content-transfer-encoding', '8bit').lower()
- not in ('7bit', '8bit', 'binary')):
- defect = errors.InvalidMultipartContentTransferEncodingDefect()
- self.policy.handle_defect(self._cur, defect)
- # Create a line match predicate which matches the inter-part
- # boundary as well as the end-of-multipart boundary. Don't push
- # this onto the input stream until we've scanned past the
- # preamble.
- separator = '--' + boundary
- boundaryre = re.compile(
- '(?P<sep>' + re.escape(separator) +
- r')(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$')
- capturing_preamble = True
- preamble = []
- linesep = False
- close_boundary_seen = False
- while True:
- line = self._input.readline()
- if line is NeedMoreData:
- yield NeedMoreData
- continue
- if line == '':
- break
- mo = boundaryre.match(line)
- if mo:
- # If we're looking at the end boundary, we're done with
- # this multipart. If there was a newline at the end of
- # the closing boundary, then we need to initialize the
- # epilogue with the empty string (see below).
- if mo.group('end'):
- close_boundary_seen = True
- linesep = mo.group('linesep')
- break
- # We saw an inter-part boundary. Were we in the preamble?
- if capturing_preamble:
- if preamble:
- # According to RFC 2046, the last newline belongs
- # to the boundary.
- lastline = preamble[-1]
- eolmo = NLCRE_eol.search(lastline)
- if eolmo:
- preamble[-1] = lastline[:-len(eolmo.group(0))]
- self._cur.preamble = EMPTYSTRING.join(preamble)
- capturing_preamble = False
- self._input.unreadline(line)
- continue
- # We saw a boundary separating two parts. Consume any
- # multiple boundary lines that may be following. Our
- # interpretation of RFC 2046 BNF grammar does not produce
- # body parts within such double boundaries.
- while True:
- line = self._input.readline()
- if line is NeedMoreData:
- yield NeedMoreData
- continue
- mo = boundaryre.match(line)
- if not mo:
- self._input.unreadline(line)
- break
- # Recurse to parse this subpart; the input stream points
- # at the subpart's first line.
- self._input.push_eof_matcher(boundaryre.match)
- for retval in self._parsegen():
- if retval is NeedMoreData:
- yield NeedMoreData
- continue
- break
- # Because of RFC 2046, the newline preceding the boundary
- # separator actually belongs to the boundary, not the
- # previous subpart's payload (or epilogue if the previous
- # part is a multipart).
- if self._last.get_content_maintype() == 'multipart':
- epilogue = self._last.epilogue
- if epilogue == '':
- self._last.epilogue = None
- elif epilogue is not None:
- mo = NLCRE_eol.search(epilogue)
- if mo:
- end = len(mo.group(0))
- self._last.epilogue = epilogue[:-end]
- else:
- payload = self._last._payload
- if isinstance(payload, str):
- mo = NLCRE_eol.search(payload)
- if mo:
- payload = payload[:-len(mo.group(0))]
- self._last._payload = payload
- self._input.pop_eof_matcher()
- self._pop_message()
- # Set the multipart up for newline cleansing, which will
- # happen if we're in a nested multipart.
- self._last = self._cur
- else:
- # I think we must be in the preamble
- assert capturing_preamble
- preamble.append(line)
- # We've seen either the EOF or the end boundary. If we're still
- # capturing the preamble, we never saw the start boundary. Note
- # that as a defect and store the captured text as the payload.
- if capturing_preamble:
- defect = errors.StartBoundaryNotFoundDefect()
- self.policy.handle_defect(self._cur, defect)
- self._cur.set_payload(EMPTYSTRING.join(preamble))
- epilogue = []
- for line in self._input:
- if line is NeedMoreData:
- yield NeedMoreData
- continue
- self._cur.epilogue = EMPTYSTRING.join(epilogue)
- return
- # If we're not processing the preamble, then we might have seen
- # EOF without seeing that end boundary...that is also a defect.
- if not close_boundary_seen:
- defect = errors.CloseBoundaryNotFoundDefect()
- self.policy.handle_defect(self._cur, defect)
- return
- # Everything from here to the EOF is epilogue. If the end boundary
- # ended in a newline, we'll need to make sure the epilogue isn't
- # None
- if linesep:
- epilogue = ['']
- else:
- epilogue = []
- for line in self._input:
- if line is NeedMoreData:
- yield NeedMoreData
- continue
- epilogue.append(line)
- # Any CRLF at the front of the epilogue is not technically part of
- # the epilogue. Also, watch out for an empty string epilogue,
- # which means a single newline.
- if epilogue:
- firstline = epilogue[0]
- bolmo = NLCRE_bol.match(firstline)
- if bolmo:
- epilogue[0] = firstline[len(bolmo.group(0)):]
- self._cur.epilogue = EMPTYSTRING.join(epilogue)
- return
- # Otherwise, it's some non-multipart type, so the entire rest of the
- # file contents becomes the payload.
- lines = []
- for line in self._input:
- if line is NeedMoreData:
- yield NeedMoreData
- continue
- lines.append(line)
- self._cur.set_payload(EMPTYSTRING.join(lines))
-
- def _parse_headers(self, lines):
- # Passed a list of lines that make up the headers for the current msg
- lastheader = ''
- lastvalue = []
- for lineno, line in enumerate(lines):
- # Check for continuation
- if line[0] in ' \t':
- if not lastheader:
- # The first line of the headers was a continuation. This
- # is illegal, so let's note the defect, store the illegal
- # line, and ignore it for purposes of headers.
- defect = errors.FirstHeaderLineIsContinuationDefect(line)
- self.policy.handle_defect(self._cur, defect)
- continue
- lastvalue.append(line)
- continue
- if lastheader:
- self._cur.set_raw(*self.policy.header_source_parse(lastvalue))
- lastheader, lastvalue = '', []
- # Check for envelope header, i.e. unix-from
- if line.startswith('From '):
- if lineno == 0:
- # Strip off the trailing newline
- mo = NLCRE_eol.search(line)
- if mo:
- line = line[:-len(mo.group(0))]
- self._cur.set_unixfrom(line)
- continue
- elif lineno == len(lines) - 1:
- # Something looking like a unix-from at the end - it's
- # probably the first line of the body, so push back the
- # line and stop.
- self._input.unreadline(line)
- return
- else:
- # Weirdly placed unix-from line. Note this as a defect
- # and ignore it.
- defect = errors.MisplacedEnvelopeHeaderDefect(line)
- self._cur.defects.append(defect)
- continue
- # Split the line on the colon separating field name from value.
- # There will always be a colon, because if there wasn't the part of
- # the parser that calls us would have started parsing the body.
- i = line.find(':')
- assert i>0, "_parse_headers fed line with no : and no leading WS"
- lastheader = line[:i]
- lastvalue = [line]
- # Done with all the lines, so handle the last header.
- if lastheader:
- self._cur.set_raw(*self.policy.header_source_parse(lastvalue))
-
-
-class BytesFeedParser(FeedParser):
- """Like FeedParser, but feed accepts bytes."""
-
- def feed(self, data):
- super().feed(data.decode('ascii', 'surrogateescape'))
+# Copyright (C) 2004-2006 Python Software Foundation
+# Authors: Baxter, Wouters and Warsaw
+# Contact: email-sig@python.org
+
+"""FeedParser - An email feed parser.
+
+The feed parser implements an interface for incrementally parsing an email
+message, line by line. This has advantages for certain applications, such as
+those reading email messages off a socket.
+
+FeedParser.feed() is the primary interface for pushing new data into the
+parser. It returns when there's nothing more it can do with the available
+data. When you have no more data to push into the parser, call .close().
+This completes the parsing and returns the root message object.
+
+The other advantage of this parser is that it will never raise a parsing
+exception. Instead, when it finds something unexpected, it adds a 'defect' to
+the current message. Defects are just instances that live on the message
+object's .defects attribute.
+"""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import object, range, super
+from future.utils import implements_iterator, PY3
+
+__all__ = ['FeedParser', 'BytesFeedParser']
+
+import re
+
+from future.backports.email import errors
+from future.backports.email import message
+from future.backports.email._policybase import compat32
+
+NLCRE = re.compile('\r\n|\r|\n')
+NLCRE_bol = re.compile('(\r\n|\r|\n)')
+NLCRE_eol = re.compile('(\r\n|\r|\n)\Z')
+NLCRE_crack = re.compile('(\r\n|\r|\n)')
+# RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character
+# except controls, SP, and ":".
+headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:|[\t ])')
+EMPTYSTRING = ''
+NL = '\n'
+
+NeedMoreData = object()
+
+
+# @implements_iterator
+class BufferedSubFile(object):
+ """A file-ish object that can have new data loaded into it.
+
+ You can also push and pop line-matching predicates onto a stack. When the
+ current predicate matches the current line, a false EOF response
+ (i.e. empty string) is returned instead. This lets the parser adhere to a
+ simple abstraction -- it parses until EOF closes the current message.
+ """
+ def __init__(self):
+ # The last partial line pushed into this object.
+ self._partial = ''
+ # The list of full, pushed lines, in reverse order
+ self._lines = []
+ # The stack of false-EOF checking predicates.
+ self._eofstack = []
+ # A flag indicating whether the file has been closed or not.
+ self._closed = False
+
+ def push_eof_matcher(self, pred):
+ self._eofstack.append(pred)
+
+ def pop_eof_matcher(self):
+ return self._eofstack.pop()
+
+ def close(self):
+ # Don't forget any trailing partial line.
+ self._lines.append(self._partial)
+ self._partial = ''
+ self._closed = True
+
+ def readline(self):
+ if not self._lines:
+ if self._closed:
+ return ''
+ return NeedMoreData
+ # Pop the line off the stack and see if it matches the current
+ # false-EOF predicate.
+ line = self._lines.pop()
+ # RFC 2046, section 5.1.2 requires us to recognize outer level
+ # boundaries at any level of inner nesting. Do this, but be sure it's
+ # in the order of most to least nested.
+ for ateof in self._eofstack[::-1]:
+ if ateof(line):
+ # We're at the false EOF. But push the last line back first.
+ self._lines.append(line)
+ return ''
+ return line
+
+ def unreadline(self, line):
+ # Let the consumer push a line back into the buffer.
+ assert line is not NeedMoreData
+ self._lines.append(line)
+
+ def push(self, data):
+ """Push some new data into this object."""
+ # Handle any previous leftovers
+ data, self._partial = self._partial + data, ''
+ # Crack into lines, but preserve the newlines on the end of each
+ parts = NLCRE_crack.split(data)
+ # The *ahem* interesting behaviour of re.split when supplied grouping
+ # parentheses is that the last element of the resulting list is the
+ # data after the final RE. In the case of a NL/CR terminated string,
+ # this is the empty string.
+ self._partial = parts.pop()
+ #GAN 29Mar09 bugs 1555570, 1721862 Confusion at 8K boundary ending with \r:
+ # is there a \n to follow later?
+ if not self._partial and parts and parts[-1].endswith('\r'):
+ self._partial = parts.pop(-2)+parts.pop()
+ # parts is a list of strings, alternating between the line contents
+ # and the eol character(s). Gather up a list of lines after
+ # re-attaching the newlines.
+ lines = []
+ for i in range(len(parts) // 2):
+ lines.append(parts[i*2] + parts[i*2+1])
+ self.pushlines(lines)
+
+ def pushlines(self, lines):
+ # Reverse and insert at the front of the lines.
+ self._lines[:0] = lines[::-1]
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ line = self.readline()
+ if line == '':
+ raise StopIteration
+ return line
+
+
+class FeedParser(object):
+ """A feed-style parser of email."""
+
+ def __init__(self, _factory=message.Message, **_3to2kwargs):
+ if 'policy' in _3to2kwargs: policy = _3to2kwargs['policy']; del _3to2kwargs['policy']
+ else: policy = compat32
+ """_factory is called with no arguments to create a new message obj
+
+ The policy keyword specifies a policy object that controls a number of
+ aspects of the parser's operation. The default policy maintains
+ backward compatibility.
+
+ """
+ self._factory = _factory
+ self.policy = policy
+ try:
+ _factory(policy=self.policy)
+ self._factory_kwds = lambda: {'policy': self.policy}
+ except TypeError:
+ # Assume this is an old-style factory
+ self._factory_kwds = lambda: {}
+ self._input = BufferedSubFile()
+ self._msgstack = []
+ if PY3:
+ self._parse = self._parsegen().__next__
+ else:
+ self._parse = self._parsegen().next
+ self._cur = None
+ self._last = None
+ self._headersonly = False
+
+ # Non-public interface for supporting Parser's headersonly flag
+ def _set_headersonly(self):
+ self._headersonly = True
+
+ def feed(self, data):
+ """Push more data into the parser."""
+ self._input.push(data)
+ self._call_parse()
+
+ def _call_parse(self):
+ try:
+ self._parse()
+ except StopIteration:
+ pass
+
+ def close(self):
+ """Parse all remaining data and return the root message object."""
+ self._input.close()
+ self._call_parse()
+ root = self._pop_message()
+ assert not self._msgstack
+ # Look for final set of defects
+ if root.get_content_maintype() == 'multipart' \
+ and not root.is_multipart():
+ defect = errors.MultipartInvariantViolationDefect()
+ self.policy.handle_defect(root, defect)
+ return root
+
+ def _new_message(self):
+ msg = self._factory(**self._factory_kwds())
+ if self._cur and self._cur.get_content_type() == 'multipart/digest':
+ msg.set_default_type('message/rfc822')
+ if self._msgstack:
+ self._msgstack[-1].attach(msg)
+ self._msgstack.append(msg)
+ self._cur = msg
+ self._last = msg
+
+ def _pop_message(self):
+ retval = self._msgstack.pop()
+ if self._msgstack:
+ self._cur = self._msgstack[-1]
+ else:
+ self._cur = None
+ return retval
+
+ def _parsegen(self):
+ # Create a new message and start by parsing headers.
+ self._new_message()
+ headers = []
+ # Collect the headers, searching for a line that doesn't match the RFC
+ # 2822 header or continuation pattern (including an empty line).
+ for line in self._input:
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ if not headerRE.match(line):
+ # If we saw the RFC defined header/body separator
+ # (i.e. newline), just throw it away. Otherwise the line is
+ # part of the body so push it back.
+ if not NLCRE.match(line):
+ defect = errors.MissingHeaderBodySeparatorDefect()
+ self.policy.handle_defect(self._cur, defect)
+ self._input.unreadline(line)
+ break
+ headers.append(line)
+ # Done with the headers, so parse them and figure out what we're
+ # supposed to see in the body of the message.
+ self._parse_headers(headers)
+ # Headers-only parsing is a backwards compatibility hack, which was
+ # necessary in the older parser, which could raise errors. All
+ # remaining lines in the input are thrown into the message body.
+ if self._headersonly:
+ lines = []
+ while True:
+ line = self._input.readline()
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ if line == '':
+ break
+ lines.append(line)
+ self._cur.set_payload(EMPTYSTRING.join(lines))
+ return
+ if self._cur.get_content_type() == 'message/delivery-status':
+ # message/delivery-status contains blocks of headers separated by
+ # a blank line. We'll represent each header block as a separate
+ # nested message object, but the processing is a bit different
+ # than standard message/* types because there is no body for the
+ # nested messages. A blank line separates the subparts.
+ while True:
+ self._input.push_eof_matcher(NLCRE.match)
+ for retval in self._parsegen():
+ if retval is NeedMoreData:
+ yield NeedMoreData
+ continue
+ break
+ msg = self._pop_message()
+ # We need to pop the EOF matcher in order to tell if we're at
+ # the end of the current file, not the end of the last block
+ # of message headers.
+ self._input.pop_eof_matcher()
+ # The input stream must be sitting at the newline or at the
+ # EOF. We want to see if we're at the end of this subpart, so
+ # first consume the blank line, then test the next line to see
+ # if we're at this subpart's EOF.
+ while True:
+ line = self._input.readline()
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ break
+ while True:
+ line = self._input.readline()
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ break
+ if line == '':
+ break
+ # Not at EOF so this is a line we're going to need.
+ self._input.unreadline(line)
+ return
+ if self._cur.get_content_maintype() == 'message':
+ # The message claims to be a message/* type, then what follows is
+ # another RFC 2822 message.
+ for retval in self._parsegen():
+ if retval is NeedMoreData:
+ yield NeedMoreData
+ continue
+ break
+ self._pop_message()
+ return
+ if self._cur.get_content_maintype() == 'multipart':
+ boundary = self._cur.get_boundary()
+ if boundary is None:
+ # The message /claims/ to be a multipart but it has not
+ # defined a boundary. That's a problem which we'll handle by
+ # reading everything until the EOF and marking the message as
+ # defective.
+ defect = errors.NoBoundaryInMultipartDefect()
+ self.policy.handle_defect(self._cur, defect)
+ lines = []
+ for line in self._input:
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ lines.append(line)
+ self._cur.set_payload(EMPTYSTRING.join(lines))
+ return
+ # Make sure a valid content type was specified per RFC 2045:6.4.
+ if (self._cur.get('content-transfer-encoding', '8bit').lower()
+ not in ('7bit', '8bit', 'binary')):
+ defect = errors.InvalidMultipartContentTransferEncodingDefect()
+ self.policy.handle_defect(self._cur, defect)
+ # Create a line match predicate which matches the inter-part
+ # boundary as well as the end-of-multipart boundary. Don't push
+ # this onto the input stream until we've scanned past the
+ # preamble.
+ separator = '--' + boundary
+ boundaryre = re.compile(
+ '(?P<sep>' + re.escape(separator) +
+ r')(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$')
+ capturing_preamble = True
+ preamble = []
+ linesep = False
+ close_boundary_seen = False
+ while True:
+ line = self._input.readline()
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ if line == '':
+ break
+ mo = boundaryre.match(line)
+ if mo:
+ # If we're looking at the end boundary, we're done with
+ # this multipart. If there was a newline at the end of
+ # the closing boundary, then we need to initialize the
+ # epilogue with the empty string (see below).
+ if mo.group('end'):
+ close_boundary_seen = True
+ linesep = mo.group('linesep')
+ break
+ # We saw an inter-part boundary. Were we in the preamble?
+ if capturing_preamble:
+ if preamble:
+ # According to RFC 2046, the last newline belongs
+ # to the boundary.
+ lastline = preamble[-1]
+ eolmo = NLCRE_eol.search(lastline)
+ if eolmo:
+ preamble[-1] = lastline[:-len(eolmo.group(0))]
+ self._cur.preamble = EMPTYSTRING.join(preamble)
+ capturing_preamble = False
+ self._input.unreadline(line)
+ continue
+ # We saw a boundary separating two parts. Consume any
+ # multiple boundary lines that may be following. Our
+ # interpretation of RFC 2046 BNF grammar does not produce
+ # body parts within such double boundaries.
+ while True:
+ line = self._input.readline()
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ mo = boundaryre.match(line)
+ if not mo:
+ self._input.unreadline(line)
+ break
+ # Recurse to parse this subpart; the input stream points
+ # at the subpart's first line.
+ self._input.push_eof_matcher(boundaryre.match)
+ for retval in self._parsegen():
+ if retval is NeedMoreData:
+ yield NeedMoreData
+ continue
+ break
+ # Because of RFC 2046, the newline preceding the boundary
+ # separator actually belongs to the boundary, not the
+ # previous subpart's payload (or epilogue if the previous
+ # part is a multipart).
+ if self._last.get_content_maintype() == 'multipart':
+ epilogue = self._last.epilogue
+ if epilogue == '':
+ self._last.epilogue = None
+ elif epilogue is not None:
+ mo = NLCRE_eol.search(epilogue)
+ if mo:
+ end = len(mo.group(0))
+ self._last.epilogue = epilogue[:-end]
+ else:
+ payload = self._last._payload
+ if isinstance(payload, str):
+ mo = NLCRE_eol.search(payload)
+ if mo:
+ payload = payload[:-len(mo.group(0))]
+ self._last._payload = payload
+ self._input.pop_eof_matcher()
+ self._pop_message()
+ # Set the multipart up for newline cleansing, which will
+ # happen if we're in a nested multipart.
+ self._last = self._cur
+ else:
+ # I think we must be in the preamble
+ assert capturing_preamble
+ preamble.append(line)
+ # We've seen either the EOF or the end boundary. If we're still
+ # capturing the preamble, we never saw the start boundary. Note
+ # that as a defect and store the captured text as the payload.
+ if capturing_preamble:
+ defect = errors.StartBoundaryNotFoundDefect()
+ self.policy.handle_defect(self._cur, defect)
+ self._cur.set_payload(EMPTYSTRING.join(preamble))
+ epilogue = []
+ for line in self._input:
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ self._cur.epilogue = EMPTYSTRING.join(epilogue)
+ return
+ # If we're not processing the preamble, then we might have seen
+ # EOF without seeing that end boundary...that is also a defect.
+ if not close_boundary_seen:
+ defect = errors.CloseBoundaryNotFoundDefect()
+ self.policy.handle_defect(self._cur, defect)
+ return
+ # Everything from here to the EOF is epilogue. If the end boundary
+ # ended in a newline, we'll need to make sure the epilogue isn't
+ # None
+ if linesep:
+ epilogue = ['']
+ else:
+ epilogue = []
+ for line in self._input:
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ epilogue.append(line)
+ # Any CRLF at the front of the epilogue is not technically part of
+ # the epilogue. Also, watch out for an empty string epilogue,
+ # which means a single newline.
+ if epilogue:
+ firstline = epilogue[0]
+ bolmo = NLCRE_bol.match(firstline)
+ if bolmo:
+ epilogue[0] = firstline[len(bolmo.group(0)):]
+ self._cur.epilogue = EMPTYSTRING.join(epilogue)
+ return
+ # Otherwise, it's some non-multipart type, so the entire rest of the
+ # file contents becomes the payload.
+ lines = []
+ for line in self._input:
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ lines.append(line)
+ self._cur.set_payload(EMPTYSTRING.join(lines))
+
+ def _parse_headers(self, lines):
+ # Passed a list of lines that make up the headers for the current msg
+ lastheader = ''
+ lastvalue = []
+ for lineno, line in enumerate(lines):
+ # Check for continuation
+ if line[0] in ' \t':
+ if not lastheader:
+ # The first line of the headers was a continuation. This
+ # is illegal, so let's note the defect, store the illegal
+ # line, and ignore it for purposes of headers.
+ defect = errors.FirstHeaderLineIsContinuationDefect(line)
+ self.policy.handle_defect(self._cur, defect)
+ continue
+ lastvalue.append(line)
+ continue
+ if lastheader:
+ self._cur.set_raw(*self.policy.header_source_parse(lastvalue))
+ lastheader, lastvalue = '', []
+ # Check for envelope header, i.e. unix-from
+ if line.startswith('From '):
+ if lineno == 0:
+ # Strip off the trailing newline
+ mo = NLCRE_eol.search(line)
+ if mo:
+ line = line[:-len(mo.group(0))]
+ self._cur.set_unixfrom(line)
+ continue
+ elif lineno == len(lines) - 1:
+ # Something looking like a unix-from at the end - it's
+ # probably the first line of the body, so push back the
+ # line and stop.
+ self._input.unreadline(line)
+ return
+ else:
+ # Weirdly placed unix-from line. Note this as a defect
+ # and ignore it.
+ defect = errors.MisplacedEnvelopeHeaderDefect(line)
+ self._cur.defects.append(defect)
+ continue
+ # Split the line on the colon separating field name from value.
+ # There will always be a colon, because if there wasn't the part of
+ # the parser that calls us would have started parsing the body.
+ i = line.find(':')
+ assert i>0, "_parse_headers fed line with no : and no leading WS"
+ lastheader = line[:i]
+ lastvalue = [line]
+ # Done with all the lines, so handle the last header.
+ if lastheader:
+ self._cur.set_raw(*self.policy.header_source_parse(lastvalue))
+
+
+class BytesFeedParser(FeedParser):
+ """Like FeedParser, but feed accepts bytes."""
+
+ def feed(self, data):
+ super().feed(data.decode('ascii', 'surrogateescape'))
diff --git a/contrib/python/future/future/backports/email/generator.py b/contrib/python/future/future/backports/email/generator.py
index 70374dfc98..53493d0ac5 100644
--- a/contrib/python/future/future/backports/email/generator.py
+++ b/contrib/python/future/future/backports/email/generator.py
@@ -1,498 +1,498 @@
-# Copyright (C) 2001-2010 Python Software Foundation
-# Author: Barry Warsaw
-# Contact: email-sig@python.org
-
-"""Classes to generate plain text from a message object tree."""
-from __future__ import print_function
-from __future__ import unicode_literals
-from __future__ import division
-from __future__ import absolute_import
-from future.builtins import super
-from future.builtins import str
-
-__all__ = ['Generator', 'DecodedGenerator', 'BytesGenerator']
-
-import re
-import sys
-import time
-import random
-import warnings
-
-from io import StringIO, BytesIO
-from future.backports.email._policybase import compat32
-from future.backports.email.header import Header
-from future.backports.email.utils import _has_surrogates
-import future.backports.email.charset as _charset
-
-UNDERSCORE = '_'
-NL = '\n' # XXX: no longer used by the code below.
-
-fcre = re.compile(r'^From ', re.MULTILINE)
-
-
-class Generator(object):
- """Generates output from a Message object tree.
-
- This basic generator writes the message to the given file object as plain
- text.
- """
- #
- # Public interface
- #
-
- def __init__(self, outfp, mangle_from_=True, maxheaderlen=None, **_3to2kwargs):
- if 'policy' in _3to2kwargs: policy = _3to2kwargs['policy']; del _3to2kwargs['policy']
- else: policy = None
- """Create the generator for message flattening.
-
- outfp is the output file-like object for writing the message to. It
- must have a write() method.
-
- Optional mangle_from_ is a flag that, when True (the default), escapes
- From_ lines in the body of the message by putting a `>' in front of
- them.
-
- Optional maxheaderlen specifies the longest length for a non-continued
- header. When a header line is longer (in characters, with tabs
- expanded to 8 spaces) than maxheaderlen, the header will split as
- defined in the Header class. Set maxheaderlen to zero to disable
- header wrapping. The default is 78, as recommended (but not required)
- by RFC 2822.
-
- The policy keyword specifies a policy object that controls a number of
- aspects of the generator's operation. The default policy maintains
- backward compatibility.
-
- """
- self._fp = outfp
- self._mangle_from_ = mangle_from_
- self.maxheaderlen = maxheaderlen
- self.policy = policy
-
- def write(self, s):
- # Just delegate to the file object
- self._fp.write(s)
-
- def flatten(self, msg, unixfrom=False, linesep=None):
- r"""Print the message object tree rooted at msg to the output file
- specified when the Generator instance was created.
-
- unixfrom is a flag that forces the printing of a Unix From_ delimiter
- before the first object in the message tree. If the original message
- has no From_ delimiter, a `standard' one is crafted. By default, this
- is False to inhibit the printing of any From_ delimiter.
-
- Note that for subobjects, no From_ line is printed.
-
- linesep specifies the characters used to indicate a new line in
- the output. The default value is determined by the policy.
-
- """
- # We use the _XXX constants for operating on data that comes directly
- # from the msg, and _encoded_XXX constants for operating on data that
- # has already been converted (to bytes in the BytesGenerator) and
- # inserted into a temporary buffer.
- policy = msg.policy if self.policy is None else self.policy
- if linesep is not None:
- policy = policy.clone(linesep=linesep)
- if self.maxheaderlen is not None:
- policy = policy.clone(max_line_length=self.maxheaderlen)
- self._NL = policy.linesep
- self._encoded_NL = self._encode(self._NL)
- self._EMPTY = ''
- self._encoded_EMTPY = self._encode('')
- # Because we use clone (below) when we recursively process message
- # subparts, and because clone uses the computed policy (not None),
- # submessages will automatically get set to the computed policy when
- # they are processed by this code.
- old_gen_policy = self.policy
- old_msg_policy = msg.policy
- try:
- self.policy = policy
- msg.policy = policy
- if unixfrom:
- ufrom = msg.get_unixfrom()
- if not ufrom:
- ufrom = 'From nobody ' + time.ctime(time.time())
- self.write(ufrom + self._NL)
- self._write(msg)
- finally:
- self.policy = old_gen_policy
- msg.policy = old_msg_policy
-
- def clone(self, fp):
- """Clone this generator with the exact same options."""
- return self.__class__(fp,
- self._mangle_from_,
- None, # Use policy setting, which we've adjusted
- policy=self.policy)
-
- #
- # Protected interface - undocumented ;/
- #
-
- # Note that we use 'self.write' when what we are writing is coming from
- # the source, and self._fp.write when what we are writing is coming from a
- # buffer (because the Bytes subclass has already had a chance to transform
- # the data in its write method in that case). This is an entirely
- # pragmatic split determined by experiment; we could be more general by
- # always using write and having the Bytes subclass write method detect when
- # it has already transformed the input; but, since this whole thing is a
- # hack anyway this seems good enough.
-
- # Similarly, we have _XXX and _encoded_XXX attributes that are used on
- # source and buffer data, respectively.
- _encoded_EMPTY = ''
-
- def _new_buffer(self):
- # BytesGenerator overrides this to return BytesIO.
- return StringIO()
-
- def _encode(self, s):
- # BytesGenerator overrides this to encode strings to bytes.
- return s
-
- def _write_lines(self, lines):
- # We have to transform the line endings.
- if not lines:
- return
- lines = lines.splitlines(True)
- for line in lines[:-1]:
- self.write(line.rstrip('\r\n'))
- self.write(self._NL)
- laststripped = lines[-1].rstrip('\r\n')
- self.write(laststripped)
- if len(lines[-1]) != len(laststripped):
- self.write(self._NL)
-
- def _write(self, msg):
- # We can't write the headers yet because of the following scenario:
- # say a multipart message includes the boundary string somewhere in
- # its body. We'd have to calculate the new boundary /before/ we write
- # the headers so that we can write the correct Content-Type:
- # parameter.
- #
- # The way we do this, so as to make the _handle_*() methods simpler,
- # is to cache any subpart writes into a buffer. The we write the
- # headers and the buffer contents. That way, subpart handlers can
- # Do The Right Thing, and can still modify the Content-Type: header if
- # necessary.
- oldfp = self._fp
- try:
- self._fp = sfp = self._new_buffer()
- self._dispatch(msg)
- finally:
- self._fp = oldfp
- # Write the headers. First we see if the message object wants to
- # handle that itself. If not, we'll do it generically.
- meth = getattr(msg, '_write_headers', None)
- if meth is None:
- self._write_headers(msg)
- else:
- meth(self)
- self._fp.write(sfp.getvalue())
-
- def _dispatch(self, msg):
- # Get the Content-Type: for the message, then try to dispatch to
- # self._handle_<maintype>_<subtype>(). If there's no handler for the
- # full MIME type, then dispatch to self._handle_<maintype>(). If
- # that's missing too, then dispatch to self._writeBody().
- main = msg.get_content_maintype()
- sub = msg.get_content_subtype()
- specific = UNDERSCORE.join((main, sub)).replace('-', '_')
- meth = getattr(self, '_handle_' + specific, None)
- if meth is None:
- generic = main.replace('-', '_')
- meth = getattr(self, '_handle_' + generic, None)
- if meth is None:
- meth = self._writeBody
- meth(msg)
-
- #
- # Default handlers
- #
-
- def _write_headers(self, msg):
- for h, v in msg.raw_items():
- self.write(self.policy.fold(h, v))
- # A blank line always separates headers from body
- self.write(self._NL)
-
- #
- # Handlers for writing types and subtypes
- #
-
- def _handle_text(self, msg):
- payload = msg.get_payload()
- if payload is None:
- return
- if not isinstance(payload, str):
- raise TypeError('string payload expected: %s' % type(payload))
- if _has_surrogates(msg._payload):
- charset = msg.get_param('charset')
- if charset is not None:
- del msg['content-transfer-encoding']
- msg.set_payload(payload, charset)
- payload = msg.get_payload()
- if self._mangle_from_:
- payload = fcre.sub('>From ', payload)
- self._write_lines(payload)
-
- # Default body handler
- _writeBody = _handle_text
-
- def _handle_multipart(self, msg):
- # The trick here is to write out each part separately, merge them all
- # together, and then make sure that the boundary we've chosen isn't
- # present in the payload.
- msgtexts = []
- subparts = msg.get_payload()
- if subparts is None:
- subparts = []
- elif isinstance(subparts, str):
- # e.g. a non-strict parse of a message with no starting boundary.
- self.write(subparts)
- return
- elif not isinstance(subparts, list):
- # Scalar payload
- subparts = [subparts]
- for part in subparts:
- s = self._new_buffer()
- g = self.clone(s)
- g.flatten(part, unixfrom=False, linesep=self._NL)
- msgtexts.append(s.getvalue())
- # BAW: What about boundaries that are wrapped in double-quotes?
- boundary = msg.get_boundary()
- if not boundary:
- # Create a boundary that doesn't appear in any of the
- # message texts.
- alltext = self._encoded_NL.join(msgtexts)
- boundary = self._make_boundary(alltext)
- msg.set_boundary(boundary)
- # If there's a preamble, write it out, with a trailing CRLF
- if msg.preamble is not None:
- if self._mangle_from_:
- preamble = fcre.sub('>From ', msg.preamble)
- else:
- preamble = msg.preamble
- self._write_lines(preamble)
- self.write(self._NL)
- # dash-boundary transport-padding CRLF
- self.write('--' + boundary + self._NL)
- # body-part
- if msgtexts:
- self._fp.write(msgtexts.pop(0))
- # *encapsulation
- # --> delimiter transport-padding
- # --> CRLF body-part
- for body_part in msgtexts:
- # delimiter transport-padding CRLF
- self.write(self._NL + '--' + boundary + self._NL)
- # body-part
- self._fp.write(body_part)
- # close-delimiter transport-padding
- self.write(self._NL + '--' + boundary + '--')
- if msg.epilogue is not None:
- self.write(self._NL)
- if self._mangle_from_:
- epilogue = fcre.sub('>From ', msg.epilogue)
- else:
- epilogue = msg.epilogue
- self._write_lines(epilogue)
-
- def _handle_multipart_signed(self, msg):
- # The contents of signed parts has to stay unmodified in order to keep
- # the signature intact per RFC1847 2.1, so we disable header wrapping.
- # RDM: This isn't enough to completely preserve the part, but it helps.
- p = self.policy
- self.policy = p.clone(max_line_length=0)
- try:
- self._handle_multipart(msg)
- finally:
- self.policy = p
-
- def _handle_message_delivery_status(self, msg):
- # We can't just write the headers directly to self's file object
- # because this will leave an extra newline between the last header
- # block and the boundary. Sigh.
- blocks = []
- for part in msg.get_payload():
- s = self._new_buffer()
- g = self.clone(s)
- g.flatten(part, unixfrom=False, linesep=self._NL)
- text = s.getvalue()
- lines = text.split(self._encoded_NL)
- # Strip off the unnecessary trailing empty line
- if lines and lines[-1] == self._encoded_EMPTY:
- blocks.append(self._encoded_NL.join(lines[:-1]))
- else:
- blocks.append(text)
- # Now join all the blocks with an empty line. This has the lovely
- # effect of separating each block with an empty line, but not adding
- # an extra one after the last one.
- self._fp.write(self._encoded_NL.join(blocks))
-
- def _handle_message(self, msg):
- s = self._new_buffer()
- g = self.clone(s)
- # The payload of a message/rfc822 part should be a multipart sequence
- # of length 1. The zeroth element of the list should be the Message
- # object for the subpart. Extract that object, stringify it, and
- # write it out.
- # Except, it turns out, when it's a string instead, which happens when
- # and only when HeaderParser is used on a message of mime type
- # message/rfc822. Such messages are generated by, for example,
- # Groupwise when forwarding unadorned messages. (Issue 7970.) So
- # in that case we just emit the string body.
- payload = msg._payload
- if isinstance(payload, list):
- g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
- payload = s.getvalue()
- else:
- payload = self._encode(payload)
- self._fp.write(payload)
-
- # This used to be a module level function; we use a classmethod for this
- # and _compile_re so we can continue to provide the module level function
- # for backward compatibility by doing
- # _make_boudary = Generator._make_boundary
- # at the end of the module. It *is* internal, so we could drop that...
- @classmethod
- def _make_boundary(cls, text=None):
- # Craft a random boundary. If text is given, ensure that the chosen
- # boundary doesn't appear in the text.
- token = random.randrange(sys.maxsize)
- boundary = ('=' * 15) + (_fmt % token) + '=='
- if text is None:
- return boundary
- b = boundary
- counter = 0
- while True:
- cre = cls._compile_re('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
- if not cre.search(text):
- break
- b = boundary + '.' + str(counter)
- counter += 1
- return b
-
- @classmethod
- def _compile_re(cls, s, flags):
- return re.compile(s, flags)
-
-class BytesGenerator(Generator):
- """Generates a bytes version of a Message object tree.
-
- Functionally identical to the base Generator except that the output is
- bytes and not string. When surrogates were used in the input to encode
- bytes, these are decoded back to bytes for output. If the policy has
- cte_type set to 7bit, then the message is transformed such that the
- non-ASCII bytes are properly content transfer encoded, using the charset
- unknown-8bit.
-
- The outfp object must accept bytes in its write method.
- """
-
- # Bytes versions of this constant for use in manipulating data from
- # the BytesIO buffer.
- _encoded_EMPTY = b''
-
- def write(self, s):
- self._fp.write(str(s).encode('ascii', 'surrogateescape'))
-
- def _new_buffer(self):
- return BytesIO()
-
- def _encode(self, s):
- return s.encode('ascii')
-
- def _write_headers(self, msg):
- # This is almost the same as the string version, except for handling
- # strings with 8bit bytes.
- for h, v in msg.raw_items():
- self._fp.write(self.policy.fold_binary(h, v))
- # A blank line always separates headers from body
- self.write(self._NL)
-
- def _handle_text(self, msg):
- # If the string has surrogates the original source was bytes, so
- # just write it back out.
- if msg._payload is None:
- return
- if _has_surrogates(msg._payload) and not self.policy.cte_type=='7bit':
- if self._mangle_from_:
- msg._payload = fcre.sub(">From ", msg._payload)
- self._write_lines(msg._payload)
- else:
- super(BytesGenerator,self)._handle_text(msg)
-
- # Default body handler
- _writeBody = _handle_text
-
- @classmethod
- def _compile_re(cls, s, flags):
- return re.compile(s.encode('ascii'), flags)
-
-
-_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'
-
-class DecodedGenerator(Generator):
- """Generates a text representation of a message.
-
- Like the Generator base class, except that non-text parts are substituted
- with a format string representing the part.
- """
- def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
- """Like Generator.__init__() except that an additional optional
- argument is allowed.
-
- Walks through all subparts of a message. If the subpart is of main
- type `text', then it prints the decoded payload of the subpart.
-
- Otherwise, fmt is a format string that is used instead of the message
- payload. fmt is expanded with the following keywords (in
- %(keyword)s format):
-
- type : Full MIME type of the non-text part
- maintype : Main MIME type of the non-text part
- subtype : Sub-MIME type of the non-text part
- filename : Filename of the non-text part
- description: Description associated with the non-text part
- encoding : Content transfer encoding of the non-text part
-
- The default value for fmt is None, meaning
-
- [Non-text (%(type)s) part of message omitted, filename %(filename)s]
- """
- Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
- if fmt is None:
- self._fmt = _FMT
- else:
- self._fmt = fmt
-
- def _dispatch(self, msg):
- for part in msg.walk():
- maintype = part.get_content_maintype()
- if maintype == 'text':
- print(part.get_payload(decode=False), file=self)
- elif maintype == 'multipart':
- # Just skip this
- pass
- else:
- print(self._fmt % {
- 'type' : part.get_content_type(),
- 'maintype' : part.get_content_maintype(),
- 'subtype' : part.get_content_subtype(),
- 'filename' : part.get_filename('[no filename]'),
- 'description': part.get('Content-Description',
- '[no description]'),
- 'encoding' : part.get('Content-Transfer-Encoding',
- '[no encoding]'),
- }, file=self)
-
-
-# Helper used by Generator._make_boundary
-_width = len(repr(sys.maxsize-1))
-_fmt = '%%0%dd' % _width
-
-# Backward compatibility
-_make_boundary = Generator._make_boundary
+# Copyright (C) 2001-2010 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Classes to generate plain text from a message object tree."""
+from __future__ import print_function
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import super
+from future.builtins import str
+
+__all__ = ['Generator', 'DecodedGenerator', 'BytesGenerator']
+
+import re
+import sys
+import time
+import random
+import warnings
+
+from io import StringIO, BytesIO
+from future.backports.email._policybase import compat32
+from future.backports.email.header import Header
+from future.backports.email.utils import _has_surrogates
+import future.backports.email.charset as _charset
+
+UNDERSCORE = '_'
+NL = '\n' # XXX: no longer used by the code below.
+
+fcre = re.compile(r'^From ', re.MULTILINE)
+
+
+class Generator(object):
+ """Generates output from a Message object tree.
+
+ This basic generator writes the message to the given file object as plain
+ text.
+ """
+ #
+ # Public interface
+ #
+
+ def __init__(self, outfp, mangle_from_=True, maxheaderlen=None, **_3to2kwargs):
+ if 'policy' in _3to2kwargs: policy = _3to2kwargs['policy']; del _3to2kwargs['policy']
+ else: policy = None
+ """Create the generator for message flattening.
+
+ outfp is the output file-like object for writing the message to. It
+ must have a write() method.
+
+ Optional mangle_from_ is a flag that, when True (the default), escapes
+ From_ lines in the body of the message by putting a `>' in front of
+ them.
+
+ Optional maxheaderlen specifies the longest length for a non-continued
+ header. When a header line is longer (in characters, with tabs
+ expanded to 8 spaces) than maxheaderlen, the header will split as
+ defined in the Header class. Set maxheaderlen to zero to disable
+ header wrapping. The default is 78, as recommended (but not required)
+ by RFC 2822.
+
+ The policy keyword specifies a policy object that controls a number of
+ aspects of the generator's operation. The default policy maintains
+ backward compatibility.
+
+ """
+ self._fp = outfp
+ self._mangle_from_ = mangle_from_
+ self.maxheaderlen = maxheaderlen
+ self.policy = policy
+
+ def write(self, s):
+ # Just delegate to the file object
+ self._fp.write(s)
+
+ def flatten(self, msg, unixfrom=False, linesep=None):
+ r"""Print the message object tree rooted at msg to the output file
+ specified when the Generator instance was created.
+
+ unixfrom is a flag that forces the printing of a Unix From_ delimiter
+ before the first object in the message tree. If the original message
+ has no From_ delimiter, a `standard' one is crafted. By default, this
+ is False to inhibit the printing of any From_ delimiter.
+
+ Note that for subobjects, no From_ line is printed.
+
+ linesep specifies the characters used to indicate a new line in
+ the output. The default value is determined by the policy.
+
+ """
+ # We use the _XXX constants for operating on data that comes directly
+ # from the msg, and _encoded_XXX constants for operating on data that
+ # has already been converted (to bytes in the BytesGenerator) and
+ # inserted into a temporary buffer.
+ policy = msg.policy if self.policy is None else self.policy
+ if linesep is not None:
+ policy = policy.clone(linesep=linesep)
+ if self.maxheaderlen is not None:
+ policy = policy.clone(max_line_length=self.maxheaderlen)
+ self._NL = policy.linesep
+ self._encoded_NL = self._encode(self._NL)
+ self._EMPTY = ''
+ self._encoded_EMTPY = self._encode('')
+ # Because we use clone (below) when we recursively process message
+ # subparts, and because clone uses the computed policy (not None),
+ # submessages will automatically get set to the computed policy when
+ # they are processed by this code.
+ old_gen_policy = self.policy
+ old_msg_policy = msg.policy
+ try:
+ self.policy = policy
+ msg.policy = policy
+ if unixfrom:
+ ufrom = msg.get_unixfrom()
+ if not ufrom:
+ ufrom = 'From nobody ' + time.ctime(time.time())
+ self.write(ufrom + self._NL)
+ self._write(msg)
+ finally:
+ self.policy = old_gen_policy
+ msg.policy = old_msg_policy
+
+ def clone(self, fp):
+ """Clone this generator with the exact same options."""
+ return self.__class__(fp,
+ self._mangle_from_,
+ None, # Use policy setting, which we've adjusted
+ policy=self.policy)
+
+ #
+ # Protected interface - undocumented ;/
+ #
+
+ # Note that we use 'self.write' when what we are writing is coming from
+ # the source, and self._fp.write when what we are writing is coming from a
+ # buffer (because the Bytes subclass has already had a chance to transform
+ # the data in its write method in that case). This is an entirely
+ # pragmatic split determined by experiment; we could be more general by
+ # always using write and having the Bytes subclass write method detect when
+ # it has already transformed the input; but, since this whole thing is a
+ # hack anyway this seems good enough.
+
+ # Similarly, we have _XXX and _encoded_XXX attributes that are used on
+ # source and buffer data, respectively.
+ _encoded_EMPTY = ''
+
+ def _new_buffer(self):
+ # BytesGenerator overrides this to return BytesIO.
+ return StringIO()
+
+ def _encode(self, s):
+ # BytesGenerator overrides this to encode strings to bytes.
+ return s
+
+ def _write_lines(self, lines):
+ # We have to transform the line endings.
+ if not lines:
+ return
+ lines = lines.splitlines(True)
+ for line in lines[:-1]:
+ self.write(line.rstrip('\r\n'))
+ self.write(self._NL)
+ laststripped = lines[-1].rstrip('\r\n')
+ self.write(laststripped)
+ if len(lines[-1]) != len(laststripped):
+ self.write(self._NL)
+
+ def _write(self, msg):
+ # We can't write the headers yet because of the following scenario:
+ # say a multipart message includes the boundary string somewhere in
+ # its body. We'd have to calculate the new boundary /before/ we write
+ # the headers so that we can write the correct Content-Type:
+ # parameter.
+ #
+ # The way we do this, so as to make the _handle_*() methods simpler,
+ # is to cache any subpart writes into a buffer. The we write the
+ # headers and the buffer contents. That way, subpart handlers can
+ # Do The Right Thing, and can still modify the Content-Type: header if
+ # necessary.
+ oldfp = self._fp
+ try:
+ self._fp = sfp = self._new_buffer()
+ self._dispatch(msg)
+ finally:
+ self._fp = oldfp
+ # Write the headers. First we see if the message object wants to
+ # handle that itself. If not, we'll do it generically.
+ meth = getattr(msg, '_write_headers', None)
+ if meth is None:
+ self._write_headers(msg)
+ else:
+ meth(self)
+ self._fp.write(sfp.getvalue())
+
+ def _dispatch(self, msg):
+ # Get the Content-Type: for the message, then try to dispatch to
+ # self._handle_<maintype>_<subtype>(). If there's no handler for the
+ # full MIME type, then dispatch to self._handle_<maintype>(). If
+ # that's missing too, then dispatch to self._writeBody().
+ main = msg.get_content_maintype()
+ sub = msg.get_content_subtype()
+ specific = UNDERSCORE.join((main, sub)).replace('-', '_')
+ meth = getattr(self, '_handle_' + specific, None)
+ if meth is None:
+ generic = main.replace('-', '_')
+ meth = getattr(self, '_handle_' + generic, None)
+ if meth is None:
+ meth = self._writeBody
+ meth(msg)
+
+ #
+ # Default handlers
+ #
+
+ def _write_headers(self, msg):
+ for h, v in msg.raw_items():
+ self.write(self.policy.fold(h, v))
+ # A blank line always separates headers from body
+ self.write(self._NL)
+
+ #
+ # Handlers for writing types and subtypes
+ #
+
+ def _handle_text(self, msg):
+ payload = msg.get_payload()
+ if payload is None:
+ return
+ if not isinstance(payload, str):
+ raise TypeError('string payload expected: %s' % type(payload))
+ if _has_surrogates(msg._payload):
+ charset = msg.get_param('charset')
+ if charset is not None:
+ del msg['content-transfer-encoding']
+ msg.set_payload(payload, charset)
+ payload = msg.get_payload()
+ if self._mangle_from_:
+ payload = fcre.sub('>From ', payload)
+ self._write_lines(payload)
+
+ # Default body handler
+ _writeBody = _handle_text
+
+ def _handle_multipart(self, msg):
+ # The trick here is to write out each part separately, merge them all
+ # together, and then make sure that the boundary we've chosen isn't
+ # present in the payload.
+ msgtexts = []
+ subparts = msg.get_payload()
+ if subparts is None:
+ subparts = []
+ elif isinstance(subparts, str):
+ # e.g. a non-strict parse of a message with no starting boundary.
+ self.write(subparts)
+ return
+ elif not isinstance(subparts, list):
+ # Scalar payload
+ subparts = [subparts]
+ for part in subparts:
+ s = self._new_buffer()
+ g = self.clone(s)
+ g.flatten(part, unixfrom=False, linesep=self._NL)
+ msgtexts.append(s.getvalue())
+ # BAW: What about boundaries that are wrapped in double-quotes?
+ boundary = msg.get_boundary()
+ if not boundary:
+ # Create a boundary that doesn't appear in any of the
+ # message texts.
+ alltext = self._encoded_NL.join(msgtexts)
+ boundary = self._make_boundary(alltext)
+ msg.set_boundary(boundary)
+ # If there's a preamble, write it out, with a trailing CRLF
+ if msg.preamble is not None:
+ if self._mangle_from_:
+ preamble = fcre.sub('>From ', msg.preamble)
+ else:
+ preamble = msg.preamble
+ self._write_lines(preamble)
+ self.write(self._NL)
+ # dash-boundary transport-padding CRLF
+ self.write('--' + boundary + self._NL)
+ # body-part
+ if msgtexts:
+ self._fp.write(msgtexts.pop(0))
+ # *encapsulation
+ # --> delimiter transport-padding
+ # --> CRLF body-part
+ for body_part in msgtexts:
+ # delimiter transport-padding CRLF
+ self.write(self._NL + '--' + boundary + self._NL)
+ # body-part
+ self._fp.write(body_part)
+ # close-delimiter transport-padding
+ self.write(self._NL + '--' + boundary + '--')
+ if msg.epilogue is not None:
+ self.write(self._NL)
+ if self._mangle_from_:
+ epilogue = fcre.sub('>From ', msg.epilogue)
+ else:
+ epilogue = msg.epilogue
+ self._write_lines(epilogue)
+
+ def _handle_multipart_signed(self, msg):
+ # The contents of signed parts has to stay unmodified in order to keep
+ # the signature intact per RFC1847 2.1, so we disable header wrapping.
+ # RDM: This isn't enough to completely preserve the part, but it helps.
+ p = self.policy
+ self.policy = p.clone(max_line_length=0)
+ try:
+ self._handle_multipart(msg)
+ finally:
+ self.policy = p
+
+ def _handle_message_delivery_status(self, msg):
+ # We can't just write the headers directly to self's file object
+ # because this will leave an extra newline between the last header
+ # block and the boundary. Sigh.
+ blocks = []
+ for part in msg.get_payload():
+ s = self._new_buffer()
+ g = self.clone(s)
+ g.flatten(part, unixfrom=False, linesep=self._NL)
+ text = s.getvalue()
+ lines = text.split(self._encoded_NL)
+ # Strip off the unnecessary trailing empty line
+ if lines and lines[-1] == self._encoded_EMPTY:
+ blocks.append(self._encoded_NL.join(lines[:-1]))
+ else:
+ blocks.append(text)
+ # Now join all the blocks with an empty line. This has the lovely
+ # effect of separating each block with an empty line, but not adding
+ # an extra one after the last one.
+ self._fp.write(self._encoded_NL.join(blocks))
+
+ def _handle_message(self, msg):
+ s = self._new_buffer()
+ g = self.clone(s)
+ # The payload of a message/rfc822 part should be a multipart sequence
+ # of length 1. The zeroth element of the list should be the Message
+ # object for the subpart. Extract that object, stringify it, and
+ # write it out.
+ # Except, it turns out, when it's a string instead, which happens when
+ # and only when HeaderParser is used on a message of mime type
+ # message/rfc822. Such messages are generated by, for example,
+ # Groupwise when forwarding unadorned messages. (Issue 7970.) So
+ # in that case we just emit the string body.
+ payload = msg._payload
+ if isinstance(payload, list):
+ g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
+ payload = s.getvalue()
+ else:
+ payload = self._encode(payload)
+ self._fp.write(payload)
+
+ # This used to be a module level function; we use a classmethod for this
+ # and _compile_re so we can continue to provide the module level function
+ # for backward compatibility by doing
+ # _make_boudary = Generator._make_boundary
+ # at the end of the module. It *is* internal, so we could drop that...
+ @classmethod
+ def _make_boundary(cls, text=None):
+ # Craft a random boundary. If text is given, ensure that the chosen
+ # boundary doesn't appear in the text.
+ token = random.randrange(sys.maxsize)
+ boundary = ('=' * 15) + (_fmt % token) + '=='
+ if text is None:
+ return boundary
+ b = boundary
+ counter = 0
+ while True:
+ cre = cls._compile_re('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
+ if not cre.search(text):
+ break
+ b = boundary + '.' + str(counter)
+ counter += 1
+ return b
+
+ @classmethod
+ def _compile_re(cls, s, flags):
+ return re.compile(s, flags)
+
+class BytesGenerator(Generator):
+ """Generates a bytes version of a Message object tree.
+
+ Functionally identical to the base Generator except that the output is
+ bytes and not string. When surrogates were used in the input to encode
+ bytes, these are decoded back to bytes for output. If the policy has
+ cte_type set to 7bit, then the message is transformed such that the
+ non-ASCII bytes are properly content transfer encoded, using the charset
+ unknown-8bit.
+
+ The outfp object must accept bytes in its write method.
+ """
+
+ # Bytes versions of this constant for use in manipulating data from
+ # the BytesIO buffer.
+ _encoded_EMPTY = b''
+
+ def write(self, s):
+ self._fp.write(str(s).encode('ascii', 'surrogateescape'))
+
+ def _new_buffer(self):
+ return BytesIO()
+
+ def _encode(self, s):
+ return s.encode('ascii')
+
+ def _write_headers(self, msg):
+ # This is almost the same as the string version, except for handling
+ # strings with 8bit bytes.
+ for h, v in msg.raw_items():
+ self._fp.write(self.policy.fold_binary(h, v))
+ # A blank line always separates headers from body
+ self.write(self._NL)
+
+ def _handle_text(self, msg):
+ # If the string has surrogates the original source was bytes, so
+ # just write it back out.
+ if msg._payload is None:
+ return
+ if _has_surrogates(msg._payload) and not self.policy.cte_type=='7bit':
+ if self._mangle_from_:
+ msg._payload = fcre.sub(">From ", msg._payload)
+ self._write_lines(msg._payload)
+ else:
+ super(BytesGenerator,self)._handle_text(msg)
+
+ # Default body handler
+ _writeBody = _handle_text
+
+ @classmethod
+ def _compile_re(cls, s, flags):
+ return re.compile(s.encode('ascii'), flags)
+
+
+_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'
+
+class DecodedGenerator(Generator):
+ """Generates a text representation of a message.
+
+ Like the Generator base class, except that non-text parts are substituted
+ with a format string representing the part.
+ """
+ def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
+ """Like Generator.__init__() except that an additional optional
+ argument is allowed.
+
+ Walks through all subparts of a message. If the subpart is of main
+ type `text', then it prints the decoded payload of the subpart.
+
+ Otherwise, fmt is a format string that is used instead of the message
+ payload. fmt is expanded with the following keywords (in
+ %(keyword)s format):
+
+ type : Full MIME type of the non-text part
+ maintype : Main MIME type of the non-text part
+ subtype : Sub-MIME type of the non-text part
+ filename : Filename of the non-text part
+ description: Description associated with the non-text part
+ encoding : Content transfer encoding of the non-text part
+
+ The default value for fmt is None, meaning
+
+ [Non-text (%(type)s) part of message omitted, filename %(filename)s]
+ """
+ Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
+ if fmt is None:
+ self._fmt = _FMT
+ else:
+ self._fmt = fmt
+
+ def _dispatch(self, msg):
+ for part in msg.walk():
+ maintype = part.get_content_maintype()
+ if maintype == 'text':
+ print(part.get_payload(decode=False), file=self)
+ elif maintype == 'multipart':
+ # Just skip this
+ pass
+ else:
+ print(self._fmt % {
+ 'type' : part.get_content_type(),
+ 'maintype' : part.get_content_maintype(),
+ 'subtype' : part.get_content_subtype(),
+ 'filename' : part.get_filename('[no filename]'),
+ 'description': part.get('Content-Description',
+ '[no description]'),
+ 'encoding' : part.get('Content-Transfer-Encoding',
+ '[no encoding]'),
+ }, file=self)
+
+
+# Helper used by Generator._make_boundary
+_width = len(repr(sys.maxsize-1))
+_fmt = '%%0%dd' % _width
+
+# Backward compatibility
+_make_boundary = Generator._make_boundary
diff --git a/contrib/python/future/future/backports/email/header.py b/contrib/python/future/future/backports/email/header.py
index 0536e32cf3..63bf038c02 100644
--- a/contrib/python/future/future/backports/email/header.py
+++ b/contrib/python/future/future/backports/email/header.py
@@ -1,581 +1,581 @@
-# Copyright (C) 2002-2007 Python Software Foundation
-# Author: Ben Gertzfield, Barry Warsaw
-# Contact: email-sig@python.org
-
-"""Header encoding and decoding functionality."""
-from __future__ import unicode_literals
-from __future__ import division
-from __future__ import absolute_import
-from future.builtins import bytes, range, str, super, zip
-
-__all__ = [
- 'Header',
- 'decode_header',
- 'make_header',
- ]
-
-import re
-import binascii
-
-from future.backports import email
-from future.backports.email import base64mime
-from future.backports.email.errors import HeaderParseError
-import future.backports.email.charset as _charset
-
-# Helpers
-from future.backports.email.quoprimime import _max_append, header_decode
-
-Charset = _charset.Charset
-
-NL = '\n'
-SPACE = ' '
-BSPACE = b' '
-SPACE8 = ' ' * 8
-EMPTYSTRING = ''
-MAXLINELEN = 78
-FWS = ' \t'
-
-USASCII = Charset('us-ascii')
-UTF8 = Charset('utf-8')
-
-# Match encoded-word strings in the form =?charset?q?Hello_World?=
-ecre = re.compile(r'''
- =\? # literal =?
- (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
- \? # literal ?
- (?P<encoding>[qb]) # either a "q" or a "b", case insensitive
- \? # literal ?
- (?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string
- \?= # literal ?=
- ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE)
-
-# Field name regexp, including trailing colon, but not separating whitespace,
-# according to RFC 2822. Character range is from tilde to exclamation mark.
-# For use with .match()
-fcre = re.compile(r'[\041-\176]+:$')
-
-# Find a header embedded in a putative header value. Used to check for
-# header injection attack.
-_embeded_header = re.compile(r'\n[^ \t]+:')
-
-
-def decode_header(header):
- """Decode a message header value without converting charset.
-
- Returns a list of (string, charset) pairs containing each of the decoded
- parts of the header. Charset is None for non-encoded parts of the header,
- otherwise a lower-case string containing the name of the character set
- specified in the encoded string.
-
- header may be a string that may or may not contain RFC2047 encoded words,
- or it may be a Header object.
-
- An email.errors.HeaderParseError may be raised when certain decoding error
- occurs (e.g. a base64 decoding exception).
- """
- # If it is a Header object, we can just return the encoded chunks.
- if hasattr(header, '_chunks'):
- return [(_charset._encode(string, str(charset)), str(charset))
- for string, charset in header._chunks]
- # If no encoding, just return the header with no charset.
- if not ecre.search(header):
- return [(header, None)]
- # First step is to parse all the encoded parts into triplets of the form
- # (encoded_string, encoding, charset). For unencoded strings, the last
- # two parts will be None.
- words = []
- for line in header.splitlines():
- parts = ecre.split(line)
- first = True
- while parts:
- unencoded = parts.pop(0)
- if first:
- unencoded = unencoded.lstrip()
- first = False
- if unencoded:
- words.append((unencoded, None, None))
- if parts:
- charset = parts.pop(0).lower()
- encoding = parts.pop(0).lower()
- encoded = parts.pop(0)
- words.append((encoded, encoding, charset))
- # Now loop over words and remove words that consist of whitespace
- # between two encoded strings.
- import sys
- droplist = []
- for n, w in enumerate(words):
- if n>1 and w[1] and words[n-2][1] and words[n-1][0].isspace():
- droplist.append(n-1)
- for d in reversed(droplist):
- del words[d]
-
- # The next step is to decode each encoded word by applying the reverse
- # base64 or quopri transformation. decoded_words is now a list of the
- # form (decoded_word, charset).
- decoded_words = []
- for encoded_string, encoding, charset in words:
- if encoding is None:
- # This is an unencoded word.
- decoded_words.append((encoded_string, charset))
- elif encoding == 'q':
- word = header_decode(encoded_string)
- decoded_words.append((word, charset))
- elif encoding == 'b':
- paderr = len(encoded_string) % 4 # Postel's law: add missing padding
- if paderr:
- encoded_string += '==='[:4 - paderr]
- try:
- word = base64mime.decode(encoded_string)
- except binascii.Error:
- raise HeaderParseError('Base64 decoding error')
- else:
- decoded_words.append((word, charset))
- else:
- raise AssertionError('Unexpected encoding: ' + encoding)
- # Now convert all words to bytes and collapse consecutive runs of
- # similarly encoded words.
- collapsed = []
- last_word = last_charset = None
- for word, charset in decoded_words:
- if isinstance(word, str):
- word = bytes(word, 'raw-unicode-escape')
- if last_word is None:
- last_word = word
- last_charset = charset
- elif charset != last_charset:
- collapsed.append((last_word, last_charset))
- last_word = word
- last_charset = charset
- elif last_charset is None:
- last_word += BSPACE + word
- else:
- last_word += word
- collapsed.append((last_word, last_charset))
- return collapsed
-
-
-def make_header(decoded_seq, maxlinelen=None, header_name=None,
- continuation_ws=' '):
- """Create a Header from a sequence of pairs as returned by decode_header()
-
- decode_header() takes a header value string and returns a sequence of
- pairs of the format (decoded_string, charset) where charset is the string
- name of the character set.
-
- This function takes one of those sequence of pairs and returns a Header
- instance. Optional maxlinelen, header_name, and continuation_ws are as in
- the Header constructor.
- """
- h = Header(maxlinelen=maxlinelen, header_name=header_name,
- continuation_ws=continuation_ws)
- for s, charset in decoded_seq:
- # None means us-ascii but we can simply pass it on to h.append()
- if charset is not None and not isinstance(charset, Charset):
- charset = Charset(charset)
- h.append(s, charset)
- return h
-
-
-class Header(object):
- def __init__(self, s=None, charset=None,
- maxlinelen=None, header_name=None,
- continuation_ws=' ', errors='strict'):
- """Create a MIME-compliant header that can contain many character sets.
-
- Optional s is the initial header value. If None, the initial header
- value is not set. You can later append to the header with .append()
- method calls. s may be a byte string or a Unicode string, but see the
- .append() documentation for semantics.
-
- Optional charset serves two purposes: it has the same meaning as the
- charset argument to the .append() method. It also sets the default
- character set for all subsequent .append() calls that omit the charset
- argument. If charset is not provided in the constructor, the us-ascii
- charset is used both as s's initial charset and as the default for
- subsequent .append() calls.
-
- The maximum line length can be specified explicitly via maxlinelen. For
- splitting the first line to a shorter value (to account for the field
- header which isn't included in s, e.g. `Subject') pass in the name of
- the field in header_name. The default maxlinelen is 78 as recommended
- by RFC 2822.
-
- continuation_ws must be RFC 2822 compliant folding whitespace (usually
- either a space or a hard tab) which will be prepended to continuation
- lines.
-
- errors is passed through to the .append() call.
- """
- if charset is None:
- charset = USASCII
- elif not isinstance(charset, Charset):
- charset = Charset(charset)
- self._charset = charset
- self._continuation_ws = continuation_ws
- self._chunks = []
- if s is not None:
- self.append(s, charset, errors)
- if maxlinelen is None:
- maxlinelen = MAXLINELEN
- self._maxlinelen = maxlinelen
- if header_name is None:
- self._headerlen = 0
- else:
- # Take the separating colon and space into account.
- self._headerlen = len(header_name) + 2
-
- def __str__(self):
- """Return the string value of the header."""
- self._normalize()
- uchunks = []
- lastcs = None
- lastspace = None
- for string, charset in self._chunks:
- # We must preserve spaces between encoded and non-encoded word
- # boundaries, which means for us we need to add a space when we go
- # from a charset to None/us-ascii, or from None/us-ascii to a
- # charset. Only do this for the second and subsequent chunks.
- # Don't add a space if the None/us-ascii string already has
- # a space (trailing or leading depending on transition)
- nextcs = charset
- if nextcs == _charset.UNKNOWN8BIT:
- original_bytes = string.encode('ascii', 'surrogateescape')
- string = original_bytes.decode('ascii', 'replace')
- if uchunks:
- hasspace = string and self._nonctext(string[0])
- if lastcs not in (None, 'us-ascii'):
- if nextcs in (None, 'us-ascii') and not hasspace:
- uchunks.append(SPACE)
- nextcs = None
- elif nextcs not in (None, 'us-ascii') and not lastspace:
- uchunks.append(SPACE)
- lastspace = string and self._nonctext(string[-1])
- lastcs = nextcs
- uchunks.append(string)
- return EMPTYSTRING.join(uchunks)
-
- # Rich comparison operators for equality only. BAW: does it make sense to
- # have or explicitly disable <, <=, >, >= operators?
- def __eq__(self, other):
- # other may be a Header or a string. Both are fine so coerce
- # ourselves to a unicode (of the unencoded header value), swap the
- # args and do another comparison.
- return other == str(self)
-
- def __ne__(self, other):
- return not self == other
-
- def append(self, s, charset=None, errors='strict'):
- """Append a string to the MIME header.
-
- Optional charset, if given, should be a Charset instance or the name
- of a character set (which will be converted to a Charset instance). A
- value of None (the default) means that the charset given in the
- constructor is used.
-
- s may be a byte string or a Unicode string. If it is a byte string
- (i.e. isinstance(s, str) is false), then charset is the encoding of
- that byte string, and a UnicodeError will be raised if the string
- cannot be decoded with that charset. If s is a Unicode string, then
- charset is a hint specifying the character set of the characters in
- the string. In either case, when producing an RFC 2822 compliant
- header using RFC 2047 rules, the string will be encoded using the
- output codec of the charset. If the string cannot be encoded to the
- output codec, a UnicodeError will be raised.
-
- Optional `errors' is passed as the errors argument to the decode
- call if s is a byte string.
- """
- if charset is None:
- charset = self._charset
- elif not isinstance(charset, Charset):
- charset = Charset(charset)
- if not isinstance(s, str):
- input_charset = charset.input_codec or 'us-ascii'
- if input_charset == _charset.UNKNOWN8BIT:
- s = s.decode('us-ascii', 'surrogateescape')
- else:
- s = s.decode(input_charset, errors)
- # Ensure that the bytes we're storing can be decoded to the output
- # character set, otherwise an early error is raised.
- output_charset = charset.output_codec or 'us-ascii'
- if output_charset != _charset.UNKNOWN8BIT:
- try:
- s.encode(output_charset, errors)
- except UnicodeEncodeError:
- if output_charset!='us-ascii':
- raise
- charset = UTF8
- self._chunks.append((s, charset))
-
- def _nonctext(self, s):
- """True if string s is not a ctext character of RFC822.
- """
- return s.isspace() or s in ('(', ')', '\\')
-
- def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
- r"""Encode a message header into an RFC-compliant format.
-
- There are many issues involved in converting a given string for use in
- an email header. Only certain character sets are readable in most
- email clients, and as header strings can only contain a subset of
- 7-bit ASCII, care must be taken to properly convert and encode (with
- Base64 or quoted-printable) header strings. In addition, there is a
- 75-character length limit on any given encoded header field, so
- line-wrapping must be performed, even with double-byte character sets.
-
- Optional maxlinelen specifies the maximum length of each generated
- line, exclusive of the linesep string. Individual lines may be longer
- than maxlinelen if a folding point cannot be found. The first line
- will be shorter by the length of the header name plus ": " if a header
- name was specified at Header construction time. The default value for
- maxlinelen is determined at header construction time.
-
- Optional splitchars is a string containing characters which should be
- given extra weight by the splitting algorithm during normal header
- wrapping. This is in very rough support of RFC 2822's `higher level
- syntactic breaks': split points preceded by a splitchar are preferred
- during line splitting, with the characters preferred in the order in
- which they appear in the string. Space and tab may be included in the
- string to indicate whether preference should be given to one over the
- other as a split point when other split chars do not appear in the line
- being split. Splitchars does not affect RFC 2047 encoded lines.
-
- Optional linesep is a string to be used to separate the lines of
- the value. The default value is the most useful for typical
- Python applications, but it can be set to \r\n to produce RFC-compliant
- line separators when needed.
- """
- self._normalize()
- if maxlinelen is None:
- maxlinelen = self._maxlinelen
- # A maxlinelen of 0 means don't wrap. For all practical purposes,
- # choosing a huge number here accomplishes that and makes the
- # _ValueFormatter algorithm much simpler.
- if maxlinelen == 0:
- maxlinelen = 1000000
- formatter = _ValueFormatter(self._headerlen, maxlinelen,
- self._continuation_ws, splitchars)
- lastcs = None
- hasspace = lastspace = None
- for string, charset in self._chunks:
- if hasspace is not None:
- hasspace = string and self._nonctext(string[0])
- import sys
- if lastcs not in (None, 'us-ascii'):
- if not hasspace or charset not in (None, 'us-ascii'):
- formatter.add_transition()
- elif charset not in (None, 'us-ascii') and not lastspace:
- formatter.add_transition()
- lastspace = string and self._nonctext(string[-1])
- lastcs = charset
- hasspace = False
- lines = string.splitlines()
- if lines:
- formatter.feed('', lines[0], charset)
- else:
- formatter.feed('', '', charset)
- for line in lines[1:]:
- formatter.newline()
- if charset.header_encoding is not None:
- formatter.feed(self._continuation_ws, ' ' + line.lstrip(),
- charset)
- else:
- sline = line.lstrip()
- fws = line[:len(line)-len(sline)]
- formatter.feed(fws, sline, charset)
- if len(lines) > 1:
- formatter.newline()
- if self._chunks:
- formatter.add_transition()
- value = formatter._str(linesep)
- if _embeded_header.search(value):
- raise HeaderParseError("header value appears to contain "
- "an embedded header: {!r}".format(value))
- return value
-
- def _normalize(self):
- # Step 1: Normalize the chunks so that all runs of identical charsets
- # get collapsed into a single unicode string.
- chunks = []
- last_charset = None
- last_chunk = []
- for string, charset in self._chunks:
- if charset == last_charset:
- last_chunk.append(string)
- else:
- if last_charset is not None:
- chunks.append((SPACE.join(last_chunk), last_charset))
- last_chunk = [string]
- last_charset = charset
- if last_chunk:
- chunks.append((SPACE.join(last_chunk), last_charset))
- self._chunks = chunks
-
-
-class _ValueFormatter(object):
- def __init__(self, headerlen, maxlen, continuation_ws, splitchars):
- self._maxlen = maxlen
- self._continuation_ws = continuation_ws
- self._continuation_ws_len = len(continuation_ws)
- self._splitchars = splitchars
- self._lines = []
- self._current_line = _Accumulator(headerlen)
-
- def _str(self, linesep):
- self.newline()
- return linesep.join(self._lines)
-
- def __str__(self):
- return self._str(NL)
-
- def newline(self):
- end_of_line = self._current_line.pop()
- if end_of_line != (' ', ''):
- self._current_line.push(*end_of_line)
- if len(self._current_line) > 0:
- if self._current_line.is_onlyws():
- self._lines[-1] += str(self._current_line)
- else:
- self._lines.append(str(self._current_line))
- self._current_line.reset()
-
- def add_transition(self):
- self._current_line.push(' ', '')
-
- def feed(self, fws, string, charset):
- # If the charset has no header encoding (i.e. it is an ASCII encoding)
- # then we must split the header at the "highest level syntactic break"
- # possible. Note that we don't have a lot of smarts about field
- # syntax; we just try to break on semi-colons, then commas, then
- # whitespace. Eventually, this should be pluggable.
- if charset.header_encoding is None:
- self._ascii_split(fws, string, self._splitchars)
- return
- # Otherwise, we're doing either a Base64 or a quoted-printable
- # encoding which means we don't need to split the line on syntactic
- # breaks. We can basically just find enough characters to fit on the
- # current line, minus the RFC 2047 chrome. What makes this trickier
- # though is that we have to split at octet boundaries, not character
- # boundaries but it's only safe to split at character boundaries so at
- # best we can only get close.
- encoded_lines = charset.header_encode_lines(string, self._maxlengths())
- # The first element extends the current line, but if it's None then
- # nothing more fit on the current line so start a new line.
- try:
- first_line = encoded_lines.pop(0)
- except IndexError:
- # There are no encoded lines, so we're done.
- return
- if first_line is not None:
- self._append_chunk(fws, first_line)
- try:
- last_line = encoded_lines.pop()
- except IndexError:
- # There was only one line.
- return
- self.newline()
- self._current_line.push(self._continuation_ws, last_line)
- # Everything else are full lines in themselves.
- for line in encoded_lines:
- self._lines.append(self._continuation_ws + line)
-
- def _maxlengths(self):
- # The first line's length.
- yield self._maxlen - len(self._current_line)
- while True:
- yield self._maxlen - self._continuation_ws_len
-
- def _ascii_split(self, fws, string, splitchars):
- # The RFC 2822 header folding algorithm is simple in principle but
- # complex in practice. Lines may be folded any place where "folding
- # white space" appears by inserting a linesep character in front of the
- # FWS. The complication is that not all spaces or tabs qualify as FWS,
- # and we are also supposed to prefer to break at "higher level
- # syntactic breaks". We can't do either of these without intimate
- # knowledge of the structure of structured headers, which we don't have
- # here. So the best we can do here is prefer to break at the specified
- # splitchars, and hope that we don't choose any spaces or tabs that
- # aren't legal FWS. (This is at least better than the old algorithm,
- # where we would sometimes *introduce* FWS after a splitchar, or the
- # algorithm before that, where we would turn all white space runs into
- # single spaces or tabs.)
- parts = re.split("(["+FWS+"]+)", fws+string)
- if parts[0]:
- parts[:0] = ['']
- else:
- parts.pop(0)
- for fws, part in zip(*[iter(parts)]*2):
- self._append_chunk(fws, part)
-
- def _append_chunk(self, fws, string):
- self._current_line.push(fws, string)
- if len(self._current_line) > self._maxlen:
- # Find the best split point, working backward from the end.
- # There might be none, on a long first line.
- for ch in self._splitchars:
- for i in range(self._current_line.part_count()-1, 0, -1):
- if ch.isspace():
- fws = self._current_line[i][0]
- if fws and fws[0]==ch:
- break
- prevpart = self._current_line[i-1][1]
- if prevpart and prevpart[-1]==ch:
- break
- else:
- continue
- break
- else:
- fws, part = self._current_line.pop()
- if self._current_line._initial_size > 0:
- # There will be a header, so leave it on a line by itself.
- self.newline()
- if not fws:
- # We don't use continuation_ws here because the whitespace
- # after a header should always be a space.
- fws = ' '
- self._current_line.push(fws, part)
- return
- remainder = self._current_line.pop_from(i)
- self._lines.append(str(self._current_line))
- self._current_line.reset(remainder)
-
-
-class _Accumulator(list):
-
- def __init__(self, initial_size=0):
- self._initial_size = initial_size
- super().__init__()
-
- def push(self, fws, string):
- self.append((fws, string))
-
- def pop_from(self, i=0):
- popped = self[i:]
- self[i:] = []
- return popped
-
- def pop(self):
- if self.part_count()==0:
- return ('', '')
- return super().pop()
-
- def __len__(self):
- return sum((len(fws)+len(part) for fws, part in self),
- self._initial_size)
-
- def __str__(self):
- return EMPTYSTRING.join((EMPTYSTRING.join((fws, part))
- for fws, part in self))
-
- def reset(self, startval=None):
- if startval is None:
- startval = []
- self[:] = startval
- self._initial_size = 0
-
- def is_onlyws(self):
- return self._initial_size==0 and (not self or str(self).isspace())
-
- def part_count(self):
- return super().__len__()
+# Copyright (C) 2002-2007 Python Software Foundation
+# Author: Ben Gertzfield, Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Header encoding and decoding functionality."""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import bytes, range, str, super, zip
+
+__all__ = [
+ 'Header',
+ 'decode_header',
+ 'make_header',
+ ]
+
+import re
+import binascii
+
+from future.backports import email
+from future.backports.email import base64mime
+from future.backports.email.errors import HeaderParseError
+import future.backports.email.charset as _charset
+
+# Helpers
+from future.backports.email.quoprimime import _max_append, header_decode
+
+# Module-level alias for the Charset class from the charset module.
+Charset = _charset.Charset
+
+NL = '\n'
+SPACE = ' '
+# Bytes separator placed between adjacent unencoded words by decode_header().
+BSPACE = b' '
+SPACE8 = ' ' * 8
+EMPTYSTRING = ''
+# Default maximum line length used by Header() when maxlinelen is None.
+MAXLINELEN = 78
+# Characters treated as folding whitespace when splitting ASCII header text.
+FWS = ' \t'
+
+# Default charset for Header() when no charset argument is supplied.
+USASCII = Charset('us-ascii')
+# Charset Header.append() falls back to when text cannot be encoded as
+# us-ascii.
+UTF8 = Charset('utf-8')
+
+# Match encoded-word strings in the form =?charset?q?Hello_World?=
+ecre = re.compile(r'''
+ =\? # literal =?
+ (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
+ \? # literal ?
+ (?P<encoding>[qb]) # either a "q" or a "b", case insensitive
+ \? # literal ?
+ (?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string
+ \?= # literal ?=
+ ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE)
+
+# Field name regexp, including trailing colon, but not separating whitespace,
+# according to RFC 2822. Character range is from exclamation mark to tilde.
+# For use with .match()
+fcre = re.compile(r'[\041-\176]+:$')
+
+# Find a header embedded in a putative header value. Used to check for
+# header injection attack.
+_embeded_header = re.compile(r'\n[^ \t]+:')
+
+
+def decode_header(header):
+    """Decode a message header value without converting charset.
+
+    Returns a list of (string, charset) pairs containing each of the decoded
+    parts of the header.  Charset is None for non-encoded parts of the header,
+    otherwise a lower-case string containing the name of the character set
+    specified in the encoded string.
+
+    header may be a string that may or may not contain RFC2047 encoded words,
+    or it may be a Header object.  For a Header object (anything exposing a
+    _chunks attribute) each stored chunk is returned encoded with its own
+    charset.  For a string without any encoded word the result is
+    [(header, None)] with the string passed through untouched; otherwise
+    every returned string is a bytes object.
+
+    An email.errors.HeaderParseError may be raised when certain decoding error
+    occurs (e.g. a base64 decoding exception).
+    """
+    # If it is a Header object, we can just return the encoded chunks.
+    if hasattr(header, '_chunks'):
+        return [(_charset._encode(string, str(charset)), str(charset))
+                for string, charset in header._chunks]
+    # If no encoding, just return the header with no charset.
+    if not ecre.search(header):
+        return [(header, None)]
+    # First step is to parse all the encoded parts into triplets of the form
+    # (encoded_string, encoding, charset).  For unencoded strings, the last
+    # two parts will be None.
+    words = []
+    for line in header.splitlines():
+        # ecre has three groups, so split() yields repeating runs of
+        # [unencoded, charset, encoding, encoded, unencoded, ...].
+        parts = ecre.split(line)
+        first = True
+        while parts:
+            unencoded = parts.pop(0)
+            if first:
+                # Only the leading text of each physical line is stripped.
+                unencoded = unencoded.lstrip()
+                first = False
+            if unencoded:
+                words.append((unencoded, None, None))
+            if parts:
+                charset = parts.pop(0).lower()
+                encoding = parts.pop(0).lower()
+                encoded = parts.pop(0)
+                words.append((encoded, encoding, charset))
+    # Now loop over words and remove words that consist of whitespace
+    # between two encoded strings.
+    droplist = []
+    for n, w in enumerate(words):
+        if n>1 and w[1] and words[n-2][1] and words[n-1][0].isspace():
+            droplist.append(n-1)
+    # Delete from the back so the remaining indexes stay valid.
+    for d in reversed(droplist):
+        del words[d]
+
+    # The next step is to decode each encoded word by applying the reverse
+    # base64 or quopri transformation.  decoded_words is now a list of the
+    # form (decoded_word, charset).
+    decoded_words = []
+    for encoded_string, encoding, charset in words:
+        if encoding is None:
+            # This is an unencoded word.
+            decoded_words.append((encoded_string, charset))
+        elif encoding == 'q':
+            word = header_decode(encoded_string)
+            decoded_words.append((word, charset))
+        elif encoding == 'b':
+            paderr = len(encoded_string) % 4   # Postel's law: add missing padding
+            if paderr:
+                encoded_string += '==='[:4 - paderr]
+            try:
+                word = base64mime.decode(encoded_string)
+            except binascii.Error:
+                raise HeaderParseError('Base64 decoding error')
+            else:
+                decoded_words.append((word, charset))
+        else:
+            # ecre only admits "q" or "b", so this should be unreachable.
+            raise AssertionError('Unexpected encoding: ' + encoding)
+    # Now convert all words to bytes and collapse consecutive runs of
+    # similarly encoded words.
+    collapsed = []
+    last_word = last_charset = None
+    for word, charset in decoded_words:
+        if isinstance(word, str):
+            word = bytes(word, 'raw-unicode-escape')
+        if last_word is None:
+            last_word = word
+            last_charset = charset
+        elif charset != last_charset:
+            collapsed.append((last_word, last_charset))
+            last_word = word
+            last_charset = charset
+        elif last_charset is None:
+            # Adjacent unencoded words are rejoined with a single space.
+            last_word += BSPACE + word
+        else:
+            # Adjacent encoded words of one charset are concatenated as-is.
+            last_word += word
+    collapsed.append((last_word, last_charset))
+    return collapsed
+
+
+def make_header(decoded_seq, maxlinelen=None, header_name=None,
+                continuation_ws=' '):
+    """Build a Header from (decoded_string, charset) pairs.
+
+    decoded_seq is a sequence of pairs in the format produced by
+    decode_header(): the decoded string plus its character set, given as a
+    charset name, a Charset instance, or None.
+
+    Optional maxlinelen, header_name, and continuation_ws are handed to the
+    Header constructor unchanged.  Returns the populated Header instance.
+    """
+    header = Header(maxlinelen=maxlinelen, header_name=header_name,
+                    continuation_ws=continuation_ws)
+    for text, cs in decoded_seq:
+        # None is passed through as-is so that Header.append() applies its
+        # own default; anything else that is not yet a Charset gets wrapped.
+        if not (cs is None or isinstance(cs, Charset)):
+            cs = Charset(cs)
+        header.append(text, cs)
+    return header
+
+
+class Header(object):
+    def __init__(self, s=None, charset=None,
+                 maxlinelen=None, header_name=None,
+                 continuation_ws=' ', errors='strict'):
+        """Create a MIME-compliant header that can contain many character sets.
+
+        s is the optional initial header value.  When it is None nothing is
+        stored and the value can be built up later with .append().  s may be
+        a byte string or a Unicode string; see .append() for the semantics.
+
+        charset is used for s and is also remembered as the default for
+        later .append() calls that omit their charset argument.  It may be a
+        Charset instance or a charset name.  When omitted, us-ascii is used
+        for both purposes.
+
+        maxlinelen is the maximum line length; it defaults to 78, the value
+        recommended by RFC 2822.  To shorten the first line so that it leaves
+        room for the field name (which is not part of s, e.g. `Subject'),
+        pass that name as header_name.
+
+        continuation_ws must be RFC 2822 compliant folding whitespace
+        (usually either a space or a hard tab); it is prepended to
+        continuation lines.
+
+        errors is passed through to the .append() call.
+        """
+        if charset is None:
+            default_cs = USASCII
+        elif isinstance(charset, Charset):
+            default_cs = charset
+        else:
+            default_cs = Charset(charset)
+        self._charset = default_cs
+        self._continuation_ws = continuation_ws
+        self._chunks = []
+        if s is not None:
+            self.append(s, default_cs, errors)
+        self._maxlinelen = MAXLINELEN if maxlinelen is None else maxlinelen
+        # The extra 2 accounts for the colon and space after the field name.
+        self._headerlen = 0 if header_name is None else len(header_name) + 2
+
+    def __str__(self):
+        """Return the header value as one unencoded string.
+
+        The stored chunks are merged with _normalize() and then joined.  A
+        single space is inserted where the text switches between a real
+        charset and None/us-ascii, unless the None/us-ascii side already has
+        a character accepted by _nonctext() at that boundary.
+        """
+        self._normalize()
+        plain = (None, 'us-ascii')
+        pieces = []
+        prev_cs = None
+        prev_ends_nonctext = None
+        for text, cs in self._chunks:
+            if cs == _charset.UNKNOWN8BIT:
+                # Turn surrogate-escaped characters back into bytes, then
+                # decode as ASCII with undecodable bytes replaced.
+                raw = text.encode('ascii', 'surrogateescape')
+                text = raw.decode('ascii', 'replace')
+            # Separators are only considered from the second chunk onwards.
+            if pieces:
+                starts_nonctext = text and self._nonctext(text[0])
+                if prev_cs not in plain:
+                    # Leaving a real charset for plain text.
+                    if cs in plain and not starts_nonctext:
+                        pieces.append(SPACE)
+                        cs = None
+                elif cs not in plain and not prev_ends_nonctext:
+                    # Leaving plain text for a real charset.
+                    pieces.append(SPACE)
+            prev_ends_nonctext = text and self._nonctext(text[-1])
+            prev_cs = cs
+            pieces.append(text)
+        return EMPTYSTRING.join(pieces)
+
+ # Rich comparison operators for equality only. BAW: does it make sense to
+ # have or explicitly disable <, <=, >, >= operators?
+    def __eq__(self, other):
+        """Compare the unencoded string value of this header with other."""
+        # other may be a Header or a string.  Convert ourselves to the
+        # unencoded string value and let other drive the comparison, so a
+        # Header on the other side gets converted the same way in turn.
+        own_value = str(self)
+        return other == own_value
+
+    def __ne__(self, other):
+        """Return the logical negation of self == other."""
+        if self == other:
+            return False
+        return True
+
+    def append(self, s, charset=None, errors='strict'):
+        """Append a string to the MIME header.
+
+        charset may be a Charset instance or the name of a character set
+        (which is converted to a Charset instance).  None, the default,
+        selects the charset given to the constructor.
+
+        s may be a byte string or a Unicode string.  A byte string (i.e.
+        isinstance(s, str) is false) is decoded using the charset's input
+        codec, and a UnicodeError is raised if that fails.  For a Unicode
+        string, charset is a hint naming the character set of its
+        characters.  Either way the text must be encodable with the
+        charset's output codec, otherwise a UnicodeError is raised; the one
+        exception is an output codec of us-ascii, where the chunk is stored
+        with the utf-8 charset instead.
+
+        Optional `errors' is passed as the errors argument to the decode
+        call if s is a byte string, and to the trial encode call.
+        """
+        if charset is None:
+            charset = self._charset
+        elif not isinstance(charset, Charset):
+            charset = Charset(charset)
+        if not isinstance(s, str):
+            input_charset = charset.input_codec or 'us-ascii'
+            if input_charset == _charset.UNKNOWN8BIT:
+                # Unknown 8-bit data: keep the non-ASCII bytes recoverable.
+                codec, mode = 'us-ascii', 'surrogateescape'
+            else:
+                codec, mode = input_charset, errors
+            s = s.decode(codec, mode)
+        # Check early that the text can be encoded with the output codec;
+        # the encoded result itself is discarded.
+        output_charset = charset.output_codec or 'us-ascii'
+        if output_charset != _charset.UNKNOWN8BIT:
+            try:
+                s.encode(output_charset, errors)
+            except UnicodeEncodeError:
+                if output_charset == 'us-ascii':
+                    charset = UTF8
+                else:
+                    raise
+        self._chunks.append((s, charset))
+
+    def _nonctext(self, s):
+        """True if string s is not a ctext character of RFC822.
+
+        That is, s is whitespace, a parenthesis, or a backslash.
+        """
+        if s.isspace():
+            return True
+        return s in ('(', ')', '\\')
+
def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
    r"""Encode a message header into an RFC-compliant format.

    There are many issues involved in converting a given string for use in
    an email header.  Only certain character sets are readable in most
    email clients, and as header strings can only contain a subset of
    7-bit ASCII, care must be taken to properly convert and encode (with
    Base64 or quoted-printable) header strings.  In addition, there is a
    75-character length limit on any given encoded header field, so
    line-wrapping must be performed, even with double-byte character sets.

    Optional maxlinelen specifies the maximum length of each generated
    line, exclusive of the linesep string.  Individual lines may be longer
    than maxlinelen if a folding point cannot be found.  The first line
    will be shorter by the length of the header name plus ": " if a header
    name was specified at Header construction time.  The default value for
    maxlinelen is determined at header construction time.

    Optional splitchars is a string containing characters which should be
    given extra weight by the splitting algorithm during normal header
    wrapping.  This is in very rough support of RFC 2822's `higher level
    syntactic breaks':  split points preceded by a splitchar are preferred
    during line splitting, with the characters preferred in the order in
    which they appear in the string.  Space and tab may be included in the
    string to indicate whether preference should be given to one over the
    other as a split point when other split chars do not appear in the line
    being split.  Splitchars does not affect RFC 2047 encoded lines.

    Optional linesep is a string to be used to separate the lines of
    the value.  The default value is the most useful for typical
    Python applications, but it can be set to \r\n to produce RFC-compliant
    line separators when needed.

    Returns the encoded header value as a single string whose lines are
    joined with linesep.

    Raises HeaderParseError if the encoded value matches the module's
    embedded-header pattern.
    """
    # Collapse runs of chunks that share a charset before formatting.
    self._normalize()
    if maxlinelen is None:
        maxlinelen = self._maxlinelen
    # A maxlinelen of 0 means don't wrap.  For all practical purposes,
    # choosing a huge number here accomplishes that and makes the
    # _ValueFormatter algorithm much simpler.
    if maxlinelen == 0:
        maxlinelen = 1000000
    formatter = _ValueFormatter(self._headerlen, maxlinelen,
                                self._continuation_ws, splitchars)
    lastcs = None
    hasspace = lastspace = None
    for string, charset in self._chunks:
        # hasspace is None only for the first chunk; from the second chunk
        # on, decide whether a separating space (a "transition") has to be
        # inserted between the previous chunk and this one.
        if hasspace is not None:
            hasspace = string and self._nonctext(string[0])
            if lastcs not in (None, 'us-ascii'):
                if not hasspace or charset not in (None, 'us-ascii'):
                    formatter.add_transition()
            elif charset not in (None, 'us-ascii') and not lastspace:
                formatter.add_transition()
        lastspace = string and self._nonctext(string[-1])
        lastcs = charset
        hasspace = False
        lines = string.splitlines()
        # The first physical line continues whatever is already on the
        # formatter's current line.
        if lines:
            formatter.feed('', lines[0], charset)
        else:
            formatter.feed('', '', charset)
        for line in lines[1:]:
            formatter.newline()
            if charset.header_encoding is not None:
                # Encoded text: the original leading whitespace is replaced
                # by the continuation whitespace plus a single space.
                formatter.feed(self._continuation_ws, ' ' + line.lstrip(),
                               charset)
            else:
                # Unencoded text: keep the line's own leading whitespace
                # as its folding whitespace.
                sline = line.lstrip()
                fws = line[:len(line)-len(sline)]
                formatter.feed(fws, sline, charset)
        if len(lines) > 1:
            formatter.newline()
    if self._chunks:
        formatter.add_transition()
    value = formatter._str(linesep)
    if _embeded_header.search(value):
        raise HeaderParseError("header value appears to contain "
            "an embedded header: {!r}".format(value))
    return value
+
def _normalize(self):
    """Merge adjacent chunks that share a charset into single chunks.

    Rewrites self._chunks in place: the strings of each run of consecutive
    chunks whose charsets compare equal are joined with SPACE and stored
    as one (string, charset) pair.
    """
    merged = []
    run_charset = None
    run_strings = []
    for text, cs in self._chunks:
        if cs == run_charset:
            # Same charset as the current run: just extend it.
            run_strings.append(text)
            continue
        # Charset changed: flush the finished run.  A run whose charset is
        # None is not flushed here.
        if run_charset is not None:
            merged.append((SPACE.join(run_strings), run_charset))
        run_strings = [text]
        run_charset = cs
    if run_strings:
        merged.append((SPACE.join(run_strings), run_charset))
    self._chunks = merged
+
+
class _ValueFormatter(object):
    """Build the folded lines of a header value.

    Text is added with feed(), finished lines are collected in self._lines
    and the line under construction lives in self._current_line (an
    _Accumulator).  _str() closes the current line and joins all lines.
    """

    def __init__(self, headerlen, maxlen, continuation_ws, splitchars):
        """Set up an empty formatter.

        headerlen is handed to the accumulator as its initial size, so it
        counts against the length of the first line only.
        """
        self._maxlen = maxlen
        self._continuation_ws = continuation_ws
        self._continuation_ws_len = len(continuation_ws)
        self._splitchars = splitchars
        self._lines = []
        self._current_line = _Accumulator(headerlen)

    def _str(self, linesep):
        """Finish the current line and return all lines joined by linesep."""
        self.newline()
        return linesep.join(self._lines)

    def __str__(self):
        return self._str(NL)

    def newline(self):
        """Move the current line into the list of finished lines."""
        current = self._current_line
        # Drop a trailing transition marker (' ', ''); put anything else back.
        tail = current.pop()
        if tail != (' ', ''):
            current.push(*tail)
        if len(current) > 0:
            text = str(current)
            if current.is_onlyws():
                # Whitespace-only content is glued onto the previous line
                # instead of becoming a line of its own.
                self._lines[-1] += text
            else:
                self._lines.append(text)
        current.reset()

    def add_transition(self):
        """Push a single-space separator onto the current line."""
        self._current_line.push(' ', '')

    def feed(self, fws, string, charset):
        """Add string (preceded by folding whitespace fws) to the output."""
        # If the charset has no header encoding (i.e. it is an ASCII encoding)
        # then we must split the header at the "highest level syntactic break"
        # possible.  We don't have a lot of smarts about field syntax; we just
        # try to break on the configured split characters.
        if charset.header_encoding is None:
            self._ascii_split(fws, string, self._splitchars)
            return
        # Otherwise the text is Base64 or quoted-printable encoded, so there
        # is no need to split on syntactic breaks: the charset is asked to
        # produce encoded lines that fit the remaining space, minus the
        # RFC 2047 chrome.
        encoded_lines = charset.header_encode_lines(string, self._maxlengths())
        if not encoded_lines:
            # There are no encoded lines, so we're done.
            return
        # The first element extends the current line, but if it's None then
        # nothing more fit on the current line so start a new line.
        head = encoded_lines.pop(0)
        if head is not None:
            self._append_chunk(fws, head)
        if not encoded_lines:
            # There was only one line.
            return
        tail = encoded_lines.pop()
        self.newline()
        # The last encoded line becomes the new current line ...
        self._current_line.push(self._continuation_ws, tail)
        # ... and everything in between are full lines in themselves.
        self._lines.extend(self._continuation_ws + line
                           for line in encoded_lines)

    def _maxlengths(self):
        """Yield the space left on the current line, then on each new line."""
        # The first line's length.
        yield self._maxlen - len(self._current_line)
        while True:
            yield self._maxlen - self._continuation_ws_len

    def _ascii_split(self, fws, string, splitchars):
        """Feed unencoded text to the current line, one FWS/word pair at a time."""
        # The RFC 2822 header folding algorithm is simple in principle but
        # complex in practice.  Lines may be folded any place where "folding
        # white space" appears by inserting a linesep character in front of
        # the FWS.  Not all spaces or tabs qualify as FWS, and higher level
        # syntactic breaks should be preferred, but neither can be done
        # without knowledge of the header's structure.  So the best we can do
        # is prefer to break at the specified splitchars, and hope that we
        # don't choose any spaces or tabs that aren't legal FWS.
        pieces = re.split("(["+FWS+"]+)", fws+string)
        # Normalise to alternating (whitespace, text) items.
        if pieces[0]:
            pieces.insert(0, '')
        else:
            del pieces[0]
        for index in range(0, len(pieces) - 1, 2):
            self._append_chunk(pieces[index], pieces[index + 1])

    def _find_split_index(self):
        """Return the part index to fold the current line at, or None."""
        line = self._current_line
        for ch in self._splitchars:
            # Work backward from the end; index 0 is never a split point.
            for i in range(line.part_count()-1, 0, -1):
                if ch.isspace():
                    lead = line[i][0]
                    if lead and lead[0] == ch:
                        return i
                prevpart = line[i-1][1]
                if prevpart and prevpart[-1] == ch:
                    return i
        return None

    def _append_chunk(self, fws, string):
        """Push one (fws, string) part, folding the line if it got too long."""
        line = self._current_line
        line.push(fws, string)
        if len(line) <= self._maxlen:
            return
        # Find the best split point, working backward from the end.
        # There might be none, on a long first line.
        split_at = self._find_split_index()
        if split_at is None:
            fws, part = line.pop()
            if line._initial_size > 0:
                # There will be a header, so leave it on a line by itself.
                self.newline()
                if not fws:
                    # We don't use continuation_ws here because the whitespace
                    # after a header should always be a space.
                    fws = ' '
            line.push(fws, part)
            return
        remainder = line.pop_from(split_at)
        self._lines.append(str(line))
        line.reset(remainder)
+
+
class _Accumulator(list):
    """List of (fws, text) parts making up one header line.

    len() reports the number of characters held (plus an initial size),
    not the number of parts; use part_count() for the latter.
    """

    def __init__(self, initial_size=0):
        self._initial_size = initial_size
        super().__init__()

    def push(self, fws, string):
        """Append one (fws, string) part."""
        self.append((fws, string))

    def pop_from(self, i=0):
        """Remove and return, as a list, every part from index i onward."""
        tail = self[i:]
        del self[i:]
        return tail

    def pop(self):
        """Remove and return the last part, or ('', '') when there is none."""
        if not self.part_count():
            return ('', '')
        return super().pop()

    def __len__(self):
        # Character count of all parts, on top of the initial size.
        total = self._initial_size
        for fws, part in self:
            total += len(fws) + len(part)
        return total

    def __str__(self):
        return EMPTYSTRING.join(piece for pair in self for piece in pair)

    def reset(self, startval=None):
        """Replace the contents with startval (default: nothing)."""
        self[:] = [] if startval is None else startval
        # The initial size is cleared by every reset.
        self._initial_size = 0

    def is_onlyws(self):
        """True if there is no initial size and no non-whitespace content."""
        if self._initial_size != 0:
            return False
        return not self or str(self).isspace()

    def part_count(self):
        """Return the number of (fws, string) parts held."""
        return super().__len__()
diff --git a/contrib/python/future/future/backports/email/headerregistry.py b/contrib/python/future/future/backports/email/headerregistry.py
index 01c806f7af..9aaad65a14 100644
--- a/contrib/python/future/future/backports/email/headerregistry.py
+++ b/contrib/python/future/future/backports/email/headerregistry.py
@@ -1,592 +1,592 @@
-"""Representing and manipulating email headers via custom objects.
-
-This module provides an implementation of the HeaderRegistry API.
-The implementation is designed to flexibly follow RFC5322 rules.
-
-Eventually HeaderRegistry will be a public API, but it isn't yet,
-and will probably change some before that happens.
-
-"""
-from __future__ import unicode_literals
-from __future__ import division
-from __future__ import absolute_import
-
-from future.builtins import super
-from future.builtins import str
-from future.utils import text_to_native_str
-from future.backports.email import utils
-from future.backports.email import errors
-from future.backports.email import _header_value_parser as parser
-
class Address(object):
    """A single email address: display name, username and domain."""

    def __init__(self, display_name='', username='', domain='', addr_spec=None):
        """Create an object representing a full email address.

        An address can have a 'display_name', a 'username', and a 'domain'.
        Instead of passing username and domain separately, both may be
        given together through the addr_spec keyword.  An addr_spec string
        must be properly quoted according to RFC 5322 rules; an error is
        raised if it is not.

        The display_name, username, domain and addr_spec attributes are all
        read-only.  The addr_spec and the string value of the object are
        both quoted according to RFC 5322 rules, but without any Content
        Transfer Encoding.

        Raises TypeError when addr_spec is combined with username or
        domain, ValueError when addr_spec cannot be parsed completely, and
        the first parser defect when the parsed addr_spec has defects.
        """
        # This clause with its potential 'raise' may only happen when an
        # application program creates an Address object using an addr_spec
        # keyword.  The email library code itself must always supply
        # username and domain.
        if addr_spec is not None:
            if username or domain:
                raise TypeError("addrspec specified when username and/or "
                                "domain also specified")
            parsed, leftover = parser.get_addr_spec(addr_spec)
            if leftover:
                raise ValueError("Invalid addr_spec; only '{}' "
                                 "could be parsed from '{}'".format(
                                    parsed, addr_spec))
            if parsed.all_defects:
                raise parsed.all_defects[0]
            username = parsed.local_part
            domain = parsed.domain
        self._display_name = display_name
        self._username = username
        self._domain = domain

    @property
    def display_name(self):
        return self._display_name

    @property
    def username(self):
        return self._username

    @property
    def domain(self):
        return self._domain

    @property
    def addr_spec(self):
        """The addr_spec (username@domain) portion of the address, quoted
        according to RFC 5322 rules, but with no Content Transfer Encoding.
        """
        local = self.username
        # Quote the local part if it contains any DOT_ATOM_ENDS character.
        if not set(local).isdisjoint(parser.DOT_ATOM_ENDS):
            local = parser.quote_string(local)
        if self.domain:
            return local + '@' + self.domain
        return local if local else '<>'

    def __repr__(self):
        return "Address(display_name={!r}, username={!r}, domain={!r})".format(
                        self.display_name, self.username, self.domain)

    def __str__(self):
        disp = self.display_name
        # Quote the display name if it contains any SPECIALS character.
        if not set(disp).isdisjoint(parser.SPECIALS):
            disp = parser.quote_string(disp)
        if not disp:
            return self.addr_spec
        spec = self.addr_spec
        if spec == '<>':
            # An empty address is rendered as "name <>", not "name <<>>".
            spec = ''
        return "{} <{}>".format(disp, spec)

    def __eq__(self, other):
        if type(other) != type(self):
            return False
        return (self.display_name == other.display_name and
                self.username == other.username and
                self.domain == other.domain)
-
-
class Group(object):
    """A named group of addresses, or a wrapper around a lone address."""

    def __init__(self, display_name=None, addresses=None):
        """Create an object representing an address group.

        An address group consists of a display_name followed by a colon and
        a list of addresses (see Address) terminated by a semi-colon.  The
        Group is created by specifying a display_name and a possibly empty
        list of Address objects.  A Group can also be used to represent a
        single address that is not in a group, which is convenient when
        manipulating lists that are a combination of Groups and individual
        Addresses.  In this case the display_name should be set to None.
        In particular, the string representation of a Group whose
        display_name is None is the same as the Address object, if there is
        one and only one Address object in the addresses list.

        The addresses are stored as a tuple; a falsy value gives an empty
        tuple.
        """
        self._display_name = display_name
        self._addresses = tuple(addresses) if addresses else tuple()

    @property
    def display_name(self):
        return self._display_name

    @property
    def addresses(self):
        return self._addresses

    def __repr__(self):
        # The closing parenthesis was missing from the format string, which
        # produced an unbalanced representation.
        return "Group(display_name={!r}, addresses={!r})".format(
                 self.display_name, self.addresses)

    def __str__(self):
        # A lone address outside any group prints as the address itself.
        if self.display_name is None and len(self.addresses)==1:
            return str(self.addresses[0])
        disp = self.display_name
        if disp is not None:
            # Quote the display name if it contains any SPECIALS character.
            nameset = set(disp)
            if len(nameset) > len(nameset-parser.SPECIALS):
                disp = parser.quote_string(disp)
        # NOTE(review): with display_name None and zero or several
        # addresses, the text starts with the literal "None:".
        adrstr = ", ".join(str(x) for x in self.addresses)
        adrstr = ' ' + adrstr if adrstr else adrstr
        return "{}:{};".format(disp, adrstr)

    def __eq__(self, other):
        if type(other) != type(self):
            return False
        return (self.display_name == other.display_name and
                self.addresses == other.addresses)
-
-
-# Header Classes #
-
class BaseHeader(str):

    """Base class for message headers.

    Implements generic behavior and provides tools for subclasses.

    A subclass must define a classmethod named 'parse' that takes an unfolded
    value string and a dictionary as its arguments.  The dictionary will
    contain one key, 'defects', initialized to an empty list.  After the call
    the dictionary must contain two additional keys: parse_tree, set to the
    parse tree obtained from parsing the header, and 'decoded', set to the
    string value of the idealized representation of the data from the value.
    (That is, encoded words are decoded, and values that have canonical
    representations are so represented.)

    The defects key is intended to collect parsing defects, which the message
    parser will subsequently dispose of as appropriate.  The parser should not,
    insofar as practical, raise any errors.  Defects should be added to the
    list instead.  The standard header parsers register defects for RFC
    compliance issues, for obsolete RFC syntax, and for unrecoverable parsing
    errors.

    The parse method may add additional keys to the dictionary.  In this case
    the subclass must define an 'init' method, which will be passed the
    dictionary as its keyword arguments.  The method should use (usually by
    setting them as the value of similarly named attributes) and remove all the
    extra keys added by its parse method, and then use super to call its parent
    class with the remaining arguments and keywords.

    The subclass should also make sure that a 'max_count' attribute is defined
    that is either None or 1. XXX: need to better define this API.

    """

    def __new__(cls, name, value):
        """Parse value with cls.parse and build the header string from it."""
        kwds = {'defects': []}
        cls.parse(value, kwds)
        if utils._has_surrogates(kwds['decoded']):
            kwds['decoded'] = utils._sanitize(kwds['decoded'])
        self = str.__new__(cls, kwds['decoded'])
        # 'decoded' is deliberately left in kwds: init() below only picks
        # out the keys it knows and ignores the rest.
        self.init(name, **kwds)
        return self

    def init(self, name, **_3to2kwargs):
        """Store the header name, parse tree and defect list.

        'defects' and 'parse_tree' are required keywords (KeyError if
        missing); any other keyword is ignored.
        """
        defects = _3to2kwargs.pop('defects')
        parse_tree = _3to2kwargs.pop('parse_tree')
        self._name = name
        self._parse_tree = parse_tree
        self._defects = defects

    @property
    def name(self):
        return self._name

    @property
    def defects(self):
        return tuple(self._defects)

    def __reduce__(self):
        # The header class is created dynamically, so pickling records the
        # class name and bases and lets _reconstruct_header rebuild it.
        return (
            _reconstruct_header,
            (
                self.__class__.__name__,
                self.__class__.__bases__,
                str(self),
            ),
            self.__dict__)

    @classmethod
    def _reconstruct(cls, value):
        """Create an instance from value without running parse/init."""
        return str.__new__(cls, value)

    def fold(self, **_3to2kwargs):
        """Fold header according to policy.

        The parsed representation of the header is folded according to
        RFC5322 rules, as modified by the policy.  If the parse tree
        contains surrogateescaped bytes, the bytes are CTE encoded using
        the charset 'unknown-8bit'.

        Any non-ASCII characters in the parse tree are CTE encoded using
        charset utf-8. XXX: make this a policy setting.

        The returned value is an ASCII-only string possibly containing linesep
        characters, and ending with a linesep character.  The string includes
        the header name and the ': ' separator.

        'policy' is a required keyword argument (KeyError if missing).

        """
        # The docstring has to come first: placed after this statement it
        # was a plain string expression and fold.__doc__ stayed None.
        policy = _3to2kwargs.pop('policy')
        # At some point we need to only put fws here if it was in the source.
        header = parser.Header([
            parser.HeaderLabel([
                parser.ValueTerminal(self.name, 'header-name'),
                parser.ValueTerminal(':', 'header-sep')]),
            parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')]),
                             self._parse_tree])
        return header.fold(policy=policy)
-
-
def _reconstruct_header(cls_name, bases, value):
    """Rebuild a header from the (class name, bases, value) triple that
    BaseHeader.__reduce__ records: recreate the class, then the instance.
    """
    header_cls = type(text_to_native_str(cls_name), bases, {})
    return header_cls._reconstruct(value)
-
-
class UnstructuredHeader(object):
    """Header whose value is parsed with parser.get_unstructured."""

    max_count = None
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Store the parse tree of value and its string form in kwds."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
-
-
class UniqueUnstructuredHeader(UnstructuredHeader):
    """UnstructuredHeader with max_count set to 1."""

    max_count = 1
-
-
class DateHeader(object):

    """Header whose value consists of a single timestamp.

    Provides an additional attribute, datetime, which is either an aware
    datetime using a timezone, or a naive datetime if the timezone
    in the input string is -0000.  Also accepts a datetime as input.
    The 'value' attribute is the normalized form of the timestamp,
    which means it is the output of format_datetime on the datetime.
    """

    max_count = None

    # This is used only for folding, not for creating 'decoded'.
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with 'datetime', 'decoded' and 'parse_tree' for value."""
        if not value:
            # An empty value is recorded as a defect, not raised.
            kwds['defects'].append(errors.HeaderMissingRequiredValue())
            kwds['datetime'] = None
            kwds['decoded'] = ''
            kwds['parse_tree'] = parser.TokenList()
            return
        # Strings are parsed; anything else is used as the datetime as-is.
        when = value
        if isinstance(when, str):
            when = utils.parsedate_to_datetime(when)
        kwds['datetime'] = when
        decoded = utils.format_datetime(when)
        kwds['decoded'] = decoded
        kwds['parse_tree'] = cls.value_parser(decoded)

    def init(self, *args, **kw):
        """Take 'datetime' out of kw, then defer to the next init()."""
        self._datetime = kw.pop('datetime')
        super().init(*args, **kw)

    @property
    def datetime(self):
        return self._datetime
-
-
class UniqueDateHeader(DateHeader):
    """DateHeader with max_count set to 1."""

    max_count = 1
-
-
class AddressHeader(object):
    """Header whose value is a list of addresses and/or address groups."""

    max_count = None

    @staticmethod
    def value_parser(value):
        """Parse value into an address list; the whole value must be consumed."""
        address_list, remainder = parser.get_address_list(value)
        assert not remainder, 'this should not happen'
        return address_list

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with 'groups', 'defects', 'decoded' and 'parse_tree'.

        value is either a header string or Address/Group objects (a single
        one or an iterable of them).
        """
        if isinstance(value, str):
            # We are translating here from the RFC language (address/mailbox)
            # to our API language (group/address).
            address_list = cls.value_parser(value)
            kwds['parse_tree'] = address_list
            groups = [
                Group(addr.display_name,
                      [Address(mb.display_name or '',
                               mb.local_part or '',
                               mb.domain or '')
                       for mb in addr.all_mailboxes])
                for addr in address_list.addresses]
            defects = list(address_list.all_defects)
        else:
            # Assume it is Address/Group stuff
            if not hasattr(value, '__iter__'):
                value = [value]
            groups = []
            for item in value:
                # Anything without an 'addresses' attribute is wrapped in
                # an anonymous single-address Group.
                if not hasattr(item, 'addresses'):
                    item = Group(None, [item])
                groups.append(item)
            defects = []
        kwds['groups'] = groups
        kwds['defects'] = defects
        kwds['decoded'] = ', '.join([str(item) for item in groups])
        if 'parse_tree' not in kwds:
            kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        """Take 'groups' out of kw, then defer to the next init()."""
        self._groups = tuple(kw.pop('groups'))
        # Flattened address tuple, computed on first access.
        self._addresses = None
        super().init(*args, **kw)

    @property
    def groups(self):
        return self._groups

    @property
    def addresses(self):
        """All addresses of all groups, as one flat tuple (cached)."""
        if self._addresses is None:
            collected = []
            for group in self._groups:
                collected.extend(group.addresses)
            self._addresses = tuple(collected)
        return self._addresses
-
-
class UniqueAddressHeader(AddressHeader):
    """AddressHeader with max_count set to 1."""

    max_count = 1
-
-
class SingleAddressHeader(AddressHeader):
    """AddressHeader with an accessor for its one and only address."""

    @property
    def address(self):
        """The single address; ValueError unless there is exactly one."""
        found = self.addresses
        if len(found) == 1:
            return found[0]
        raise ValueError(("value of single address header {} is not "
            "a single address").format(self.name))
-
-
class UniqueSingleAddressHeader(SingleAddressHeader):
    """SingleAddressHeader with max_count set to 1."""

    max_count = 1
-
-
class MIMEVersionHeader(object):
    """MIME-Version header exposing major, minor and version."""

    max_count = 1

    value_parser = staticmethod(parser.parse_mime_version)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse result and the version components."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        minor = tree.minor
        if minor is None:
            # Without a minor number no version component is reported.
            major = version = None
        else:
            major = tree.major
            version = '{}.{}'.format(major, minor)
        kwds['major'] = major
        kwds['minor'] = minor
        kwds['version'] = version

    def init(self, *args, **kw):
        """Take the version keys out of kw, then defer to the next init()."""
        self._version = kw.pop('version')
        self._major = kw.pop('major')
        self._minor = kw.pop('minor')
        super().init(*args, **kw)

    @property
    def major(self):
        return self._major

    @property
    def minor(self):
        return self._minor

    @property
    def version(self):
        return self._version
-
-
class ParameterizedMIMEHeader(object):

    # Mixin that handles the params dict.  Must be subclassed and
    # a property value_parser for the specific header provided.

    max_count = 1

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse result and a 'params' dict."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        if tree.params is None:
            kwds['params'] = {}
            return
        # The MIME RFCs specify that parameter ordering is arbitrary.
        # Names are sanitized and lower-cased, values only sanitized.
        kwds['params'] = {utils._sanitize(key).lower(): utils._sanitize(val)
                          for key, val in tree.params}

    def init(self, *args, **kw):
        """Take 'params' out of kw, then defer to the next init()."""
        self._params = kw.pop('params')
        super().init(*args, **kw)

    @property
    def params(self):
        # A copy, so callers cannot modify the stored dict.
        return self._params.copy()
-
-
class ContentTypeHeader(ParameterizedMIMEHeader):
    """Content-Type header exposing maintype, subtype and content_type."""

    value_parser = staticmethod(parser.parse_content_type_header)

    def init(self, *args, **kw):
        """Run the inherited init(), then cache the sanitized types."""
        super().init(*args, **kw)
        tree = self._parse_tree
        self._maintype = utils._sanitize(tree.maintype)
        self._subtype = utils._sanitize(tree.subtype)

    @property
    def maintype(self):
        return self._maintype

    @property
    def subtype(self):
        return self._subtype

    @property
    def content_type(self):
        """The 'maintype/subtype' string."""
        return self.maintype + '/' + self.subtype
-
-
class ContentDispositionHeader(ParameterizedMIMEHeader):
    """Content-Disposition header exposing content_disposition."""

    value_parser = staticmethod(parser.parse_content_disposition_header)

    def init(self, *args, **kw):
        """Run the inherited init(), then cache the disposition value."""
        super().init(*args, **kw)
        cd = self._parse_tree.content_disposition
        # None is stored unchanged; any other value is sanitized first.
        if cd is not None:
            cd = utils._sanitize(cd)
        self._content_disposition = cd

    @property
    def content_disposition(self):
        return self._content_disposition
-
-
class ContentTransferEncodingHeader(object):
    """Content-Transfer-Encoding header exposing cte."""

    max_count = 1

    value_parser = staticmethod(parser.parse_content_transfer_encoding_header)

    @classmethod
    def parse(cls, value, kwds):
        """Store the parse tree, its string form and its defects in kwds."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)

    def init(self, *args, **kw):
        """Run the next init(), then cache the sanitized cte value."""
        super().init(*args, **kw)
        self._cte = utils._sanitize(self._parse_tree.cte)

    @property
    def cte(self):
        return self._cte
-
-
-# The header factory #
-
# Lower-cased header name -> specialized header class.  HeaderRegistry
# copies this mapping into its registry unless told otherwise.
_default_header_map = {
    # Unstructured text
    'subject':                      UniqueUnstructuredHeader,
    # Dates
    'date':                         UniqueDateHeader,
    'resent-date':                  DateHeader,
    'orig-date':                    UniqueDateHeader,
    # Addresses
    'sender':                       UniqueSingleAddressHeader,
    'resent-sender':                SingleAddressHeader,
    'to':                           UniqueAddressHeader,
    'resent-to':                    AddressHeader,
    'cc':                           UniqueAddressHeader,
    'resent-cc':                    AddressHeader,
    'bcc':                          UniqueAddressHeader,
    'resent-bcc':                   AddressHeader,
    'from':                         UniqueAddressHeader,
    'resent-from':                  AddressHeader,
    'reply-to':                     UniqueAddressHeader,
    # MIME
    'mime-version':                 MIMEVersionHeader,
    'content-type':                 ContentTypeHeader,
    'content-disposition':          ContentDispositionHeader,
    'content-transfer-encoding':    ContentTransferEncodingHeader,
    }
-
class HeaderRegistry(object):

    """A header_factory and header registry."""

    def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
                       use_default_map=True):
        """Create a header_factory that works with the Policy API.

        base_class is the class that will be the last class in the created
        header class's __bases__ list.  default_class is the class that will
        be used if "name" (see __call__) does not appear in the registry.
        use_default_map controls whether or not the default mapping of names
        to specialized classes is copied in to the registry when the factory
        is created.  The default is True.

        """
        # Always a fresh dict, so later map_to_type() calls never touch
        # the module-level default map.
        self.registry = dict(_default_header_map) if use_default_map else {}
        self.base_class = base_class
        self.default_class = default_class

    def map_to_type(self, name, cls):
        """Register cls as the specialized class for handling "name" headers.

        The name is stored lower-cased.
        """
        self.registry[name.lower()] = cls

    def __getitem__(self, name):
        """Return a new class combining the class for name with base_class."""
        specialized = self.registry.get(name.lower(), self.default_class)
        class_name = text_to_native_str('_'+specialized.__name__)
        return type(class_name, (specialized, self.base_class), {})

    def __call__(self, name, value):
        """Create a header instance for header 'name' from 'value'.

        Creates a header instance by creating a specialized class for parsing
        and representing the specified header by combining the factory
        base_class with a specialized class from the registry or the
        default_class, and passing the name and value to the constructed
        class's constructor.

        """
        header_cls = self[name]
        return header_cls(name, value)
+"""Representing and manipulating email headers via custom objects.
+
+This module provides an implementation of the HeaderRegistry API.
+The implementation is designed to flexibly follow RFC5322 rules.
+
+Eventually HeaderRegistry will be a public API, but it isn't yet,
+and will probably change some before that happens.
+
+"""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+
+from future.builtins import super
+from future.builtins import str
+from future.utils import text_to_native_str
+from future.backports.email import utils
+from future.backports.email import errors
+from future.backports.email import _header_value_parser as parser
+
class Address(object):
    """A single email address: display name, username and domain."""

    def __init__(self, display_name='', username='', domain='', addr_spec=None):
        """Create an object representing a full email address.

        An address can have a 'display_name', a 'username', and a 'domain'.
        Instead of passing username and domain separately, both may be
        given together through the addr_spec keyword.  An addr_spec string
        must be properly quoted according to RFC 5322 rules; an error is
        raised if it is not.

        The display_name, username, domain and addr_spec attributes are all
        read-only.  The addr_spec and the string value of the object are
        both quoted according to RFC 5322 rules, but without any Content
        Transfer Encoding.

        Raises TypeError when addr_spec is combined with username or
        domain, ValueError when addr_spec cannot be parsed completely, and
        the first parser defect when the parsed addr_spec has defects.
        """
        # This clause with its potential 'raise' may only happen when an
        # application program creates an Address object using an addr_spec
        # keyword.  The email library code itself must always supply
        # username and domain.
        if addr_spec is not None:
            if username or domain:
                raise TypeError("addrspec specified when username and/or "
                                "domain also specified")
            parsed, leftover = parser.get_addr_spec(addr_spec)
            if leftover:
                raise ValueError("Invalid addr_spec; only '{}' "
                                 "could be parsed from '{}'".format(
                                    parsed, addr_spec))
            if parsed.all_defects:
                raise parsed.all_defects[0]
            username = parsed.local_part
            domain = parsed.domain
        self._display_name = display_name
        self._username = username
        self._domain = domain

    @property
    def display_name(self):
        return self._display_name

    @property
    def username(self):
        return self._username

    @property
    def domain(self):
        return self._domain

    @property
    def addr_spec(self):
        """The addr_spec (username@domain) portion of the address, quoted
        according to RFC 5322 rules, but with no Content Transfer Encoding.
        """
        local = self.username
        # Quote the local part if it contains any DOT_ATOM_ENDS character.
        if not set(local).isdisjoint(parser.DOT_ATOM_ENDS):
            local = parser.quote_string(local)
        if self.domain:
            return local + '@' + self.domain
        return local if local else '<>'

    def __repr__(self):
        return "Address(display_name={!r}, username={!r}, domain={!r})".format(
                        self.display_name, self.username, self.domain)

    def __str__(self):
        disp = self.display_name
        # Quote the display name if it contains any SPECIALS character.
        if not set(disp).isdisjoint(parser.SPECIALS):
            disp = parser.quote_string(disp)
        if not disp:
            return self.addr_spec
        spec = self.addr_spec
        if spec == '<>':
            # An empty address is rendered as "name <>", not "name <<>>".
            spec = ''
        return "{} <{}>".format(disp, spec)

    def __eq__(self, other):
        if type(other) != type(self):
            return False
        return (self.display_name == other.display_name and
                self.username == other.username and
                self.domain == other.domain)
+
+
class Group(object):
    """A named group of addresses, or a wrapper around a lone address."""

    def __init__(self, display_name=None, addresses=None):
        """Create an object representing an address group.

        An address group consists of a display_name followed by a colon and
        a list of addresses (see Address) terminated by a semi-colon.  The
        Group is created by specifying a display_name and a possibly empty
        list of Address objects.  A Group can also be used to represent a
        single address that is not in a group, which is convenient when
        manipulating lists that are a combination of Groups and individual
        Addresses.  In this case the display_name should be set to None.
        In particular, the string representation of a Group whose
        display_name is None is the same as the Address object, if there is
        one and only one Address object in the addresses list.

        The addresses are stored as a tuple; a falsy value gives an empty
        tuple.
        """
        self._display_name = display_name
        self._addresses = tuple(addresses) if addresses else tuple()

    @property
    def display_name(self):
        return self._display_name

    @property
    def addresses(self):
        return self._addresses

    def __repr__(self):
        # The closing parenthesis was missing from the format string, which
        # produced an unbalanced representation.
        return "Group(display_name={!r}, addresses={!r})".format(
                 self.display_name, self.addresses)

    def __str__(self):
        # A lone address outside any group prints as the address itself.
        if self.display_name is None and len(self.addresses)==1:
            return str(self.addresses[0])
        disp = self.display_name
        if disp is not None:
            # Quote the display name if it contains any SPECIALS character.
            nameset = set(disp)
            if len(nameset) > len(nameset-parser.SPECIALS):
                disp = parser.quote_string(disp)
        # NOTE(review): with display_name None and zero or several
        # addresses, the text starts with the literal "None:".
        adrstr = ", ".join(str(x) for x in self.addresses)
        adrstr = ' ' + adrstr if adrstr else adrstr
        return "{}:{};".format(disp, adrstr)

    def __eq__(self, other):
        if type(other) != type(self):
            return False
        return (self.display_name == other.display_name and
                self.addresses == other.addresses)
+
+
+# Header Classes #
+
class BaseHeader(str):

    """Base class for message headers.

    Implements generic behavior and provides tools for subclasses.

    A subclass must define a classmethod named 'parse' that takes an unfolded
    value string and a dictionary as its arguments.  The dictionary will
    contain one key, 'defects', initialized to an empty list.  After the call
    the dictionary must contain two additional keys: parse_tree, set to the
    parse tree obtained from parsing the header, and 'decoded', set to the
    string value of the idealized representation of the data from the value.
    (That is, encoded words are decoded, and values that have canonical
    representations are so represented.)

    The defects key is intended to collect parsing defects, which the message
    parser will subsequently dispose of as appropriate.  The parser should not,
    insofar as practical, raise any errors.  Defects should be added to the
    list instead.  The standard header parsers register defects for RFC
    compliance issues, for obsolete RFC syntax, and for unrecoverable parsing
    errors.

    The parse method may add additional keys to the dictionary.  In this case
    the subclass must define an 'init' method, which will be passed the
    dictionary as its keyword arguments.  The method should use (usually by
    setting them as the value of similarly named attributes) and remove all the
    extra keys added by its parse method, and then use super to call its parent
    class with the remaining arguments and keywords.

    The subclass should also make sure that a 'max_count' attribute is defined
    that is either None or 1. XXX: need to better define this API.

    """

    def __new__(cls, name, value):
        """Parse *value* with cls.parse() and build the header instance.

        The string value of the new instance is the 'decoded' entry that
        parse() stored in the keyword dictionary, sanitized first if it
        contains surrogates.
        """
        kwds = {'defects': []}
        cls.parse(value, kwds)
        if utils._has_surrogates(kwds['decoded']):
            kwds['decoded'] = utils._sanitize(kwds['decoded'])
        self = str.__new__(cls, kwds['decoded'])
        # Note: 'decoded' is still present in kwds here; BaseHeader.init()
        # ignores any keyword it does not pop itself.
        self.init(name, **kwds)
        return self

    def init(self, name, **_3to2kwargs):
        """Store the header name plus the 'defects' and 'parse_tree' keywords.

        Both keywords are required; a KeyError is raised if either is
        missing.  Any other keyword is ignored.
        """
        defects = _3to2kwargs.pop('defects')
        parse_tree = _3to2kwargs.pop('parse_tree')
        self._name = name
        self._parse_tree = parse_tree
        self._defects = defects

    @property
    def name(self):
        """The header field name this instance was created with."""
        return self._name

    @property
    def defects(self):
        """Tuple snapshot of the defects collected while parsing."""
        return tuple(self._defects)

    def __reduce__(self):
        """Support pickling of the dynamically created header classes.

        The class is described by its name and bases so that
        _reconstruct_header can rebuild it; the instance __dict__ is passed
        along as the pickled state.
        """
        return (
            _reconstruct_header,
            (
                self.__class__.__name__,
                self.__class__.__bases__,
                str(self),
            ),
            self.__dict__)

    @classmethod
    def _reconstruct(cls, value):
        """Create an instance from *value* without running parse() or init()."""
        return str.__new__(cls, value)

    def fold(self, **_3to2kwargs):
        """Fold header according to policy.

        The parsed representation of the header is folded according to
        RFC5322 rules, as modified by the policy.  If the parse tree
        contains surrogateescaped bytes, the bytes are CTE encoded using
        the charset 'unknown-8bit'.

        Any non-ASCII characters in the parse tree are CTE encoded using
        charset utf-8. XXX: make this a policy setting.

        The returned value is an ASCII-only string possibly containing linesep
        characters, and ending with a linesep character.  The string includes
        the header name and the ': ' separator.

        'policy' is a required keyword argument; a KeyError is raised if it
        is not supplied.

        """
        # The docstring must be the first statement of the function; it used
        # to follow the keyword extraction and was therefore never attached
        # to fold.__doc__.
        policy = _3to2kwargs.pop('policy')
        # At some point we need to only put fws here if it was in the source.
        header = parser.Header([
            parser.HeaderLabel([
                parser.ValueTerminal(self.name, 'header-name'),
                parser.ValueTerminal(':', 'header-sep')]),
            parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')]),
                             self._parse_tree])
        return header.fold(policy=policy)
+
+
def _reconstruct_header(cls_name, bases, value):
    """Rebuild a header instance from the pieces produced by __reduce__.

    A fresh class named *cls_name* with the given *bases* is created and its
    _reconstruct() classmethod is used to wrap *value*.
    """
    header_cls = type(text_to_native_str(cls_name), bases, {})
    return header_cls._reconstruct(value)
+
+
class UnstructuredHeader(object):

    """Header whose value is parsed with parser.get_unstructured."""

    max_count = None
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Store the parse tree of *value* and its string form in *kwds*."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
+
+
class UniqueUnstructuredHeader(UnstructuredHeader):

    """UnstructuredHeader variant with max_count set to 1."""

    max_count = 1
+
+
class DateHeader(object):

    """Header whose value consists of a single timestamp.

    Adds a 'datetime' attribute holding the parsed value (None when the
    header value is empty).  The input may be a string, which is converted
    with utils.parsedate_to_datetime, or an already built datetime.  The
    string value of the header is the output of utils.format_datetime on
    that datetime.
    """

    max_count = None

    # This is used only for folding, not for creating 'decoded'.
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Fill *kwds* with 'datetime', 'decoded' and 'parse_tree'."""
        if value:
            if isinstance(value, str):
                value = utils.parsedate_to_datetime(value)
            kwds['datetime'] = value
            kwds['decoded'] = utils.format_datetime(kwds['datetime'])
            kwds['parse_tree'] = cls.value_parser(kwds['decoded'])
            return
        # Empty value: record the defect and fall back to empty results.
        kwds['defects'].append(errors.HeaderMissingRequiredValue())
        kwds['datetime'] = None
        kwds['decoded'] = ''
        kwds['parse_tree'] = parser.TokenList()

    def init(self, *args, **kw):
        """Consume the 'datetime' keyword, then defer to the parent init."""
        self._datetime = kw.pop('datetime')
        super().init(*args, **kw)

    @property
    def datetime(self):
        """The datetime stored by parse(), or None for an empty header."""
        return self._datetime
+
+
class UniqueDateHeader(DateHeader):

    """DateHeader variant with max_count set to 1."""

    max_count = 1
+
+
class AddressHeader(object):

    """Header holding a list of addresses, exposed as groups and addresses."""

    max_count = None

    @staticmethod
    def value_parser(value):
        """Parse *value* into an address list; all input must be consumed."""
        address_list, leftover = parser.get_address_list(value)
        assert not leftover, 'this should not happen'
        return address_list

    @classmethod
    def parse(cls, value, kwds):
        """Fill *kwds* with 'groups', 'defects', 'decoded' and 'parse_tree'.

        *value* is either a header string or Address/Group object(s); a
        single non-iterable object is treated as a one-element list.
        """
        if isinstance(value, str):
            # We are translating here from the RFC language (address/mailbox)
            # to our API language (group/address).
            address_list = cls.value_parser(value)
            kwds['parse_tree'] = address_list
            groups = [
                Group(entry.display_name,
                      [Address(box.display_name or '',
                               box.local_part or '',
                               box.domain or '')
                       for box in entry.all_mailboxes])
                for entry in address_list.addresses]
            defects = list(address_list.all_defects)
        else:
            # Assume it is Address/Group stuff
            if not hasattr(value, '__iter__'):
                value = [value]
            groups = []
            for item in value:
                if hasattr(item, 'addresses'):
                    groups.append(item)
                else:
                    # Wrap a bare address in an anonymous group.
                    groups.append(Group(None, [item]))
            defects = []
        kwds['groups'] = groups
        kwds['defects'] = defects
        kwds['decoded'] = ', '.join([str(item) for item in groups])
        if 'parse_tree' not in kwds:
            kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        """Consume the 'groups' keyword, then defer to the parent init."""
        self._groups = tuple(kw.pop('groups'))
        self._addresses = None
        super().init(*args, **kw)

    @property
    def groups(self):
        """Tuple of the groups that make up the header value."""
        return self._groups

    @property
    def addresses(self):
        """Flat tuple of every address in every group (computed once)."""
        if self._addresses is None:
            flat = []
            for group in self._groups:
                flat.extend(group.addresses)
            self._addresses = tuple(flat)
        return self._addresses
+
+
class UniqueAddressHeader(AddressHeader):

    """AddressHeader variant with max_count set to 1."""

    max_count = 1
+
+
class SingleAddressHeader(AddressHeader):

    """AddressHeader that additionally exposes its sole address."""

    @property
    def address(self):
        """The single address of the header.

        Raises ValueError unless the header holds exactly one address.
        """
        found = self.addresses
        if len(found) == 1:
            return found[0]
        raise ValueError(("value of single address header {} is not "
            "a single address").format(self.name))
+
+
class UniqueSingleAddressHeader(SingleAddressHeader):

    """SingleAddressHeader variant with max_count set to 1."""

    max_count = 1
+
+
class MIMEVersionHeader(object):

    """Header parsed with parser.parse_mime_version.

    Exposes 'major', 'minor' and 'version'; all three are None when the
    parse tree has no minor version.
    """

    max_count = 1

    value_parser = staticmethod(parser.parse_mime_version)

    @classmethod
    def parse(cls, value, kwds):
        """Fill *kwds* with the parse results and the version components."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        if tree.minor is None:
            kwds['major'] = None
        else:
            kwds['major'] = tree.major
        kwds['minor'] = tree.minor
        if tree.minor is None:
            kwds['version'] = None
        else:
            kwds['version'] = '{}.{}'.format(kwds['major'], kwds['minor'])

    def init(self, *args, **kw):
        """Consume the version keywords, then defer to the parent init."""
        self._version = kw.pop('version')
        self._major = kw.pop('major')
        self._minor = kw.pop('minor')
        super().init(*args, **kw)

    @property
    def major(self):
        """Major version component, or None."""
        return self._major

    @property
    def minor(self):
        """Minor version component, or None."""
        return self._minor

    @property
    def version(self):
        """The version as a 'major.minor' string, or None."""
        return self._version
+
+
class ParameterizedMIMEHeader(object):

    """Mixin that handles the params dict.

    Must be subclassed, and the subclass must provide a value_parser for
    the specific header.
    """

    max_count = 1

    @classmethod
    def parse(cls, value, kwds):
        """Fill *kwds* with the parse results and a sanitized 'params' dict.

        Parameter names are lower-cased; if a name occurs more than once the
        last occurrence wins.
        """
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        collected = {}
        if tree.params is not None:
            # The MIME RFCs specify that parameter ordering is arbitrary.
            for key, val in tree.params:
                collected[utils._sanitize(key).lower()] = utils._sanitize(val)
        kwds['params'] = collected

    def init(self, *args, **kw):
        """Consume the 'params' keyword, then defer to the parent init."""
        self._params = kw.pop('params')
        super().init(*args, **kw)

    @property
    def params(self):
        """A copy of the parameter dictionary (changes do not write back)."""
        return self._params.copy()
+
+
class ContentTypeHeader(ParameterizedMIMEHeader):

    """Parameterized header parsed with parser.parse_content_type_header."""

    value_parser = staticmethod(parser.parse_content_type_header)

    def init(self, *args, **kw):
        """Run the parent init, then cache the sanitized main and sub type."""
        super().init(*args, **kw)
        tree = self._parse_tree
        self._maintype = utils._sanitize(tree.maintype)
        self._subtype = utils._sanitize(tree.subtype)

    @property
    def maintype(self):
        """Sanitized main type taken from the parse tree."""
        return self._maintype

    @property
    def subtype(self):
        """Sanitized subtype taken from the parse tree."""
        return self._subtype

    @property
    def content_type(self):
        """The full type in 'maintype/subtype' form."""
        return '/'.join((self.maintype, self.subtype))
+
+
class ContentDispositionHeader(ParameterizedMIMEHeader):

    """Parameterized header parsed with
    parser.parse_content_disposition_header."""

    value_parser = staticmethod(parser.parse_content_disposition_header)

    def init(self, *args, **kw):
        """Run the parent init, then cache the disposition value.

        The value is sanitized unless the parse tree reports None.
        """
        super().init(*args, **kw)
        disposition = self._parse_tree.content_disposition
        if disposition is not None:
            disposition = utils._sanitize(disposition)
        self._content_disposition = disposition

    @property
    def content_disposition(self):
        """The sanitized content disposition, or None."""
        return self._content_disposition
+
+
class ContentTransferEncodingHeader(object):

    """Header parsed with parser.parse_content_transfer_encoding_header."""

    max_count = 1

    value_parser = staticmethod(parser.parse_content_transfer_encoding_header)

    @classmethod
    def parse(cls, value, kwds):
        """Fill *kwds* with the parse tree, its string form and its defects."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)

    def init(self, *args, **kw):
        """Run the parent init, then cache the sanitized cte value."""
        super().init(*args, **kw)
        self._cte = utils._sanitize(self._parse_tree.cte)

    @property
    def cte(self):
        """Sanitized 'cte' attribute of the parse tree."""
        return self._cte
+
+
+# The header factory #
+
# Default mapping from lower-cased header field name to the specialized
# header class used for it.  HeaderRegistry copies these entries into its
# own registry when it is created with use_default_map set to true.
_default_header_map = {
    'subject': UniqueUnstructuredHeader,
    'date': UniqueDateHeader,
    'resent-date': DateHeader,
    'orig-date': UniqueDateHeader,
    'sender': UniqueSingleAddressHeader,
    'resent-sender': SingleAddressHeader,
    'to': UniqueAddressHeader,
    'resent-to': AddressHeader,
    'cc': UniqueAddressHeader,
    'resent-cc': AddressHeader,
    'bcc': UniqueAddressHeader,
    'resent-bcc': AddressHeader,
    'from': UniqueAddressHeader,
    'resent-from': AddressHeader,
    'reply-to': UniqueAddressHeader,
    'mime-version': MIMEVersionHeader,
    'content-type': ContentTypeHeader,
    'content-disposition': ContentDispositionHeader,
    'content-transfer-encoding': ContentTransferEncodingHeader,
    }
+
class HeaderRegistry(object):

    """A header_factory and header registry."""

    def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
                       use_default_map=True):
        """Create a header_factory that works with the Policy API.

        base_class is placed last in the __bases__ of every header class the
        factory creates.  default_class is used for any header name that is
        not in the registry.  use_default_map (default True) controls
        whether the default name-to-class mapping is copied into the
        registry at creation time.

        """
        self.registry = dict(_default_header_map) if use_default_map else {}
        self.base_class = base_class
        self.default_class = default_class

    def map_to_type(self, name, cls):
        """Register cls as the specialized class for handling "name" headers.

        The name is stored lower-cased.

        """
        self.registry[name.lower()] = cls

    def __getitem__(self, name):
        """Return a new class combining the class for *name* with base_class.

        The lookup is case-insensitive and falls back to default_class.
        """
        specialized = self.registry.get(name.lower(), self.default_class)
        generated_name = text_to_native_str('_' + specialized.__name__)
        return type(generated_name, (specialized, self.base_class), {})

    def __call__(self, name, value):
        """Create a header instance for header 'name' from 'value'.

        The class is obtained through item lookup on the factory (a
        registry class or default_class, combined with base_class) and is
        then called with the name and the value.

        """
        header_cls = self[name]
        return header_cls(name, value)
diff --git a/contrib/python/future/future/backports/email/iterators.py b/contrib/python/future/future/backports/email/iterators.py
index afd4ed7754..82d320f814 100644
--- a/contrib/python/future/future/backports/email/iterators.py
+++ b/contrib/python/future/future/backports/email/iterators.py
@@ -1,74 +1,74 @@
-# Copyright (C) 2001-2006 Python Software Foundation
-# Author: Barry Warsaw
-# Contact: email-sig@python.org
-
-"""Various types of useful iterators and generators."""
-from __future__ import print_function
-from __future__ import unicode_literals
-from __future__ import division
-from __future__ import absolute_import
-
-__all__ = [
- 'body_line_iterator',
- 'typed_subpart_iterator',
- 'walk',
- # Do not include _structure() since it's part of the debugging API.
- ]
-
-import sys
-from io import StringIO
-
-
-# This function will become a method of the Message class
def walk(self):
    """Yield this message and then, depth-first, every nested subpart.

    This is a generator; non-multipart messages yield only themselves.
    """
    yield self
    if not self.is_multipart():
        return
    for child in self.get_payload():
        for descendant in child.walk():
            yield descendant
-
-
-# These two functions are imported into the Iterators.py interface module.
def body_line_iterator(msg, decode=False):
    """Yield the lines of every string payload found while walking *msg*.

    Optional decode (default False) is passed through to .get_payload().
    Parts whose payload is not a string are skipped.
    """
    for part in msg.walk():
        text = part.get_payload(decode=decode)
        if not isinstance(text, str):
            continue
        for line in StringIO(text):
            yield line
-
-
def typed_subpart_iterator(msg, maintype='text', subtype=None):
    """Yield the subparts of *msg* that have a given MIME type.

    `maintype' is the main MIME type to match against (default "text").
    When `subtype' is given the MIME subtype must match as well; when it
    is omitted only the main type is compared.
    """
    for part in msg.walk():
        if part.get_content_maintype() != maintype:
            continue
        if subtype is not None and part.get_content_subtype() != subtype:
            continue
        yield part
-
-
def _structure(msg, fp=None, level=0, include_default=False):
    """A handy debugging aid: print the content-type tree of *msg*.

    Each nesting level is indented by four spaces.  Output goes to *fp*
    (sys.stdout when None); with include_default the default type is
    appended in square brackets.
    """
    out = sys.stdout if fp is None else fp
    indent = ' ' * (level * 4)
    print(indent + msg.get_content_type(), end='', file=out)
    trailer = ' [%s]' % msg.get_default_type() if include_default else ''
    print(trailer, file=out)
    if not msg.is_multipart():
        return
    for child in msg.get_payload():
        _structure(child, out, level+1, include_default)
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Various types of useful iterators and generators."""
+from __future__ import print_function
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+
+__all__ = [
+ 'body_line_iterator',
+ 'typed_subpart_iterator',
+ 'walk',
+ # Do not include _structure() since it's part of the debugging API.
+ ]
+
+import sys
+from io import StringIO
+
+
+# This function will become a method of the Message class
def walk(self):
    """Yield this message and then, depth-first, every nested subpart.

    This is a generator; non-multipart messages yield only themselves.
    """
    yield self
    if not self.is_multipart():
        return
    for child in self.get_payload():
        for descendant in child.walk():
            yield descendant
+
+
+# These two functions are imported into the Iterators.py interface module.
def body_line_iterator(msg, decode=False):
    """Yield the lines of every string payload found while walking *msg*.

    Optional decode (default False) is passed through to .get_payload().
    Parts whose payload is not a string are skipped.
    """
    for part in msg.walk():
        text = part.get_payload(decode=decode)
        if not isinstance(text, str):
            continue
        for line in StringIO(text):
            yield line
+
+
def typed_subpart_iterator(msg, maintype='text', subtype=None):
    """Yield the subparts of *msg* that have a given MIME type.

    `maintype' is the main MIME type to match against (default "text").
    When `subtype' is given the MIME subtype must match as well; when it
    is omitted only the main type is compared.
    """
    for part in msg.walk():
        if part.get_content_maintype() != maintype:
            continue
        if subtype is not None and part.get_content_subtype() != subtype:
            continue
        yield part
+
+
def _structure(msg, fp=None, level=0, include_default=False):
    """A handy debugging aid: print the content-type tree of *msg*.

    Each nesting level is indented by four spaces.  Output goes to *fp*
    (sys.stdout when None); with include_default the default type is
    appended in square brackets.
    """
    out = sys.stdout if fp is None else fp
    indent = ' ' * (level * 4)
    print(indent + msg.get_content_type(), end='', file=out)
    trailer = ' [%s]' % msg.get_default_type() if include_default else ''
    print(trailer, file=out)
    if not msg.is_multipart():
        return
    for child in msg.get_payload():
        _structure(child, out, level+1, include_default)
diff --git a/contrib/python/future/future/backports/email/message.py b/contrib/python/future/future/backports/email/message.py
index b54a96494c..d8d9615d7d 100644
--- a/contrib/python/future/future/backports/email/message.py
+++ b/contrib/python/future/future/backports/email/message.py
@@ -1,882 +1,882 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2001-2007 Python Software Foundation
-# Author: Barry Warsaw
-# Contact: email-sig@python.org
-
-"""Basic message object for the email package object model."""
-from __future__ import absolute_import, division, unicode_literals
-from future.builtins import list, range, str, zip
-
-__all__ = ['Message']
-
-import re
-import uu
-import base64
-import binascii
-from io import BytesIO, StringIO
-
-# Intrapackage imports
-from future.utils import as_native_str
-from future.backports.email import utils
-from future.backports.email import errors
-from future.backports.email._policybase import compat32
-from future.backports.email import charset as _charset
-from future.backports.email._encoded_words import decode_b
-Charset = _charset.Charset
-
-SEMISPACE = '; '
-
-# Regular expression that matches `special' characters in parameters, the
-# existence of which force quoting of the parameter value.
-tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
-
-
def _splitparam(param):
    """Split a header value at its first ';' into (head, rest).

    Both pieces are stripped of surrounding whitespace; rest is None when
    the value contains no ';'.
    """
    # Split header parameters.  BAW: this may be too simple.  It isn't
    # strictly RFC 2045 (section 5.1) compliant, but it catches most headers
    # found in the wild.  We may eventually need a full fledged parser.
    # RDM: we might have a Header here; for now just stringify it.
    head, semi, tail = str(param).partition(';')
    rest = tail.strip() if semi else None
    return head.strip(), rest
-
def _formatparam(param, value=None, quote=True):
    """Convenience function to format and return a key=value pair.

    With no value (None or empty) the bare parameter name is returned.
    A three tuple (charset, language, value) is encoded according to
    RFC 2231 rules, as is a value containing non-ascii characters (using
    the utf-8 charset and a null language).  Otherwise the value is quoted
    if needed or if quote is true.
    """
    if value is None or len(value) == 0:
        return param
    # A tuple is used for RFC 2231 encoded parameter values where items
    # are (charset, language, value).  charset is a string, not a Charset
    # instance.  RFC 2231 encoded values are never quoted, per RFC.
    if isinstance(value, tuple):
        # Encode as per RFC 2231
        encoded = utils.encode_rfc2231(value[2], value[0], value[1])
        return '%s=%s' % (param + '*', encoded)
    try:
        value.encode('ascii')
    except UnicodeEncodeError:
        encoded = utils.encode_rfc2231(value, 'utf-8', '')
        return '%s=%s' % (param + '*', encoded)
    # BAW: Please check this.  I think that if quote is set it should
    # force quoting even if not necessary.
    if quote or tspecials.search(value):
        return '%s="%s"' % (param, utils.quote(value))
    return '%s=%s' % (param, value)
-
def _parseparam(s):
    """Split a parameterized header value into a list of its pieces.

    Pieces are separated by ';' characters that are not inside double
    quotes.  For 'key=value' pieces the key is lower-cased and whitespace
    around key and value is removed.
    """
    # RDM This might be a Header, so for now stringify it.
    remaining = ';' + str(s)
    pieces = []
    while remaining.startswith(';'):
        remaining = remaining[1:]
        cut = remaining.find(';')
        # A ';' preceded by an odd number of unescaped quotes is inside a
        # quoted string; move on to the next one.
        while cut > 0 and (remaining.count('"', 0, cut) -
                           remaining.count('\\"', 0, cut)) % 2:
            cut = remaining.find(';', cut + 1)
        if cut < 0:
            cut = len(remaining)
        piece = remaining[:cut]
        if '=' in piece:
            key, _, val = piece.partition('=')
            piece = key.strip().lower() + '=' + val.strip()
        pieces.append(piece.strip())
        remaining = remaining[cut:]
    return pieces
-
-
def _unquotevalue(value):
    """Unquote a parameter value, keeping RFC 2231 tuples as tuples."""
    # This is different than utils.collapse_rfc2231_value() because it doesn't
    # try to convert the value to a unicode.  Message.get_param() and
    # Message.get_params() are both currently defined to return the tuple in
    # the face of RFC 2231 parameters.
    if not isinstance(value, tuple):
        return utils.unquote(value)
    return value[0], value[1], utils.unquote(value[2])
-
-
-class Message(object):
- """Basic message object.
-
- A message object is defined as something that has a bunch of RFC 2822
- headers and a payload. It may optionally have an envelope header
- (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
- multipart or a message/rfc822), then the payload is a list of Message
- objects, otherwise it is a string.
-
- Message objects implement part of the `mapping' interface, which assumes
- there is exactly one occurrence of the header per message. Some headers
- do in fact appear multiple times (e.g. Received) and for those headers,
- you must use the explicit API to set or get all the headers. Not all of
- the mapping methods are implemented.
- """
- def __init__(self, policy=compat32):
- self.policy = policy
- self._headers = list()
- self._unixfrom = None
- self._payload = None
- self._charset = None
- # Defaults for multipart messages
- self.preamble = self.epilogue = None
- self.defects = []
- # Default content type
- self._default_type = 'text/plain'
-
- @as_native_str(encoding='utf-8')
- def __str__(self):
- """Return the entire formatted message as a string.
- This includes the headers, body, and envelope header.
- """
- return self.as_string()
-
- def as_string(self, unixfrom=False, maxheaderlen=0):
- """Return the entire formatted message as a (unicode) string.
- Optional `unixfrom' when True, means include the Unix From_ envelope
- header.
-
- This is a convenience method and may not generate the message exactly
- as you intend. For more flexibility, use the flatten() method of a
- Generator instance.
- """
- from future.backports.email.generator import Generator
- fp = StringIO()
- g = Generator(fp, mangle_from_=False, maxheaderlen=maxheaderlen)
- g.flatten(self, unixfrom=unixfrom)
- return fp.getvalue()
-
- def is_multipart(self):
- """Return True if the message consists of multiple parts."""
- return isinstance(self._payload, list)
-
- #
- # Unix From_ line
- #
- def set_unixfrom(self, unixfrom):
- self._unixfrom = unixfrom
-
- def get_unixfrom(self):
- return self._unixfrom
-
- #
- # Payload manipulation.
- #
- def attach(self, payload):
- """Add the given payload to the current payload.
-
- The current payload will always be a list of objects after this method
- is called. If you want to set the payload to a scalar object, use
- set_payload() instead.
- """
- if self._payload is None:
- self._payload = [payload]
- else:
- self._payload.append(payload)
-
- def get_payload(self, i=None, decode=False):
- """Return a reference to the payload.
-
- The payload will either be a list object or a string. If you mutate
- the list object, you modify the message's payload in place. Optional
- i returns that index into the payload.
-
- Optional decode is a flag indicating whether the payload should be
- decoded or not, according to the Content-Transfer-Encoding header
- (default is False).
-
- When True and the message is not a multipart, the payload will be
- decoded if this header's value is `quoted-printable' or `base64'. If
- some other encoding is used, or the header is missing, or if the
- payload has bogus data (i.e. bogus base64 or uuencoded data), the
- payload is returned as-is.
-
- If the message is a multipart and the decode flag is True, then None
- is returned.
- """
- # Here is the logic table for this code, based on the email5.0.0 code:
- # i decode is_multipart result
- # ------ ------ ------------ ------------------------------
- # None True True None
- # i True True None
- # None False True _payload (a list)
- # i False True _payload element i (a Message)
- # i False False error (not a list)
- # i True False error (not a list)
- # None False False _payload
- # None True False _payload decoded (bytes)
- # Note that Barry planned to factor out the 'decode' case, but that
- # isn't so easy now that we handle the 8 bit data, which needs to be
- # converted in both the decode and non-decode path.
- if self.is_multipart():
- if decode:
- return None
- if i is None:
- return self._payload
- else:
- return self._payload[i]
- # For backward compatibility, Use isinstance and this error message
- # instead of the more logical is_multipart test.
- if i is not None and not isinstance(self._payload, list):
- raise TypeError('Expected list, got %s' % type(self._payload))
- payload = self._payload
- # cte might be a Header, so for now stringify it.
- cte = str(self.get('content-transfer-encoding', '')).lower()
- # payload may be bytes here.
- if isinstance(payload, str):
- payload = str(payload) # for Python-Future, so surrogateescape works
- if utils._has_surrogates(payload):
- bpayload = payload.encode('ascii', 'surrogateescape')
- if not decode:
- try:
- payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace')
- except LookupError:
- payload = bpayload.decode('ascii', 'replace')
- elif decode:
- try:
- bpayload = payload.encode('ascii')
- except UnicodeError:
- # This won't happen for RFC compliant messages (messages
- # containing only ASCII codepoints in the unicode input).
- # If it does happen, turn the string into bytes in a way
- # guaranteed not to fail.
- bpayload = payload.encode('raw-unicode-escape')
- if not decode:
- return payload
- if cte == 'quoted-printable':
- return utils._qdecode(bpayload)
- elif cte == 'base64':
- # XXX: this is a bit of a hack; decode_b should probably be factored
- # out somewhere, but I haven't figured out where yet.
- value, defects = decode_b(b''.join(bpayload.splitlines()))
- for defect in defects:
- self.policy.handle_defect(self, defect)
- return value
- elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
- in_file = BytesIO(bpayload)
- out_file = BytesIO()
- try:
- uu.decode(in_file, out_file, quiet=True)
- return out_file.getvalue()
- except uu.Error:
- # Some decoding problem
- return bpayload
- if isinstance(payload, str):
- return bpayload
- return payload
-
- def set_payload(self, payload, charset=None):
- """Set the payload to the given value.
-
- Optional charset sets the message's default character set. See
- set_charset() for details.
- """
- self._payload = payload
- if charset is not None:
- self.set_charset(charset)
-
- def set_charset(self, charset):
- """Set the charset of the payload to a given character set.
-
- charset can be a Charset instance, a string naming a character set, or
- None. If it is a string it will be converted to a Charset instance.
- If charset is None, the charset parameter will be removed from the
- Content-Type field. Anything else will generate a TypeError.
-
- The message will be assumed to be of type text/* encoded with
- charset.input_charset. It will be converted to charset.output_charset
- and encoded properly, if needed, when generating the plain text
- representation of the message. MIME headers (MIME-Version,
- Content-Type, Content-Transfer-Encoding) will be added as needed.
- """
- if charset is None:
- self.del_param('charset')
- self._charset = None
- return
- if not isinstance(charset, Charset):
- charset = Charset(charset)
- self._charset = charset
- if 'MIME-Version' not in self:
- self.add_header('MIME-Version', '1.0')
- if 'Content-Type' not in self:
- self.add_header('Content-Type', 'text/plain',
- charset=charset.get_output_charset())
- else:
- self.set_param('charset', charset.get_output_charset())
- if charset != charset.get_output_charset():
- self._payload = charset.body_encode(self._payload)
- if 'Content-Transfer-Encoding' not in self:
- cte = charset.get_body_encoding()
- try:
- cte(self)
- except TypeError:
- self._payload = charset.body_encode(self._payload)
- self.add_header('Content-Transfer-Encoding', cte)
-
- def get_charset(self):
- """Return the Charset instance associated with the message's payload.
- """
- return self._charset
-
- #
- # MAPPING INTERFACE (partial)
- #
- def __len__(self):
- """Return the total number of headers, including duplicates."""
- return len(self._headers)
-
- def __getitem__(self, name):
- """Get a header value.
-
- Return None if the header is missing instead of raising an exception.
-
- Note that if the header appeared multiple times, exactly which
- occurrence gets returned is undefined. Use get_all() to get all
- the values matching a header field name.
- """
- return self.get(name)
-
- def __setitem__(self, name, val):
- """Set the value of a header.
-
- Note: this does not overwrite an existing header with the same field
- name. Use __delitem__() first to delete any existing headers.
- """
- max_count = self.policy.header_max_count(name)
- if max_count:
- lname = name.lower()
- found = 0
- for k, v in self._headers:
- if k.lower() == lname:
- found += 1
- if found >= max_count:
- raise ValueError("There may be at most {} {} headers "
- "in a message".format(max_count, name))
- self._headers.append(self.policy.header_store_parse(name, val))
-
- def __delitem__(self, name):
- """Delete all occurrences of a header, if present.
-
- Does not raise an exception if the header is missing.
- """
- name = name.lower()
- newheaders = list()
- for k, v in self._headers:
- if k.lower() != name:
- newheaders.append((k, v))
- self._headers = newheaders
-
- def __contains__(self, name):
- return name.lower() in [k.lower() for k, v in self._headers]
-
- def __iter__(self):
- for field, value in self._headers:
- yield field
-
- def keys(self):
- """Return a list of all the message's header field names.
-
- These will be sorted in the order they appeared in the original
- message, or were added to the message, and may contain duplicates.
- Any fields deleted and re-inserted are always appended to the header
- list.
- """
- return [k for k, v in self._headers]
-
- def values(self):
- """Return a list of all the message's header values.
-
- These will be sorted in the order they appeared in the original
- message, or were added to the message, and may contain duplicates.
- Any fields deleted and re-inserted are always appended to the header
- list.
- """
- return [self.policy.header_fetch_parse(k, v)
- for k, v in self._headers]
-
- def items(self):
- """Get all the message's header fields and values.
-
- These will be sorted in the order they appeared in the original
- message, or were added to the message, and may contain duplicates.
- Any fields deleted and re-inserted are always appended to the header
- list.
- """
- return [(k, self.policy.header_fetch_parse(k, v))
- for k, v in self._headers]
-
- def get(self, name, failobj=None):
- """Get a header value.
-
- Like __getitem__() but return failobj instead of None when the field
- is missing.
- """
- name = name.lower()
- for k, v in self._headers:
- if k.lower() == name:
- return self.policy.header_fetch_parse(k, v)
- return failobj
-
- #
- # "Internal" methods (public API, but only intended for use by a parser
- # or generator, not normal application code.
- #
-
- def set_raw(self, name, value):
- """Store name and value in the model without modification.
-
- This is an "internal" API, intended only for use by a parser.
- """
- self._headers.append((name, value))
-
- def raw_items(self):
- """Return the (name, value) header pairs without modification.
-
- This is an "internal" API, intended only for use by a generator.
- """
- return iter(self._headers.copy())
-
- #
- # Additional useful stuff
- #
-
- def get_all(self, name, failobj=None):
- """Return a list of all the values for the named field.
-
- These will be sorted in the order they appeared in the original
- message, and may contain duplicates. Any fields deleted and
- re-inserted are always appended to the header list.
-
- If no such fields exist, failobj is returned (defaults to None).
- """
- values = []
- name = name.lower()
- for k, v in self._headers:
- if k.lower() == name:
- values.append(self.policy.header_fetch_parse(k, v))
- if not values:
- return failobj
- return values
-
- def add_header(self, _name, _value, **_params):
- """Extended header setting.
-
- name is the header field to add. keyword arguments can be used to set
- additional parameters for the header field, with underscores converted
- to dashes. Normally the parameter will be added as key="value" unless
- value is None, in which case only the key will be added. If a
- parameter value contains non-ASCII characters it can be specified as a
- three-tuple of (charset, language, value), in which case it will be
- encoded according to RFC2231 rules. Otherwise it will be encoded using
- the utf-8 charset and a language of ''.
-
- Examples:
-
- msg.add_header('content-disposition', 'attachment', filename='bud.gif')
- msg.add_header('content-disposition', 'attachment',
- filename=('utf-8', '', 'Fußballer.ppt'))
- msg.add_header('content-disposition', 'attachment',
- filename='Fußballer.ppt'))
- """
- parts = []
- for k, v in _params.items():
- if v is None:
- parts.append(k.replace('_', '-'))
- else:
- parts.append(_formatparam(k.replace('_', '-'), v))
- if _value is not None:
- parts.insert(0, _value)
- self[_name] = SEMISPACE.join(parts)
-
- def replace_header(self, _name, _value):
- """Replace a header.
-
- Replace the first matching header found in the message, retaining
- header order and case. If no matching header was found, a KeyError is
- raised.
- """
- _name = _name.lower()
- for i, (k, v) in zip(range(len(self._headers)), self._headers):
- if k.lower() == _name:
- self._headers[i] = self.policy.header_store_parse(k, _value)
- break
- else:
- raise KeyError(_name)
-
- #
- # Use these three methods instead of the three above.
- #
-
- def get_content_type(self):
- """Return the message's content type.
-
- The returned string is coerced to lower case of the form
- `maintype/subtype'. If there was no Content-Type header in the
- message, the default type as given by get_default_type() will be
- returned. Since according to RFC 2045, messages always have a default
- type this will always return a value.
-
- RFC 2045 defines a message's default type to be text/plain unless it
- appears inside a multipart/digest container, in which case it would be
- message/rfc822.
- """
- missing = object()
- value = self.get('content-type', missing)
- if value is missing:
- # This should have no parameters
- return self.get_default_type()
- ctype = _splitparam(value)[0].lower()
- # RFC 2045, section 5.2 says if its invalid, use text/plain
- if ctype.count('/') != 1:
- return 'text/plain'
- return ctype
-
- def get_content_maintype(self):
- """Return the message's main content type.
-
- This is the `maintype' part of the string returned by
- get_content_type().
- """
- ctype = self.get_content_type()
- return ctype.split('/')[0]
-
- def get_content_subtype(self):
- """Returns the message's sub-content type.
-
- This is the `subtype' part of the string returned by
- get_content_type().
- """
- ctype = self.get_content_type()
- return ctype.split('/')[1]
-
- def get_default_type(self):
- """Return the `default' content type.
-
- Most messages have a default content type of text/plain, except for
- messages that are subparts of multipart/digest containers. Such
- subparts have a default content type of message/rfc822.
- """
- return self._default_type
-
- def set_default_type(self, ctype):
- """Set the `default' content type.
-
- ctype should be either "text/plain" or "message/rfc822", although this
- is not enforced. The default content type is not stored in the
- Content-Type header.
- """
- self._default_type = ctype
-
- def _get_params_preserve(self, failobj, header):
- # Like get_params() but preserves the quoting of values. BAW:
- # should this be part of the public interface?
- missing = object()
- value = self.get(header, missing)
- if value is missing:
- return failobj
- params = []
- for p in _parseparam(value):
- try:
- name, val = p.split('=', 1)
- name = name.strip()
- val = val.strip()
- except ValueError:
- # Must have been a bare attribute
- name = p.strip()
- val = ''
- params.append((name, val))
- params = utils.decode_params(params)
- return params
-
- def get_params(self, failobj=None, header='content-type', unquote=True):
- """Return the message's Content-Type parameters, as a list.
-
- The elements of the returned list are 2-tuples of key/value pairs, as
- split on the `=' sign. The left hand side of the `=' is the key,
- while the right hand side is the value. If there is no `=' sign in
- the parameter the value is the empty string. The value is as
- described in the get_param() method.
-
- Optional failobj is the object to return if there is no Content-Type
- header. Optional header is the header to search instead of
- Content-Type. If unquote is True, the value is unquoted.
- """
- missing = object()
- params = self._get_params_preserve(missing, header)
- if params is missing:
- return failobj
- if unquote:
- return [(k, _unquotevalue(v)) for k, v in params]
- else:
- return params
-
def get_param(self, param, failobj=None, header='content-type',
              unquote=True):
    """Return the parameter value if found in the Content-Type header.

    Optional failobj is the object to return if there is no Content-Type
    header, or the Content-Type header has no such parameter.  Optional
    header is the header to search instead of Content-Type.

    Parameter keys are always compared case insensitively.  The return
    value can either be a string, or a 3-tuple if the parameter was RFC
    2231 encoded.  When it's a 3-tuple, the elements of the value are of
    the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
    LANGUAGE can be None, in which case you should consider VALUE to be
    encoded in the us-ascii charset.  You can usually ignore LANGUAGE.
    The parameter value (either the returned string, or the VALUE item in
    the 3-tuple) is always unquoted, unless unquote is set to False.

    If your application doesn't care whether the parameter was RFC 2231
    encoded, it can turn the return value into a string as follows:

        rawparam = msg.get_param('foo')
        param = email.utils.collapse_rfc2231_value(rawparam)

    """
    if header not in self:
        return failobj
    # The wanted key is loop-invariant, so lower-case it only once.
    wanted = param.lower()
    for key, value in self._get_params_preserve(failobj, header):
        if key.lower() == wanted:
            # The first matching parameter wins.
            return _unquotevalue(value) if unquote else value
    return failobj
-
def set_param(self, param, value, header='Content-Type', requote=True,
              charset=None, language=''):
    """Set a parameter in the Content-Type header.

    An existing parameter of the same name (compared case insensitively)
    has its value replaced; otherwise the parameter is appended.

    If header is Content-Type and the message does not have one yet, the
    header starts out as "text/plain" before the parameter is appended.

    header selects a different header to operate on.  requote is handed
    to the parameter formatter, and also decides whether the existing
    parameters are unquoted when they are read back for rewriting.

    If charset is given and value is not already a tuple, the value is
    wrapped as (charset, language, value) so that it is formatted as an
    RFC 2231 parameter.  language defaults to the empty string.
    """
    if charset and not isinstance(value, tuple):
        value = (charset, language, value)

    if header.lower() == 'content-type' and header not in self:
        current = 'text/plain'
    else:
        current = self.get(header)

    if self.get_param(param, header=header):
        # The parameter is already present: rebuild the whole value,
        # swapping in the new parameter at its existing position.
        wanted = param.lower()
        current = ''
        for old_name, old_value in self.get_params(header=header,
                                                   unquote=requote):
            if old_name.lower() == wanted:
                piece = _formatparam(param, value, requote)
            else:
                piece = _formatparam(old_name, old_value, requote)
            # An empty accumulator is replaced rather than joined, so no
            # leading separator is ever produced.
            current = SEMISPACE.join([current, piece]) if current else piece
    else:
        formatted = _formatparam(param, value, requote)
        current = SEMISPACE.join([current, formatted]) if current else formatted

    # Only touch the header list when the value actually changed.
    if current != self.get(header):
        del self[header]
        self[header] = current
-
def del_param(self, param, header='content-type', requote=True):
    """Remove the given parameter completely from the Content-Type header.

    The header value is rebuilt from every parameter whose name differs
    (case insensitively) from param; if the result differs from the
    current value the header is deleted and re-added.  requote is passed
    on when reading and re-formatting the remaining parameters.  Optional
    header names a header to use instead of Content-Type.  Nothing
    happens when the header is absent.
    """
    if header not in self:
        return
    unwanted = param.lower()
    rebuilt = ''
    for name, value in self.get_params(header=header, unquote=requote):
        if name.lower() == unwanted:
            continue
        piece = _formatparam(name, value, requote)
        # An empty accumulator is replaced rather than joined.
        rebuilt = SEMISPACE.join([rebuilt, piece]) if rebuilt else piece
    if rebuilt != self.get(header):
        del self[header]
        self[header] = rebuilt
-
def set_type(self, type, header='Content-Type', requote=True):
    """Set the main type and subtype for the Content-Type header.

    type must be a string in the form "maintype/subtype" (exactly one
    '/'), otherwise a ValueError is raised.

    This method replaces the Content-Type header, keeping all the
    parameters in place: the existing parameters are read back (unquoted
    when requote is true), the header is rewritten with the new type, and
    each parameter is re-added through set_param() with the same requote
    flag.

    An alternative header can be specified in the header argument.  When
    the Content-Type header is set, the MIME-Version header is also reset
    to '1.0'.
    """
    # BAW: should we be strict?
    if type.count('/') != 1:
        raise ValueError(
            'type must be of the form "maintype/subtype", got %r' % (type,))
    # Set the Content-Type, you get a MIME-Version
    if header.lower() == 'content-type':
        del self['mime-version']
        self['MIME-Version'] = '1.0'
    if header not in self:
        self[header] = type
        return
    params = self.get_params(header=header, unquote=requote)
    del self[header]
    self[header] = type
    # Skip the first param; it's the old type.
    for name, value in params[1:]:
        self.set_param(name, value, header, requote)
-
def get_filename(self, failobj=None):
    """Return the filename associated with the payload if present.

    The `filename' parameter of the Content-Disposition header is tried
    first; when it is not found, the `name' parameter of the Content-Type
    header is used instead.  failobj is returned when neither exists.
    The value found is collapsed with utils.collapse_rfc2231_value() and
    stripped of surrounding whitespace.
    """
    sentinel = object()
    found = self.get_param('filename', sentinel, 'content-disposition')
    if found is sentinel:
        found = self.get_param('name', sentinel, 'content-type')
        if found is sentinel:
            return failobj
    return utils.collapse_rfc2231_value(found).strip()
-
def get_boundary(self, failobj=None):
    """Return the boundary associated with the payload if present.

    The value comes from the `boundary' parameter of the Content-Type
    header; failobj is returned when that parameter is not found.  The
    value is collapsed with utils.collapse_rfc2231_value() and only its
    trailing whitespace is removed.
    """
    sentinel = object()
    found = self.get_param('boundary', sentinel)
    if found is not sentinel:
        # RFC 2046 says that boundaries may begin but not end in w/s
        return utils.collapse_rfc2231_value(found).rstrip()
    return failobj
-
- def set_boundary(self, boundary):
- """Set the boundary parameter in Content-Type to 'boundary'.
-
- This is subtly different than deleting the Content-Type header and
- adding a new one with a new boundary parameter via add_header(). The
- main difference is that using the set_boundary() method preserves the
- order of the Content-Type header in the original message.
-
- HeaderParseError is raised if the message has no Content-Type header.
- """
- missing = object()
- params = self._get_params_preserve(missing, 'content-type')
- if params is missing:
- # There was no Content-Type header, and we don't know what type
- # to set it to, so raise an exception.
- raise errors.HeaderParseError('No Content-Type header found')
+# -*- coding: utf-8 -*-
+# Copyright (C) 2001-2007 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Basic message object for the email package object model."""
+from __future__ import absolute_import, division, unicode_literals
+from future.builtins import list, range, str, zip
+
+__all__ = ['Message']
+
+import re
+import uu
+import base64
+import binascii
+from io import BytesIO, StringIO
+
+# Intrapackage imports
+from future.utils import as_native_str
+from future.backports.email import utils
+from future.backports.email import errors
+from future.backports.email._policybase import compat32
+from future.backports.email import charset as _charset
+from future.backports.email._encoded_words import decode_b
+Charset = _charset.Charset
+
+SEMISPACE = '; '
+
+# Regular expression that matches `special' characters in parameters, the
+# existence of which force quoting of the parameter value.
+tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
+
+
def _splitparam(param):
    """Split a header value at its first ';'.

    Returns (main, rest), both stripped of surrounding whitespace; rest is
    None when the value contains no ';' at all.
    """
    # BAW: this may be too simple.  It isn't strictly RFC 2045 (section
    # 5.1) compliant, but it catches most headers found in the wild.  We
    # may eventually need a full fledged parser.
    # RDM: we might have a Header here; for now just stringify it.
    head, semicolon, tail = str(param).partition(';')
    rest = tail.strip() if semicolon else None
    return head.strip(), rest
+
def _formatparam(param, value=None, quote=True):
    """Convenience function to format and return a key=value pair.

    A value of None or of length zero yields the bare param.  A tuple
    value is taken to be (charset, language, value) and is encoded through
    utils.encode_rfc2231(), producing `param*=...'.  A string that cannot
    be encoded as ASCII is encoded the same way with the utf-8 charset and
    an empty language.  Any other string is emitted as param="value" when
    quote is true or the value contains a tspecials character, and as
    param=value otherwise.
    """
    if value is None or len(value) == 0:
        return param
    # A tuple is used for RFC 2231 encoded parameter values where items
    # are (charset, language, value).  charset is a string, not a Charset
    # instance.  RFC 2231 encoded values are never quoted, per RFC.
    if isinstance(value, tuple):
        encoded = utils.encode_rfc2231(value[2], value[0], value[1])
        return '%s=%s' % (param + '*', encoded)
    try:
        value.encode('ascii')
    except UnicodeEncodeError:
        encoded = utils.encode_rfc2231(value, 'utf-8', '')
        return '%s=%s' % (param + '*', encoded)
    # BAW: Please check this.  I think that if quote is set it should
    # force quoting even if not necessary.
    if quote or tspecials.search(value):
        return '%s="%s"' % (param, utils.quote(value))
    return '%s=%s' % (param, value)
+
def _parseparam(s):
    """Split a header value into its ';'-separated pieces.

    A ';' that falls inside a double-quoted string (judged by counting the
    unescaped '"' characters before it) does not end a piece.  For pieces
    containing '=', the text before the first '=' is stripped and
    lower-cased and the text after it is stripped.  Returns the list of
    pieces in order.
    """
    # RDM This might be a Header, so for now stringify it.
    remaining = ';' + str(s)
    pieces = []
    while remaining.startswith(';'):
        remaining = remaining[1:]
        cut = remaining.find(';')
        # Skip any ';' preceded by an odd number of unescaped quotes: it
        # sits inside a quoted string.
        while cut > 0:
            quotes = remaining.count('"', 0, cut)
            escaped = remaining.count('\\"', 0, cut)
            if (quotes - escaped) % 2 == 0:
                break
            cut = remaining.find(';', cut + 1)
        if cut < 0:
            cut = len(remaining)
        field, remaining = remaining[:cut], remaining[cut:]
        key, equals, val = field.partition('=')
        if equals:
            field = key.strip().lower() + '=' + val.strip()
        pieces.append(field.strip())
    return pieces
+
+
def _unquotevalue(value):
    """Unquote a parameter value, keeping RFC 2231 tuples as tuples.

    For a tuple, items 0 and 1 are passed through and only item 2 is run
    through utils.unquote(); anything else is unquoted directly.
    """
    # This is different than utils.collapse_rfc2231_value() because it doesn't
    # try to convert the value to a unicode.  Message.get_param() and
    # Message.get_params() are both currently defined to return the tuple in
    # the face of RFC 2231 parameters.
    if not isinstance(value, tuple):
        return utils.unquote(value)
    return value[0], value[1], utils.unquote(value[2])
+
+
+class Message(object):
+ """Basic message object.
+
+ A message object is defined as something that has a bunch of RFC 2822
+ headers and a payload. It may optionally have an envelope header
+ (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
+ multipart or a message/rfc822), then the payload is a list of Message
+ objects, otherwise it is a string.
+
+ Message objects implement part of the `mapping' interface, which assumes
+ there is exactly one occurrence of the header per message. Some headers
+ do in fact appear multiple times (e.g. Received) and for those headers,
+ you must use the explicit API to set or get all the headers. Not all of
+ the mapping methods are implemented.
+ """
def __init__(self, policy=compat32):
    """Create an empty message: no headers, no payload, no charset."""
    self.policy = policy
    # Ordered (name, value) pairs; the same name may appear several times.
    self._headers = list()
    self._unixfrom = None
    self._payload = None
    self._charset = None
    # Default content type
    self._default_type = 'text/plain'
    # Defaults for multipart messages
    self.preamble = None
    self.epilogue = None
    self.defects = []
+
@as_native_str(encoding='utf-8')
def __str__(self):
    """Return the entire formatted message as a string.

    Delegates to as_string() with its default arguments; the result is
    then passed through the as_native_str(encoding='utf-8') decorator.
    """
    return self.as_string()
+
def as_string(self, unixfrom=False, maxheaderlen=0):
    """Return the entire formatted message as a (unicode) string.

    unixfrom is forwarded to Generator.flatten(); when true it asks for
    the Unix From_ envelope header to be included.  maxheaderlen is
    forwarded to the Generator constructor.

    This is a convenience method and may not generate the message exactly
    as you intend.  For more flexibility, use the flatten() method of a
    Generator instance.
    """
    # Imported here rather than at module level, as in the original.
    from future.backports.email.generator import Generator
    buf = StringIO()
    generator = Generator(buf, mangle_from_=False, maxheaderlen=maxheaderlen)
    generator.flatten(self, unixfrom=unixfrom)
    return buf.getvalue()
+
def is_multipart(self):
    """Return True when the payload is a list, False otherwise."""
    payload = self._payload
    return isinstance(payload, list)
+
+ #
+ # Unix From_ line
+ #
def set_unixfrom(self, unixfrom):
    """Store unixfrom as the message's Unix From_ envelope line."""
    self._unixfrom = unixfrom
+
def get_unixfrom(self):
    """Return the stored Unix From_ envelope line (None if never set)."""
    return self._unixfrom
+
+ #
+ # Payload manipulation.
+ #
def attach(self, payload):
    """Add the given payload to the current payload.

    With no payload yet, the payload becomes a one-element list holding
    the argument; otherwise the argument is appended to the existing
    payload.  To set the payload to a scalar object, use set_payload()
    instead.
    """
    if self._payload is not None:
        self._payload.append(payload)
    else:
        self._payload = [payload]
+
def get_payload(self, i=None, decode=False):
    """Return a reference to the payload.

    The payload will either be a list object or a string.  If you mutate
    the list object, you modify the message's payload in place.  Optional
    i returns that index into the payload.

    Optional decode is a flag indicating whether the payload should be
    decoded or not, according to the Content-Transfer-Encoding header
    (default is False).

    When True and the message is not a multipart, the payload will be
    decoded if this header's value is `quoted-printable', `base64' or one
    of the uuencode spellings.  If some other encoding is used, or the
    header is missing, the payload is returned as bytes without further
    decoding; uuencoded data that fails to decode is likewise returned
    undecoded.  A payload that is neither text nor bytes (for example
    None) is returned as-is.

    If the message is a multipart and the decode flag is True, then None
    is returned.

    Raises TypeError if i is given for a non-multipart message.
    """
    # Here is the logic table for this code, based on the email5.0.0 code:
    #   i     decode  is_multipart  result
    # ------  ------  ------------  ------------------------------
    #  None   True    True          None
    #   i     True    True          None
    #  None   False   True          _payload (a list)
    #   i     False   True          _payload element i (a Message)
    #   i     False   False         error (not a list)
    #   i     True    False         error (not a list)
    #  None   False   False         _payload
    #  None   True    False         _payload decoded (bytes)
    # Note that Barry planned to factor out the 'decode' case, but that
    # isn't so easy now that we handle the 8 bit data, which needs to be
    # converted in both the decode and non-decode path.
    if self.is_multipart():
        if decode:
            return None
        if i is None:
            return self._payload
        return self._payload[i]
    # For backward compatibility, Use isinstance and this error message
    # instead of the more logical is_multipart test.
    if i is not None and not isinstance(self._payload, list):
        raise TypeError('Expected list, got %s' % type(self._payload))
    payload = self._payload
    # cte might be a Header, so for now stringify it.
    cte = str(self.get('content-transfer-encoding', '')).lower()
    if isinstance(payload, str):
        payload = str(payload)    # for Python-Future, so surrogateescape works
        if utils._has_surrogates(payload):
            bpayload = payload.encode('ascii', 'surrogateescape')
            if not decode:
                try:
                    payload = bpayload.decode(
                        self.get_param('charset', 'ascii'), 'replace')
                except LookupError:
                    payload = bpayload.decode('ascii', 'replace')
        elif decode:
            try:
                bpayload = payload.encode('ascii')
            except UnicodeError:
                # This won't happen for RFC compliant messages (messages
                # containing only ASCII codepoints in the unicode input).
                # If it does happen, turn the string into bytes in a way
                # guaranteed not to fail.
                bpayload = payload.encode('raw-unicode-escape')
    elif isinstance(payload, bytes):
        # A bytes payload is already in the form the decoders consume.
        # Previously bpayload stayed unbound here and the decode branches
        # below failed with UnboundLocalError.
        bpayload = payload
    if not decode:
        return payload
    if not isinstance(payload, (str, bytes)):
        # Nothing decodable (e.g. no payload set): hand it back untouched.
        return payload
    if cte == 'quoted-printable':
        return utils._qdecode(bpayload)
    elif cte == 'base64':
        # XXX: this is a bit of a hack; decode_b should probably be factored
        # out somewhere, but I haven't figured out where yet.
        value, defects = decode_b(b''.join(bpayload.splitlines()))
        for defect in defects:
            self.policy.handle_defect(self, defect)
        return value
    elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
        in_file = BytesIO(bpayload)
        out_file = BytesIO()
        try:
            uu.decode(in_file, out_file, quiet=True)
            return out_file.getvalue()
        except uu.Error:
            # Some decoding problem
            return bpayload
    return bpayload
+
def set_payload(self, payload, charset=None):
    """Set the payload to the given value.

    When charset is not None it is then handed to set_charset(); see that
    method for details.
    """
    self._payload = payload
    if charset is None:
        return
    self.set_charset(charset)
+
def set_charset(self, charset):
    """Set the charset of the payload to a given character set.

    charset can be a Charset instance, a string naming a character set, or
    None.  Anything that is not already a Charset instance is passed to
    the Charset constructor.  If charset is None, the charset parameter is
    removed from the Content-Type field and the stored charset is cleared.

    Otherwise MIME-Version and Content-Type headers are added when
    missing (an existing Content-Type just gets its charset parameter
    set), the payload is re-encoded with charset.body_encode() when the
    charset differs from its output charset, and a
    Content-Transfer-Encoding header is set up when the message has none.
    """
    if charset is None:
        self.del_param('charset')
        self._charset = None
        return
    if not isinstance(charset, Charset):
        charset = Charset(charset)
    self._charset = charset
    if 'MIME-Version' not in self:
        self.add_header('MIME-Version', '1.0')
    if 'Content-Type' in self:
        self.set_param('charset', charset.get_output_charset())
    else:
        self.add_header('Content-Type', 'text/plain',
                        charset=charset.get_output_charset())
    if charset != charset.get_output_charset():
        self._payload = charset.body_encode(self._payload)
    if 'Content-Transfer-Encoding' in self:
        return
    encoder = charset.get_body_encoding()
    try:
        # NOTE(review): presumably get_body_encoding() returns either a
        # callable that encodes the message in place or a plain value; the
        # TypeError from calling a non-callable selects the fallback below.
        encoder(self)
    except TypeError:
        self._payload = charset.body_encode(self._payload)
        self.add_header('Content-Transfer-Encoding', encoder)
+
def get_charset(self):
    """Return the charset stored by set_charset(), or None if none is set."""
    return self._charset
+
+ #
+ # MAPPING INTERFACE (partial)
+ #
def __len__(self):
    """Return the number of stored header entries, duplicates included."""
    headers = self._headers
    return len(headers)
+
def __getitem__(self, name):
    """Get a header value.

    Delegates to get(), so None is returned when the header is missing
    instead of an exception being raised.

    Note that if the header appeared multiple times, exactly which
    occurrence gets returned is undefined.  Use get_all() to get all
    the values matching a header field name.
    """
    value = self.get(name)
    return value
+
def __setitem__(self, name, val):
    """Set the value of a header.

    Note: this does not overwrite an existing header with the same field
    name.  Use __delitem__() first to delete any existing headers.

    When the policy reports a maximum count for this header name and that
    many headers with the name (compared case insensitively) are already
    present, ValueError is raised and nothing is added.
    """
    limit = self.policy.header_max_count(name)
    if limit:
        wanted = name.lower()
        seen = 0
        for field, _value in self._headers:
            if field.lower() != wanted:
                continue
            seen += 1
            if seen >= limit:
                raise ValueError("There may be at most {} {} headers "
                                 "in a message".format(limit, name))
    entry = self.policy.header_store_parse(name, val)
    self._headers.append(entry)
+
def __delitem__(self, name):
    """Delete all occurrences of a header, if present.

    Names are compared case insensitively.  Does not raise an exception if
    the header is missing.
    """
    unwanted = name.lower()
    # Built through list(...) so the replacement has the same list type as
    # the one created in __init__.
    self._headers = list(
        (field, value)
        for field, value in self._headers
        if field.lower() != unwanted
    )
+
def __contains__(self, name):
    """Return True if a header named name exists (case insensitive)."""
    wanted = name.lower()
    lowered = [field.lower() for field, _value in self._headers]
    return wanted in lowered
+
def __iter__(self):
    """Yield each header field name in stored order, duplicates included."""
    for name, _value in self._headers:
        yield name
+
def keys(self):
    """Return a list of all the message's header field names.

    Names come back in stored order, which is the order they appeared in
    the original message or were added to it, and may contain duplicates.
    Any fields deleted and re-inserted are always appended to the header
    list.
    """
    names = []
    for name, _value in self._headers:
        names.append(name)
    return names
+
def values(self):
    """Return a list of all the message's header values.

    Each stored value is passed through the policy's header_fetch_parse().
    Values come back in stored order, which is the order the headers
    appeared in the original message or were added to it, and may contain
    duplicates.  Any fields deleted and re-inserted are always appended to
    the header list.
    """
    fetched = []
    for name, raw in self._headers:
        fetched.append(self.policy.header_fetch_parse(name, raw))
    return fetched
+
def items(self):
    """Get all the message's header fields and values.

    Returns a list of (name, value) pairs in which each value has been
    passed through the policy's header_fetch_parse().  Pairs come back in
    stored order, which is the order the headers appeared in the original
    message or were added to it, and may contain duplicates.  Any fields
    deleted and re-inserted are always appended to the header list.
    """
    pairs = []
    for name, raw in self._headers:
        pairs.append((name, self.policy.header_fetch_parse(name, raw)))
    return pairs
+
def get(self, name, failobj=None):
    """Get a header value.

    Returns the first stored header whose name matches name (case
    insensitively), passed through the policy's header_fetch_parse().
    Like __getitem__() but return failobj instead of None when the field
    is missing.
    """
    wanted = name.lower()
    for field, raw in self._headers:
        if field.lower() != wanted:
            continue
        return self.policy.header_fetch_parse(field, raw)
    return failobj
+
+ #
+ # "Internal" methods (public API, but only intended for use by a parser
+ # or generator, not normal application code.
+ #
+
def set_raw(self, name, value):
    """Store name and value in the model without modification.

    The pair is appended to the header list as-is; the policy is not
    consulted.  This is an "internal" API, intended only for use by a
    parser.
    """
    pair = (name, value)
    self._headers.append(pair)
+
def raw_items(self):
    """Return the (name, value) header pairs without modification.

    The result is an iterator over a snapshot of the header list, so
    headers added or removed afterwards do not affect it.  This is an
    "internal" API, intended only for use by a generator.
    """
    # list(...) snapshots any sequence; the former .copy() call only
    # exists on Python 3 lists and the future.builtins list.
    return iter(list(self._headers))
+
+ #
+ # Additional useful stuff
+ #
+
def get_all(self, name, failobj=None):
    """Return a list of all the values for the named field.

    Names are compared case insensitively and each value is passed through
    the policy's header_fetch_parse().  Values come back in stored order,
    which is the order the headers appeared in the original message, and
    may contain duplicates.  Any fields deleted and re-inserted are always
    appended to the header list.

    If no such fields exist, failobj is returned (defaults to None).
    """
    wanted = name.lower()
    matches = [
        self.policy.header_fetch_parse(field, raw)
        for field, raw in self._headers
        if field.lower() == wanted
    ]
    return matches if matches else failobj
+
def add_header(self, _name, _value, **_params):
    """Extended header setting.

    _name is the header field to add and _value its main value (left out
    of the result when None).  Keyword arguments become additional
    parameters for the header field, with underscores in their names
    converted to dashes.  Normally the parameter will be added as
    key="value" unless value is None, in which case only the key will be
    added.  If a parameter value contains non-ASCII characters it can be
    specified as a three-tuple of (charset, language, value), in which
    case it will be encoded according to RFC2231 rules.  Otherwise it will
    be encoded using the utf-8 charset and a language of ''.

    Examples:

    msg.add_header('content-disposition', 'attachment', filename='bud.gif')
    msg.add_header('content-disposition', 'attachment',
                   filename=('utf-8', '', 'Fußballer.ppt'))
    msg.add_header('content-disposition', 'attachment',
                   filename='Fußballer.ppt')
    """
    pieces = [] if _value is None else [_value]
    for key, val in _params.items():
        dashed = key.replace('_', '-')
        pieces.append(dashed if val is None else _formatparam(dashed, val))
    self[_name] = SEMISPACE.join(pieces)
+
def replace_header(self, _name, _value):
    """Replace a header.

    Replace the first matching header found in the message, retaining
    header order and case: the new entry is built by the policy's
    header_store_parse() from the stored field name and _value.  Names are
    compared case insensitively.  If no matching header was found, a
    KeyError carrying the lower-cased name is raised.
    """
    wanted = _name.lower()
    for index, (field, _old) in enumerate(self._headers):
        if field.lower() == wanted:
            self._headers[index] = self.policy.header_store_parse(
                field, _value)
            return
    raise KeyError(wanted)
+
+ #
+ # Use these three methods instead of the three above.
+ #
+
def get_content_type(self):
    """Return the message's content type.

    The returned string is coerced to lower case of the form
    `maintype/subtype'.  If there was no Content-Type header in the
    message, the default type as given by get_default_type() will be
    returned.  A header value whose type part does not contain exactly
    one '/' yields 'text/plain'.

    RFC 2045 defines a message's default type to be text/plain unless it
    appears inside a multipart/digest container, in which case it would be
    message/rfc822.
    """
    sentinel = object()
    header_value = self.get('content-type', sentinel)
    if header_value is sentinel:
        # This should have no parameters
        return self.get_default_type()
    main_type = _splitparam(header_value)[0].lower()
    # RFC 2045, section 5.2 says if its invalid, use text/plain
    return main_type if main_type.count('/') == 1 else 'text/plain'
+
def get_content_maintype(self):
    """Return the message's main content type.

    This is the `maintype' part of the string returned by
    get_content_type(), i.e. everything before its first '/'.
    """
    pieces = self.get_content_type().split('/')
    return pieces[0]
+
def get_content_subtype(self):
    """Return the message's sub-content type.

    This is the `subtype' part of the string returned by
    get_content_type(), i.e. the second '/'-separated piece.
    """
    pieces = self.get_content_type().split('/')
    return pieces[1]
+
def get_default_type(self):
    """Return the `default' content type.

    This is the value stored by set_default_type(), initially text/plain.
    Most messages have a default content type of text/plain, except for
    messages that are subparts of multipart/digest containers.  Such
    subparts have a default content type of message/rfc822.
    """
    default = self._default_type
    return default
+
def set_default_type(self, ctype):
    """Set the `default' content type.

    ctype should be either "text/plain" or "message/rfc822", although this
    is not enforced: whatever is passed is stored unchanged.  The default
    content type is not stored in the Content-Type header.
    """
    self._default_type = ctype
+
+ def _get_params_preserve(self, failobj, header):
+ # Like get_params() but preserves the quoting of values. BAW:
+ # should this be part of the public interface?
+ missing = object()
+ value = self.get(header, missing)
+ if value is missing:
+ return failobj
+ params = []
+ for p in _parseparam(value):
+ try:
+ name, val = p.split('=', 1)
+ name = name.strip()
+ val = val.strip()
+ except ValueError:
+ # Must have been a bare attribute
+ name = p.strip()
+ val = ''
+ params.append((name, val))
+ params = utils.decode_params(params)
+ return params
+
+ def get_params(self, failobj=None, header='content-type', unquote=True):
+ """Return the message's Content-Type parameters, as a list.
+
+ The elements of the returned list are 2-tuples of key/value pairs, as
+ split on the `=' sign. The left hand side of the `=' is the key,
+ while the right hand side is the value. If there is no `=' sign in
+ the parameter the value is the empty string. The value is as
+ described in the get_param() method.
+
+ Optional failobj is the object to return if there is no Content-Type
+ header. Optional header is the header to search instead of
+ Content-Type. If unquote is True, the value is unquoted.
+ """
+ missing = object()
+ params = self._get_params_preserve(missing, header)
+ if params is missing:
+ return failobj
+ if unquote:
+ return [(k, _unquotevalue(v)) for k, v in params]
+ else:
+ return params
+
+ def get_param(self, param, failobj=None, header='content-type',
+ unquote=True):
+ """Return the parameter value if found in the Content-Type header.
+
+ Optional failobj is the object to return if there is no Content-Type
+ header, or the Content-Type header has no such parameter. Optional
+ header is the header to search instead of Content-Type.
+
+ Parameter keys are always compared case insensitively. The return
+ value can either be a string, or a 3-tuple if the parameter was RFC
+ 2231 encoded. When it's a 3-tuple, the elements of the value are of
+ the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and
+ LANGUAGE can be None, in which case you should consider VALUE to be
+ encoded in the us-ascii charset. You can usually ignore LANGUAGE.
+ The parameter value (either the returned string, or the VALUE item in
+ the 3-tuple) is always unquoted, unless unquote is set to False.
+
+ If your application doesn't care whether the parameter was RFC 2231
+ encoded, it can turn the return value into a string as follows:
+
+ param = msg.get_param('foo')
+ param = email.utils.collapse_rfc2231_value(rawparam)
+
+ """
+ if header not in self:
+ return failobj
+ for k, v in self._get_params_preserve(failobj, header):
+ if k.lower() == param.lower():
+ if unquote:
+ return _unquotevalue(v)
+ else:
+ return v
+ return failobj
+
+ def set_param(self, param, value, header='Content-Type', requote=True,
+ charset=None, language=''):
+ """Set a parameter in the Content-Type header.
+
+ If the parameter already exists in the header, its value will be
+ replaced with the new value.
+
+ If header is Content-Type and has not yet been defined for this
+ message, it will be set to "text/plain" and the new parameter and
+ value will be appended as per RFC 2045.
+
+ An alternate header can specified in the header argument, and all
+ parameters will be quoted as necessary unless requote is False.
+
+ If charset is specified, the parameter will be encoded according to RFC
+ 2231. Optional language specifies the RFC 2231 language, defaulting
+ to the empty string. Both charset and language should be strings.
+ """
+ if not isinstance(value, tuple) and charset:
+ value = (charset, language, value)
+
+ if header not in self and header.lower() == 'content-type':
+ ctype = 'text/plain'
+ else:
+ ctype = self.get(header)
+ if not self.get_param(param, header=header):
+ if not ctype:
+ ctype = _formatparam(param, value, requote)
+ else:
+ ctype = SEMISPACE.join(
+ [ctype, _formatparam(param, value, requote)])
+ else:
+ ctype = ''
+ for old_param, old_value in self.get_params(header=header,
+ unquote=requote):
+ append_param = ''
+ if old_param.lower() == param.lower():
+ append_param = _formatparam(param, value, requote)
+ else:
+ append_param = _formatparam(old_param, old_value, requote)
+ if not ctype:
+ ctype = append_param
+ else:
+ ctype = SEMISPACE.join([ctype, append_param])
+ if ctype != self.get(header):
+ del self[header]
+ self[header] = ctype
+
+ def del_param(self, param, header='content-type', requote=True):
+ """Remove the given parameter completely from the Content-Type header.
+
+ The header will be re-written in place without the parameter or its
+ value. All values will be quoted as necessary unless requote is
+ False. Optional header specifies an alternative to the Content-Type
+ header.
+ """
+ if header not in self:
+ return
+ new_ctype = ''
+ for p, v in self.get_params(header=header, unquote=requote):
+ if p.lower() != param.lower():
+ if not new_ctype:
+ new_ctype = _formatparam(p, v, requote)
+ else:
+ new_ctype = SEMISPACE.join([new_ctype,
+ _formatparam(p, v, requote)])
+ if new_ctype != self.get(header):
+ del self[header]
+ self[header] = new_ctype
+
+ def set_type(self, type, header='Content-Type', requote=True):
+ """Set the main type and subtype for the Content-Type header.
+
+ type must be a string in the form "maintype/subtype", otherwise a
+ ValueError is raised.
+
+ This method replaces the Content-Type header, keeping all the
+ parameters in place. If requote is False, this leaves the existing
+ header's quoting as is. Otherwise, the parameters will be quoted (the
+ default).
+
+ An alternative header can be specified in the header argument. When
+ the Content-Type header is set, we'll always also add a MIME-Version
+ header.
+ """
+ # BAW: should we be strict?
+ if not type.count('/') == 1:
+ raise ValueError
+ # Set the Content-Type, you get a MIME-Version
+ if header.lower() == 'content-type':
+ del self['mime-version']
+ self['MIME-Version'] = '1.0'
+ if header not in self:
+ self[header] = type
+ return
+ params = self.get_params(header=header, unquote=requote)
+ del self[header]
+ self[header] = type
+ # Skip the first param; it's the old type.
+ for p, v in params[1:]:
+ self.set_param(p, v, header, requote)
+
+ def get_filename(self, failobj=None):
+ """Return the filename associated with the payload if present.
+
+ The filename is extracted from the Content-Disposition header's
+ `filename' parameter, and it is unquoted. If that header is missing
+ the `filename' parameter, this method falls back to looking for the
+ `name' parameter.
+ """
+ missing = object()
+ filename = self.get_param('filename', missing, 'content-disposition')
+ if filename is missing:
+ filename = self.get_param('name', missing, 'content-type')
+ if filename is missing:
+ return failobj
+ return utils.collapse_rfc2231_value(filename).strip()
+
+ def get_boundary(self, failobj=None):
+ """Return the boundary associated with the payload if present.
+
+ The boundary is extracted from the Content-Type header's `boundary'
+ parameter, and it is unquoted.
+ """
+ missing = object()
+ boundary = self.get_param('boundary', missing)
+ if boundary is missing:
+ return failobj
+ # RFC 2046 says that boundaries may begin but not end in w/s
+ return utils.collapse_rfc2231_value(boundary).rstrip()
+
+ def set_boundary(self, boundary):
+ """Set the boundary parameter in Content-Type to 'boundary'.
+
+ This is subtly different than deleting the Content-Type header and
+ adding a new one with a new boundary parameter via add_header(). The
+ main difference is that using the set_boundary() method preserves the
+ order of the Content-Type header in the original message.
+
+ HeaderParseError is raised if the message has no Content-Type header.
+ """
+ missing = object()
+ params = self._get_params_preserve(missing, 'content-type')
+ if params is missing:
+ # There was no Content-Type header, and we don't know what type
+ # to set it to, so raise an exception.
+ raise errors.HeaderParseError('No Content-Type header found')
newparams = list()
- foundp = False
- for pk, pv in params:
- if pk.lower() == 'boundary':
- newparams.append(('boundary', '"%s"' % boundary))
- foundp = True
- else:
- newparams.append((pk, pv))
- if not foundp:
- # The original Content-Type header had no boundary attribute.
- # Tack one on the end. BAW: should we raise an exception
- # instead???
- newparams.append(('boundary', '"%s"' % boundary))
- # Replace the existing Content-Type header with the new value
+ foundp = False
+ for pk, pv in params:
+ if pk.lower() == 'boundary':
+ newparams.append(('boundary', '"%s"' % boundary))
+ foundp = True
+ else:
+ newparams.append((pk, pv))
+ if not foundp:
+ # The original Content-Type header had no boundary attribute.
+ # Tack one on the end. BAW: should we raise an exception
+ # instead???
+ newparams.append(('boundary', '"%s"' % boundary))
+ # Replace the existing Content-Type header with the new value
newheaders = list()
- for h, v in self._headers:
- if h.lower() == 'content-type':
+ for h, v in self._headers:
+ if h.lower() == 'content-type':
parts = list()
- for k, v in newparams:
- if v == '':
- parts.append(k)
- else:
- parts.append('%s=%s' % (k, v))
- val = SEMISPACE.join(parts)
- newheaders.append(self.policy.header_store_parse(h, val))
-
- else:
- newheaders.append((h, v))
- self._headers = newheaders
-
- def get_content_charset(self, failobj=None):
- """Return the charset parameter of the Content-Type header.
-
- The returned string is always coerced to lower case. If there is no
- Content-Type header, or if that header has no charset parameter,
- failobj is returned.
- """
- missing = object()
- charset = self.get_param('charset', missing)
- if charset is missing:
- return failobj
- if isinstance(charset, tuple):
- # RFC 2231 encoded, so decode it, and it better end up as ascii.
- pcharset = charset[0] or 'us-ascii'
- try:
- # LookupError will be raised if the charset isn't known to
- # Python. UnicodeError will be raised if the encoded text
- # contains a character not in the charset.
- as_bytes = charset[2].encode('raw-unicode-escape')
- charset = str(as_bytes, pcharset)
- except (LookupError, UnicodeError):
- charset = charset[2]
- # charset characters must be in us-ascii range
- try:
- charset.encode('us-ascii')
- except UnicodeError:
- return failobj
- # RFC 2046, $4.1.2 says charsets are not case sensitive
- return charset.lower()
-
- def get_charsets(self, failobj=None):
- """Return a list containing the charset(s) used in this message.
-
- The returned list of items describes the Content-Type headers'
- charset parameter for this message and all the subparts in its
- payload.
-
- Each item will either be a string (the value of the charset parameter
- in the Content-Type header of that part) or the value of the
- 'failobj' parameter (defaults to None), if the part does not have a
- main MIME type of "text", or the charset is not defined.
-
- The list will contain one string for each part of the message, plus
- one for the container message (i.e. self), so that a non-multipart
- message will still return a list of length 1.
- """
- return [part.get_content_charset(failobj) for part in self.walk()]
-
- # I.e. def walk(self): ...
- from future.backports.email.iterators import walk
+ for k, v in newparams:
+ if v == '':
+ parts.append(k)
+ else:
+ parts.append('%s=%s' % (k, v))
+ val = SEMISPACE.join(parts)
+ newheaders.append(self.policy.header_store_parse(h, val))
+
+ else:
+ newheaders.append((h, v))
+ self._headers = newheaders
+
+ def get_content_charset(self, failobj=None):
+ """Return the charset parameter of the Content-Type header.
+
+ The returned string is always coerced to lower case. If there is no
+ Content-Type header, or if that header has no charset parameter,
+ failobj is returned.
+ """
+ missing = object()
+ charset = self.get_param('charset', missing)
+ if charset is missing:
+ return failobj
+ if isinstance(charset, tuple):
+ # RFC 2231 encoded, so decode it, and it better end up as ascii.
+ pcharset = charset[0] or 'us-ascii'
+ try:
+ # LookupError will be raised if the charset isn't known to
+ # Python. UnicodeError will be raised if the encoded text
+ # contains a character not in the charset.
+ as_bytes = charset[2].encode('raw-unicode-escape')
+ charset = str(as_bytes, pcharset)
+ except (LookupError, UnicodeError):
+ charset = charset[2]
+ # charset characters must be in us-ascii range
+ try:
+ charset.encode('us-ascii')
+ except UnicodeError:
+ return failobj
+ # RFC 2046, $4.1.2 says charsets are not case sensitive
+ return charset.lower()
+
+ def get_charsets(self, failobj=None):
+ """Return a list containing the charset(s) used in this message.
+
+ The returned list of items describes the Content-Type headers'
+ charset parameter for this message and all the subparts in its
+ payload.
+
+ Each item will either be a string (the value of the charset parameter
+ in the Content-Type header of that part) or the value of the
+ 'failobj' parameter (defaults to None), if the part does not have a
+ main MIME type of "text", or the charset is not defined.
+
+ The list will contain one string for each part of the message, plus
+ one for the container message (i.e. self), so that a non-multipart
+ message will still return a list of length 1.
+ """
+ return [part.get_content_charset(failobj) for part in self.walk()]
+
+ # I.e. def walk(self): ...
+ from future.backports.email.iterators import walk
diff --git a/contrib/python/future/future/backports/email/mime/application.py b/contrib/python/future/future/backports/email/mime/application.py
index 42ee4a5c64..5cbfb174af 100644
--- a/contrib/python/future/future/backports/email/mime/application.py
+++ b/contrib/python/future/future/backports/email/mime/application.py
@@ -1,39 +1,39 @@
-# Copyright (C) 2001-2006 Python Software Foundation
-# Author: Keith Dart
-# Contact: email-sig@python.org
-
-"""Class representing application/* type MIME documents."""
-from __future__ import unicode_literals
-from __future__ import division
-from __future__ import absolute_import
-
-from future.backports.email import encoders
-from future.backports.email.mime.nonmultipart import MIMENonMultipart
-
-__all__ = ["MIMEApplication"]
-
-
-class MIMEApplication(MIMENonMultipart):
- """Class for generating application/* MIME documents."""
-
- def __init__(self, _data, _subtype='octet-stream',
- _encoder=encoders.encode_base64, **_params):
- """Create an application/* type MIME document.
-
- _data is a string containing the raw application data.
-
- _subtype is the MIME content type subtype, defaulting to
- 'octet-stream'.
-
- _encoder is a function which will perform the actual encoding for
- transport of the application data, defaulting to base64 encoding.
-
- Any additional keyword arguments are passed to the base class
- constructor, which turns them into parameters on the Content-Type
- header.
- """
- if _subtype is None:
- raise TypeError('Invalid application MIME subtype')
- MIMENonMultipart.__init__(self, 'application', _subtype, **_params)
- self.set_payload(_data)
- _encoder(self)
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Keith Dart
+# Contact: email-sig@python.org
+
+"""Class representing application/* type MIME documents."""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+
+from future.backports.email import encoders
+from future.backports.email.mime.nonmultipart import MIMENonMultipart
+
+__all__ = ["MIMEApplication"]
+
+
+class MIMEApplication(MIMENonMultipart):
+ """Class for generating application/* MIME documents."""
+
+ def __init__(self, _data, _subtype='octet-stream',
+ _encoder=encoders.encode_base64, **_params):
+ """Create an application/* type MIME document.
+
+ _data is a string containing the raw application data.
+
+ _subtype is the MIME content type subtype, defaulting to
+ 'octet-stream'.
+
+ _encoder is a function which will perform the actual encoding for
+ transport of the application data, defaulting to base64 encoding.
+
+ Any additional keyword arguments are passed to the base class
+ constructor, which turns them into parameters on the Content-Type
+ header.
+ """
+ if _subtype is None:
+ raise TypeError('Invalid application MIME subtype')
+ MIMENonMultipart.__init__(self, 'application', _subtype, **_params)
+ self.set_payload(_data)
+ _encoder(self)
diff --git a/contrib/python/future/future/backports/email/mime/audio.py b/contrib/python/future/future/backports/email/mime/audio.py
index b35d7b8631..4989c11420 100644
--- a/contrib/python/future/future/backports/email/mime/audio.py
+++ b/contrib/python/future/future/backports/email/mime/audio.py
@@ -1,74 +1,74 @@
-# Copyright (C) 2001-2007 Python Software Foundation
-# Author: Anthony Baxter
-# Contact: email-sig@python.org
-
-"""Class representing audio/* type MIME documents."""
-from __future__ import unicode_literals
-from __future__ import division
-from __future__ import absolute_import
-
-__all__ = ['MIMEAudio']
-
-import sndhdr
-
-from io import BytesIO
-from future.backports.email import encoders
-from future.backports.email.mime.nonmultipart import MIMENonMultipart
-
-
-_sndhdr_MIMEmap = {'au' : 'basic',
- 'wav' :'x-wav',
- 'aiff':'x-aiff',
- 'aifc':'x-aiff',
- }
-
-# There are others in sndhdr that don't have MIME types. :(
-# Additional ones to be added to sndhdr? midi, mp3, realaudio, wma??
-def _whatsnd(data):
- """Try to identify a sound file type.
-
- sndhdr.what() has a pretty cruddy interface, unfortunately. This is why
- we re-do it here. It would be easier to reverse engineer the Unix 'file'
- command and use the standard 'magic' file, as shipped with a modern Unix.
- """
- hdr = data[:512]
- fakefile = BytesIO(hdr)
- for testfn in sndhdr.tests:
- res = testfn(hdr, fakefile)
- if res is not None:
- return _sndhdr_MIMEmap.get(res[0])
- return None
-
-
-class MIMEAudio(MIMENonMultipart):
- """Class for generating audio/* MIME documents."""
-
- def __init__(self, _audiodata, _subtype=None,
- _encoder=encoders.encode_base64, **_params):
- """Create an audio/* type MIME document.
-
- _audiodata is a string containing the raw audio data. If this data
- can be decoded by the standard Python `sndhdr' module, then the
- subtype will be automatically included in the Content-Type header.
- Otherwise, you can specify the specific audio subtype via the
- _subtype parameter. If _subtype is not given, and no subtype can be
- guessed, a TypeError is raised.
-
- _encoder is a function which will perform the actual encoding for
- transport of the image data. It takes one argument, which is this
- Image instance. It should use get_payload() and set_payload() to
- change the payload to the encoded form. It should also add any
- Content-Transfer-Encoding or other headers to the message as
- necessary. The default encoding is Base64.
-
- Any additional keyword arguments are passed to the base class
- constructor, which turns them into parameters on the Content-Type
- header.
- """
- if _subtype is None:
- _subtype = _whatsnd(_audiodata)
- if _subtype is None:
- raise TypeError('Could not find audio MIME subtype')
- MIMENonMultipart.__init__(self, 'audio', _subtype, **_params)
- self.set_payload(_audiodata)
- _encoder(self)
+# Copyright (C) 2001-2007 Python Software Foundation
+# Author: Anthony Baxter
+# Contact: email-sig@python.org
+
+"""Class representing audio/* type MIME documents."""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+
+__all__ = ['MIMEAudio']
+
+import sndhdr
+
+from io import BytesIO
+from future.backports.email import encoders
+from future.backports.email.mime.nonmultipart import MIMENonMultipart
+
+
+_sndhdr_MIMEmap = {'au' : 'basic',
+ 'wav' :'x-wav',
+ 'aiff':'x-aiff',
+ 'aifc':'x-aiff',
+ }
+
+# There are others in sndhdr that don't have MIME types. :(
+# Additional ones to be added to sndhdr? midi, mp3, realaudio, wma??
+def _whatsnd(data):
+ """Try to identify a sound file type.
+
+ sndhdr.what() has a pretty cruddy interface, unfortunately. This is why
+ we re-do it here. It would be easier to reverse engineer the Unix 'file'
+ command and use the standard 'magic' file, as shipped with a modern Unix.
+ """
+ hdr = data[:512]
+ fakefile = BytesIO(hdr)
+ for testfn in sndhdr.tests:
+ res = testfn(hdr, fakefile)
+ if res is not None:
+ return _sndhdr_MIMEmap.get(res[0])
+ return None
+
+
+class MIMEAudio(MIMENonMultipart):
+ """Class for generating audio/* MIME documents."""
+
+ def __init__(self, _audiodata, _subtype=None,
+ _encoder=encoders.encode_base64, **_params):
+ """Create an audio/* type MIME document.
+
+ _audiodata is a string containing the raw audio data. If this data
+ can be decoded by the standard Python `sndhdr' module, then the
+ subtype will be automatically included in the Content-Type header.
+ Otherwise, you can specify the specific audio subtype via the
+ _subtype parameter. If _subtype is not given, and no subtype can be
+ guessed, a TypeError is raised.
+
+ _encoder is a function which will perform the actual encoding for
+ transport of the image data. It takes one argument, which is this
+ Image instance. It should use get_payload() and set_payload() to
+ change the payload to the encoded form. It should also add any
+ Content-Transfer-Encoding or other headers to the message as
+ necessary. The default encoding is Base64.
+
+ Any additional keyword arguments are passed to the base class
+ constructor, which turns them into parameters on the Content-Type
+ header.
+ """
+ if _subtype is None:
+ _subtype = _whatsnd(_audiodata)
+ if _subtype is None:
+ raise TypeError('Could not find audio MIME subtype')
+ MIMENonMultipart.__init__(self, 'audio', _subtype, **_params)
+ self.set_payload(_audiodata)
+ _encoder(self)
diff --git a/contrib/python/future/future/backports/email/mime/base.py b/contrib/python/future/future/backports/email/mime/base.py
index b9ede1cf5e..e77f3ca4ae 100644
--- a/contrib/python/future/future/backports/email/mime/base.py
+++ b/contrib/python/future/future/backports/email/mime/base.py
@@ -1,25 +1,25 @@
-# Copyright (C) 2001-2006 Python Software Foundation
-# Author: Barry Warsaw
-# Contact: email-sig@python.org
-
-"""Base class for MIME specializations."""
-from __future__ import absolute_import, division, unicode_literals
-from future.backports.email import message
-
-__all__ = ['MIMEBase']
-
-
-class MIMEBase(message.Message):
- """Base class for MIME specializations."""
-
- def __init__(self, _maintype, _subtype, **_params):
- """This constructor adds a Content-Type: and a MIME-Version: header.
-
- The Content-Type: header is taken from the _maintype and _subtype
- arguments. Additional parameters for this header are taken from the
- keyword arguments.
- """
- message.Message.__init__(self)
- ctype = '%s/%s' % (_maintype, _subtype)
- self.add_header('Content-Type', ctype, **_params)
- self['MIME-Version'] = '1.0'
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Base class for MIME specializations."""
+from __future__ import absolute_import, division, unicode_literals
+from future.backports.email import message
+
+__all__ = ['MIMEBase']
+
+
+class MIMEBase(message.Message):
+ """Base class for MIME specializations."""
+
+ def __init__(self, _maintype, _subtype, **_params):
+ """This constructor adds a Content-Type: and a MIME-Version: header.
+
+ The Content-Type: header is taken from the _maintype and _subtype
+ arguments. Additional parameters for this header are taken from the
+ keyword arguments.
+ """
+ message.Message.__init__(self)
+ ctype = '%s/%s' % (_maintype, _subtype)
+ self.add_header('Content-Type', ctype, **_params)
+ self['MIME-Version'] = '1.0'
diff --git a/contrib/python/future/future/backports/email/mime/image.py b/contrib/python/future/future/backports/email/mime/image.py
index 0998ddb81a..a03602464a 100644
--- a/contrib/python/future/future/backports/email/mime/image.py
+++ b/contrib/python/future/future/backports/email/mime/image.py
@@ -1,48 +1,48 @@
-# Copyright (C) 2001-2006 Python Software Foundation
-# Author: Barry Warsaw
-# Contact: email-sig@python.org
-
-"""Class representing image/* type MIME documents."""
-from __future__ import unicode_literals
-from __future__ import division
-from __future__ import absolute_import
-
-__all__ = ['MIMEImage']
-
-import imghdr
-
-from future.backports.email import encoders
-from future.backports.email.mime.nonmultipart import MIMENonMultipart
-
-
-class MIMEImage(MIMENonMultipart):
- """Class for generating image/* type MIME documents."""
-
- def __init__(self, _imagedata, _subtype=None,
- _encoder=encoders.encode_base64, **_params):
- """Create an image/* type MIME document.
-
- _imagedata is a string containing the raw image data. If this data
- can be decoded by the standard Python `imghdr' module, then the
- subtype will be automatically included in the Content-Type header.
- Otherwise, you can specify the specific image subtype via the _subtype
- parameter.
-
- _encoder is a function which will perform the actual encoding for
- transport of the image data. It takes one argument, which is this
- Image instance. It should use get_payload() and set_payload() to
- change the payload to the encoded form. It should also add any
- Content-Transfer-Encoding or other headers to the message as
- necessary. The default encoding is Base64.
-
- Any additional keyword arguments are passed to the base class
- constructor, which turns them into parameters on the Content-Type
- header.
- """
- if _subtype is None:
- _subtype = imghdr.what(None, _imagedata)
- if _subtype is None:
- raise TypeError('Could not guess image MIME subtype')
- MIMENonMultipart.__init__(self, 'image', _subtype, **_params)
- self.set_payload(_imagedata)
- _encoder(self)
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Class representing image/* type MIME documents."""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+
+__all__ = ['MIMEImage']
+
+import imghdr
+
+from future.backports.email import encoders
+from future.backports.email.mime.nonmultipart import MIMENonMultipart
+
+
+class MIMEImage(MIMENonMultipart):
+ """Class for generating image/* type MIME documents."""
+
+ def __init__(self, _imagedata, _subtype=None,
+ _encoder=encoders.encode_base64, **_params):
+ """Create an image/* type MIME document.
+
+ _imagedata is a string containing the raw image data. If this data
+ can be decoded by the standard Python `imghdr' module, then the
+ subtype will be automatically included in the Content-Type header.
+ Otherwise, you can specify the specific image subtype via the _subtype
+ parameter.
+
+ _encoder is a function which will perform the actual encoding for
+ transport of the image data. It takes one argument, which is this
+ Image instance. It should use get_payload() and set_payload() to
+ change the payload to the encoded form. It should also add any
+ Content-Transfer-Encoding or other headers to the message as
+ necessary. The default encoding is Base64.
+
+ Any additional keyword arguments are passed to the base class
+ constructor, which turns them into parameters on the Content-Type
+ header.
+ """
+ if _subtype is None:
+ _subtype = imghdr.what(None, _imagedata)
+ if _subtype is None:
+ raise TypeError('Could not guess image MIME subtype')
+ MIMENonMultipart.__init__(self, 'image', _subtype, **_params)
+ self.set_payload(_imagedata)
+ _encoder(self)
diff --git a/contrib/python/future/future/backports/email/mime/message.py b/contrib/python/future/future/backports/email/mime/message.py
index d38883505b..7f92075150 100644
--- a/contrib/python/future/future/backports/email/mime/message.py
+++ b/contrib/python/future/future/backports/email/mime/message.py
@@ -1,36 +1,36 @@
-# Copyright (C) 2001-2006 Python Software Foundation
-# Author: Barry Warsaw
-# Contact: email-sig@python.org
-
-"""Class representing message/* MIME documents."""
-from __future__ import unicode_literals
-from __future__ import division
-from __future__ import absolute_import
-
-__all__ = ['MIMEMessage']
-
-from future.backports.email import message
-from future.backports.email.mime.nonmultipart import MIMENonMultipart
-
-
-class MIMEMessage(MIMENonMultipart):
- """Class representing message/* MIME documents."""
-
- def __init__(self, _msg, _subtype='rfc822'):
- """Create a message/* type MIME document.
-
- _msg is a message object and must be an instance of Message, or a
- derived class of Message, otherwise a TypeError is raised.
-
- Optional _subtype defines the subtype of the contained message. The
- default is "rfc822" (this is defined by the MIME standard, even though
- the term "rfc822" is technically outdated by RFC 2822).
- """
- MIMENonMultipart.__init__(self, 'message', _subtype)
- if not isinstance(_msg, message.Message):
- raise TypeError('Argument is not an instance of Message')
- # It's convenient to use this base class method. We need to do it
- # this way or we'll get an exception
- message.Message.attach(self, _msg)
- # And be sure our default type is set correctly
- self.set_default_type('message/rfc822')
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Class representing message/* MIME documents."""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+
+__all__ = ['MIMEMessage']
+
+from future.backports.email import message
+from future.backports.email.mime.nonmultipart import MIMENonMultipart
+
+
+class MIMEMessage(MIMENonMultipart):
+ """Class representing message/* MIME documents."""
+
+ def __init__(self, _msg, _subtype='rfc822'):
+ """Create a message/* type MIME document.
+
+ _msg is a message object and must be an instance of Message, or a
+ derived class of Message, otherwise a TypeError is raised.
+
+ Optional _subtype defines the subtype of the contained message. The
+ default is "rfc822" (this is defined by the MIME standard, even though
+ the term "rfc822" is technically outdated by RFC 2822).
+ """
+ MIMENonMultipart.__init__(self, 'message', _subtype)
+ if not isinstance(_msg, message.Message):
+ raise TypeError('Argument is not an instance of Message')
+ # It's convenient to use this base class method. We need to do it
+ # this way or we'll get an exception
+ message.Message.attach(self, _msg)
+ # And be sure our default type is set correctly
+ self.set_default_type('message/rfc822')
diff --git a/contrib/python/future/future/backports/email/mime/multipart.py b/contrib/python/future/future/backports/email/mime/multipart.py
index e24858354a..6d7ed3dcb9 100644
--- a/contrib/python/future/future/backports/email/mime/multipart.py
+++ b/contrib/python/future/future/backports/email/mime/multipart.py
@@ -1,49 +1,49 @@
-# Copyright (C) 2002-2006 Python Software Foundation
-# Author: Barry Warsaw
-# Contact: email-sig@python.org
-
-"""Base class for MIME multipart/* type messages."""
-from __future__ import unicode_literals
-from __future__ import division
-from __future__ import absolute_import
-
-__all__ = ['MIMEMultipart']
-
-from future.backports.email.mime.base import MIMEBase
-
-
-class MIMEMultipart(MIMEBase):
- """Base class for MIME multipart/* type messages."""
-
- def __init__(self, _subtype='mixed', boundary=None, _subparts=None,
- **_params):
- """Creates a multipart/* type message.
-
- By default, creates a multipart/mixed message, with proper
- Content-Type and MIME-Version headers.
-
- _subtype is the subtype of the multipart content type, defaulting to
- `mixed'.
-
- boundary is the multipart boundary string. By default it is
- calculated as needed.
-
- _subparts is a sequence of initial subparts for the payload. It
- must be an iterable object, such as a list. You can always
- attach new subparts to the message by using the attach() method.
-
- Additional parameters for the Content-Type header are taken from the
- keyword arguments (or passed into the _params argument).
- """
- MIMEBase.__init__(self, 'multipart', _subtype, **_params)
-
- # Initialise _payload to an empty list as the Message superclass's
- # implementation of is_multipart assumes that _payload is a list for
- # multipart messages.
- self._payload = []
-
- if _subparts:
- for p in _subparts:
- self.attach(p)
- if boundary:
- self.set_boundary(boundary)
+# Copyright (C) 2002-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Base class for MIME multipart/* type messages."""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+
+__all__ = ['MIMEMultipart']
+
+from future.backports.email.mime.base import MIMEBase
+
+
+class MIMEMultipart(MIMEBase):
+ """Base class for MIME multipart/* type messages."""
+
+ def __init__(self, _subtype='mixed', boundary=None, _subparts=None,
+ **_params):
+ """Creates a multipart/* type message.
+
+ By default, creates a multipart/mixed message, with proper
+ Content-Type and MIME-Version headers.
+
+ _subtype is the subtype of the multipart content type, defaulting to
+ `mixed'.
+
+ boundary is the multipart boundary string. By default it is
+ calculated as needed.
+
+ _subparts is a sequence of initial subparts for the payload. It
+ must be an iterable object, such as a list. You can always
+ attach new subparts to the message by using the attach() method.
+
+ Additional parameters for the Content-Type header are taken from the
+ keyword arguments (or passed into the _params argument).
+ """
+ MIMEBase.__init__(self, 'multipart', _subtype, **_params)
+
+ # Initialise _payload to an empty list as the Message superclass's
+ # implementation of is_multipart assumes that _payload is a list for
+ # multipart messages.
+ self._payload = []
+
+ if _subparts:
+ for p in _subparts:
+ self.attach(p)
+ if boundary:
+ self.set_boundary(boundary)
diff --git a/contrib/python/future/future/backports/email/mime/nonmultipart.py b/contrib/python/future/future/backports/email/mime/nonmultipart.py
index 6dad586784..08c37c36d1 100644
--- a/contrib/python/future/future/backports/email/mime/nonmultipart.py
+++ b/contrib/python/future/future/backports/email/mime/nonmultipart.py
@@ -1,24 +1,24 @@
-# Copyright (C) 2002-2006 Python Software Foundation
-# Author: Barry Warsaw
-# Contact: email-sig@python.org
-
-"""Base class for MIME type messages that are not multipart."""
-from __future__ import unicode_literals
-from __future__ import division
-from __future__ import absolute_import
-
-__all__ = ['MIMENonMultipart']
-
-from future.backports.email import errors
-from future.backports.email.mime.base import MIMEBase
-
-
-class MIMENonMultipart(MIMEBase):
- """Base class for MIME multipart/* type messages."""
-
- def attach(self, payload):
- # The public API prohibits attaching multiple subparts to MIMEBase
- # derived subtypes since none of them are, by definition, of content
- # type multipart/*
- raise errors.MultipartConversionError(
- 'Cannot attach additional subparts to non-multipart/*')
+# Copyright (C) 2002-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Base class for MIME type messages that are not multipart."""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+
+__all__ = ['MIMENonMultipart']
+
+from future.backports.email import errors
+from future.backports.email.mime.base import MIMEBase
+
+
+class MIMENonMultipart(MIMEBase):
+ """Base class for MIME multipart/* type messages."""
+
+ def attach(self, payload):
+ # The public API prohibits attaching multiple subparts to MIMEBase
+ # derived subtypes since none of them are, by definition, of content
+ # type multipart/*
+ raise errors.MultipartConversionError(
+ 'Cannot attach additional subparts to non-multipart/*')
diff --git a/contrib/python/future/future/backports/email/mime/text.py b/contrib/python/future/future/backports/email/mime/text.py
index 0896d97516..6269f4a68a 100644
--- a/contrib/python/future/future/backports/email/mime/text.py
+++ b/contrib/python/future/future/backports/email/mime/text.py
@@ -1,44 +1,44 @@
-# Copyright (C) 2001-2006 Python Software Foundation
-# Author: Barry Warsaw
-# Contact: email-sig@python.org
-
-"""Class representing text/* type MIME documents."""
-from __future__ import unicode_literals
-from __future__ import division
-from __future__ import absolute_import
-
-__all__ = ['MIMEText']
-
-from future.backports.email.encoders import encode_7or8bit
-from future.backports.email.mime.nonmultipart import MIMENonMultipart
-
-
-class MIMEText(MIMENonMultipart):
- """Class for generating text/* type MIME documents."""
-
- def __init__(self, _text, _subtype='plain', _charset=None):
- """Create a text/* type MIME document.
-
- _text is the string for this message object.
-
- _subtype is the MIME sub content type, defaulting to "plain".
-
- _charset is the character set parameter added to the Content-Type
- header. This defaults to "us-ascii". Note that as a side-effect, the
- Content-Transfer-Encoding header will also be set.
- """
-
- # If no _charset was specified, check to see if there are non-ascii
- # characters present. If not, use 'us-ascii', otherwise use utf-8.
- # XXX: This can be removed once #7304 is fixed.
- if _charset is None:
- try:
- _text.encode('us-ascii')
- _charset = 'us-ascii'
- except UnicodeEncodeError:
- _charset = 'utf-8'
-
- MIMENonMultipart.__init__(self, 'text', _subtype,
- **{'charset': _charset})
-
- self.set_payload(_text, _charset)
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Class representing text/* type MIME documents."""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+
+__all__ = ['MIMEText']
+
+from future.backports.email.encoders import encode_7or8bit
+from future.backports.email.mime.nonmultipart import MIMENonMultipart
+
+
+class MIMEText(MIMENonMultipart):
+ """Class for generating text/* type MIME documents."""
+
+ def __init__(self, _text, _subtype='plain', _charset=None):
+ """Create a text/* type MIME document.
+
+ _text is the string for this message object.
+
+ _subtype is the MIME sub content type, defaulting to "plain".
+
+ _charset is the character set parameter added to the Content-Type
+ header. This defaults to "us-ascii". Note that as a side-effect, the
+ Content-Transfer-Encoding header will also be set.
+ """
+
+ # If no _charset was specified, check to see if there are non-ascii
+ # characters present. If not, use 'us-ascii', otherwise use utf-8.
+ # XXX: This can be removed once #7304 is fixed.
+ if _charset is None:
+ try:
+ _text.encode('us-ascii')
+ _charset = 'us-ascii'
+ except UnicodeEncodeError:
+ _charset = 'utf-8'
+
+ MIMENonMultipart.__init__(self, 'text', _subtype,
+ **{'charset': _charset})
+
+ self.set_payload(_text, _charset)
diff --git a/contrib/python/future/future/backports/email/parser.py b/contrib/python/future/future/backports/email/parser.py
index 7bff57028f..df1c6e2868 100644
--- a/contrib/python/future/future/backports/email/parser.py
+++ b/contrib/python/future/future/backports/email/parser.py
@@ -1,135 +1,135 @@
-# Copyright (C) 2001-2007 Python Software Foundation
-# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
-# Contact: email-sig@python.org
-
-"""A parser of RFC 2822 and MIME email messages."""
-from __future__ import unicode_literals
-from __future__ import division
-from __future__ import absolute_import
-
-__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser']
-
-import warnings
-from io import StringIO, TextIOWrapper
-
-from future.backports.email.feedparser import FeedParser, BytesFeedParser
-from future.backports.email.message import Message
-from future.backports.email._policybase import compat32
-
-
-class Parser(object):
- def __init__(self, _class=Message, **_3to2kwargs):
- """Parser of RFC 2822 and MIME email messages.
-
- Creates an in-memory object tree representing the email message, which
- can then be manipulated and turned over to a Generator to return the
- textual representation of the message.
-
- The string must be formatted as a block of RFC 2822 headers and header
- continuation lines, optionally preceeded by a `Unix-from' header. The
- header block is terminated either by the end of the string or by a
- blank line.
-
- _class is the class to instantiate for new message objects when they
- must be created. This class must have a constructor that can take
- zero arguments. Default is Message.Message.
-
- The policy keyword specifies a policy object that controls a number of
- aspects of the parser's operation. The default policy maintains
- backward compatibility.
-
- """
- if 'policy' in _3to2kwargs: policy = _3to2kwargs['policy']; del _3to2kwargs['policy']
- else: policy = compat32
- self._class = _class
- self.policy = policy
-
- def parse(self, fp, headersonly=False):
- """Create a message structure from the data in a file.
-
- Reads all the data from the file and returns the root of the message
- structure. Optional headersonly is a flag specifying whether to stop
- parsing after reading the headers or not. The default is False,
- meaning it parses the entire contents of the file.
- """
- feedparser = FeedParser(self._class, policy=self.policy)
- if headersonly:
- feedparser._set_headersonly()
- while True:
- data = fp.read(8192)
- if not data:
- break
- feedparser.feed(data)
- return feedparser.close()
-
- def parsestr(self, text, headersonly=False):
- """Create a message structure from a string.
-
- Returns the root of the message structure. Optional headersonly is a
- flag specifying whether to stop parsing after reading the headers or
- not. The default is False, meaning it parses the entire contents of
- the file.
- """
- return self.parse(StringIO(text), headersonly=headersonly)
-
-
-
-class HeaderParser(Parser):
- def parse(self, fp, headersonly=True):
- return Parser.parse(self, fp, True)
-
- def parsestr(self, text, headersonly=True):
- return Parser.parsestr(self, text, True)
-
-
-class BytesParser(object):
-
- def __init__(self, *args, **kw):
- """Parser of binary RFC 2822 and MIME email messages.
-
- Creates an in-memory object tree representing the email message, which
- can then be manipulated and turned over to a Generator to return the
- textual representation of the message.
-
- The input must be formatted as a block of RFC 2822 headers and header
- continuation lines, optionally preceeded by a `Unix-from' header. The
- header block is terminated either by the end of the input or by a
- blank line.
-
- _class is the class to instantiate for new message objects when they
- must be created. This class must have a constructor that can take
- zero arguments. Default is Message.Message.
- """
- self.parser = Parser(*args, **kw)
-
- def parse(self, fp, headersonly=False):
- """Create a message structure from the data in a binary file.
-
- Reads all the data from the file and returns the root of the message
- structure. Optional headersonly is a flag specifying whether to stop
- parsing after reading the headers or not. The default is False,
- meaning it parses the entire contents of the file.
- """
- fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
- with fp:
- return self.parser.parse(fp, headersonly)
-
-
- def parsebytes(self, text, headersonly=False):
- """Create a message structure from a byte string.
-
- Returns the root of the message structure. Optional headersonly is a
- flag specifying whether to stop parsing after reading the headers or
- not. The default is False, meaning it parses the entire contents of
- the file.
- """
- text = text.decode('ASCII', errors='surrogateescape')
- return self.parser.parsestr(text, headersonly)
-
-
-class BytesHeaderParser(BytesParser):
- def parse(self, fp, headersonly=True):
- return BytesParser.parse(self, fp, headersonly=True)
-
- def parsebytes(self, text, headersonly=True):
- return BytesParser.parsebytes(self, text, headersonly=True)
+# Copyright (C) 2001-2007 Python Software Foundation
+# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
+# Contact: email-sig@python.org
+
+"""A parser of RFC 2822 and MIME email messages."""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+
+__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser']
+
+import warnings
+from io import StringIO, TextIOWrapper
+
+from future.backports.email.feedparser import FeedParser, BytesFeedParser
+from future.backports.email.message import Message
+from future.backports.email._policybase import compat32
+
+
+class Parser(object):
+ def __init__(self, _class=Message, **_3to2kwargs):
+ """Parser of RFC 2822 and MIME email messages.
+
+ Creates an in-memory object tree representing the email message, which
+ can then be manipulated and turned over to a Generator to return the
+ textual representation of the message.
+
+ The string must be formatted as a block of RFC 2822 headers and header
+ continuation lines, optionally preceeded by a `Unix-from' header. The
+ header block is terminated either by the end of the string or by a
+ blank line.
+
+ _class is the class to instantiate for new message objects when they
+ must be created. This class must have a constructor that can take
+ zero arguments. Default is Message.Message.
+
+ The policy keyword specifies a policy object that controls a number of
+ aspects of the parser's operation. The default policy maintains
+ backward compatibility.
+
+ """
+ if 'policy' in _3to2kwargs: policy = _3to2kwargs['policy']; del _3to2kwargs['policy']
+ else: policy = compat32
+ self._class = _class
+ self.policy = policy
+
+ def parse(self, fp, headersonly=False):
+ """Create a message structure from the data in a file.
+
+ Reads all the data from the file and returns the root of the message
+ structure. Optional headersonly is a flag specifying whether to stop
+ parsing after reading the headers or not. The default is False,
+ meaning it parses the entire contents of the file.
+ """
+ feedparser = FeedParser(self._class, policy=self.policy)
+ if headersonly:
+ feedparser._set_headersonly()
+ while True:
+ data = fp.read(8192)
+ if not data:
+ break
+ feedparser.feed(data)
+ return feedparser.close()
+
+ def parsestr(self, text, headersonly=False):
+ """Create a message structure from a string.
+
+ Returns the root of the message structure. Optional headersonly is a
+ flag specifying whether to stop parsing after reading the headers or
+ not. The default is False, meaning it parses the entire contents of
+ the file.
+ """
+ return self.parse(StringIO(text), headersonly=headersonly)
+
+
+
+class HeaderParser(Parser):
+ def parse(self, fp, headersonly=True):
+ return Parser.parse(self, fp, True)
+
+ def parsestr(self, text, headersonly=True):
+ return Parser.parsestr(self, text, True)
+
+
+class BytesParser(object):
+
+ def __init__(self, *args, **kw):
+ """Parser of binary RFC 2822 and MIME email messages.
+
+ Creates an in-memory object tree representing the email message, which
+ can then be manipulated and turned over to a Generator to return the
+ textual representation of the message.
+
+ The input must be formatted as a block of RFC 2822 headers and header
+ continuation lines, optionally preceeded by a `Unix-from' header. The
+ header block is terminated either by the end of the input or by a
+ blank line.
+
+ _class is the class to instantiate for new message objects when they
+ must be created. This class must have a constructor that can take
+ zero arguments. Default is Message.Message.
+ """
+ self.parser = Parser(*args, **kw)
+
+ def parse(self, fp, headersonly=False):
+ """Create a message structure from the data in a binary file.
+
+ Reads all the data from the file and returns the root of the message
+ structure. Optional headersonly is a flag specifying whether to stop
+ parsing after reading the headers or not. The default is False,
+ meaning it parses the entire contents of the file.
+ """
+ fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
+ with fp:
+ return self.parser.parse(fp, headersonly)
+
+
+ def parsebytes(self, text, headersonly=False):
+ """Create a message structure from a byte string.
+
+ Returns the root of the message structure. Optional headersonly is a
+ flag specifying whether to stop parsing after reading the headers or
+ not. The default is False, meaning it parses the entire contents of
+ the file.
+ """
+ text = text.decode('ASCII', errors='surrogateescape')
+ return self.parser.parsestr(text, headersonly)
+
+
+class BytesHeaderParser(BytesParser):
+ def parse(self, fp, headersonly=True):
+ return BytesParser.parse(self, fp, headersonly=True)
+
+ def parsebytes(self, text, headersonly=True):
+ return BytesParser.parsebytes(self, text, headersonly=True)
diff --git a/contrib/python/future/future/backports/email/policy.py b/contrib/python/future/future/backports/email/policy.py
index ea02f173e2..2f609a23ae 100644
--- a/contrib/python/future/future/backports/email/policy.py
+++ b/contrib/python/future/future/backports/email/policy.py
@@ -1,193 +1,193 @@
-"""This will be the home for the policy that hooks in the new
-code that adds all the email6 features.
-"""
-from __future__ import unicode_literals
-from __future__ import division
-from __future__ import absolute_import
-from future.builtins import super
-
-from future.standard_library.email._policybase import (Policy, Compat32,
- compat32, _extend_docstrings)
-from future.standard_library.email.utils import _has_surrogates
-from future.standard_library.email.headerregistry import HeaderRegistry as HeaderRegistry
-
-__all__ = [
- 'Compat32',
- 'compat32',
- 'Policy',
- 'EmailPolicy',
- 'default',
- 'strict',
- 'SMTP',
- 'HTTP',
- ]
-
-@_extend_docstrings
-class EmailPolicy(Policy):
-
- """+
- PROVISIONAL
-
- The API extensions enabled by this policy are currently provisional.
- Refer to the documentation for details.
-
- This policy adds new header parsing and folding algorithms. Instead of
- simple strings, headers are custom objects with custom attributes
- depending on the type of the field. The folding algorithm fully
- implements RFCs 2047 and 5322.
-
- In addition to the settable attributes listed above that apply to
- all Policies, this policy adds the following additional attributes:
-
- refold_source -- if the value for a header in the Message object
- came from the parsing of some source, this attribute
- indicates whether or not a generator should refold
- that value when transforming the message back into
- stream form. The possible values are:
-
- none -- all source values use original folding
- long -- source values that have any line that is
- longer than max_line_length will be
- refolded
- all -- all values are refolded.
-
- The default is 'long'.
-
- header_factory -- a callable that takes two arguments, 'name' and
- 'value', where 'name' is a header field name and
- 'value' is an unfolded header field value, and
- returns a string-like object that represents that
- header. A default header_factory is provided that
- understands some of the RFC5322 header field types.
- (Currently address fields and date fields have
- special treatment, while all other fields are
- treated as unstructured. This list will be
- completed before the extension is marked stable.)
- """
-
- refold_source = 'long'
- header_factory = HeaderRegistry()
-
- def __init__(self, **kw):
- # Ensure that each new instance gets a unique header factory
- # (as opposed to clones, which share the factory).
- if 'header_factory' not in kw:
- object.__setattr__(self, 'header_factory', HeaderRegistry())
- super().__init__(**kw)
-
- def header_max_count(self, name):
- """+
- The implementation for this class returns the max_count attribute from
- the specialized header class that would be used to construct a header
- of type 'name'.
- """
- return self.header_factory[name].max_count
-
- # The logic of the next three methods is chosen such that it is possible to
- # switch a Message object between a Compat32 policy and a policy derived
- # from this class and have the results stay consistent. This allows a
- # Message object constructed with this policy to be passed to a library
- # that only handles Compat32 objects, or to receive such an object and
- # convert it to use the newer style by just changing its policy. It is
- # also chosen because it postpones the relatively expensive full rfc5322
- # parse until as late as possible when parsing from source, since in many
- # applications only a few headers will actually be inspected.
-
- def header_source_parse(self, sourcelines):
- """+
- The name is parsed as everything up to the ':' and returned unmodified.
- The value is determined by stripping leading whitespace off the
- remainder of the first line, joining all subsequent lines together, and
- stripping any trailing carriage return or linefeed characters. (This
- is the same as Compat32).
-
- """
- name, value = sourcelines[0].split(':', 1)
- value = value.lstrip(' \t') + ''.join(sourcelines[1:])
- return (name, value.rstrip('\r\n'))
-
- def header_store_parse(self, name, value):
- """+
- The name is returned unchanged. If the input value has a 'name'
- attribute and it matches the name ignoring case, the value is returned
- unchanged. Otherwise the name and value are passed to header_factory
- method, and the resulting custom header object is returned as the
- value. In this case a ValueError is raised if the input value contains
- CR or LF characters.
-
- """
- if hasattr(value, 'name') and value.name.lower() == name.lower():
- return (name, value)
- if isinstance(value, str) and len(value.splitlines())>1:
- raise ValueError("Header values may not contain linefeed "
- "or carriage return characters")
- return (name, self.header_factory(name, value))
-
- def header_fetch_parse(self, name, value):
- """+
- If the value has a 'name' attribute, it is returned to unmodified.
- Otherwise the name and the value with any linesep characters removed
- are passed to the header_factory method, and the resulting custom
- header object is returned. Any surrogateescaped bytes get turned
- into the unicode unknown-character glyph.
-
- """
- if hasattr(value, 'name'):
- return value
- return self.header_factory(name, ''.join(value.splitlines()))
-
- def fold(self, name, value):
- """+
- Header folding is controlled by the refold_source policy setting. A
- value is considered to be a 'source value' if and only if it does not
- have a 'name' attribute (having a 'name' attribute means it is a header
- object of some sort). If a source value needs to be refolded according
- to the policy, it is converted into a custom header object by passing
- the name and the value with any linesep characters removed to the
- header_factory method. Folding of a custom header object is done by
- calling its fold method with the current policy.
-
- Source values are split into lines using splitlines. If the value is
- not to be refolded, the lines are rejoined using the linesep from the
- policy and returned. The exception is lines containing non-ascii
- binary data. In that case the value is refolded regardless of the
- refold_source setting, which causes the binary data to be CTE encoded
- using the unknown-8bit charset.
-
- """
- return self._fold(name, value, refold_binary=True)
-
- def fold_binary(self, name, value):
- """+
- The same as fold if cte_type is 7bit, except that the returned value is
- bytes.
-
- If cte_type is 8bit, non-ASCII binary data is converted back into
- bytes. Headers with binary data are not refolded, regardless of the
- refold_header setting, since there is no way to know whether the binary
- data consists of single byte characters or multibyte characters.
-
- """
- folded = self._fold(name, value, refold_binary=self.cte_type=='7bit')
- return folded.encode('ascii', 'surrogateescape')
-
- def _fold(self, name, value, refold_binary=False):
- if hasattr(value, 'name'):
- return value.fold(policy=self)
- maxlen = self.max_line_length if self.max_line_length else float('inf')
- lines = value.splitlines()
- refold = (self.refold_source == 'all' or
- self.refold_source == 'long' and
- (lines and len(lines[0])+len(name)+2 > maxlen or
- any(len(x) > maxlen for x in lines[1:])))
- if refold or refold_binary and _has_surrogates(value):
- return self.header_factory(name, ''.join(lines)).fold(policy=self)
- return name + ': ' + self.linesep.join(lines) + self.linesep
-
-
-default = EmailPolicy()
-# Make the default policy use the class default header_factory
-del default.header_factory
-strict = default.clone(raise_on_defect=True)
-SMTP = default.clone(linesep='\r\n')
-HTTP = default.clone(linesep='\r\n', max_line_length=None)
+"""This will be the home for the policy that hooks in the new
+code that adds all the email6 features.
+"""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import super
+
+from future.standard_library.email._policybase import (Policy, Compat32,
+ compat32, _extend_docstrings)
+from future.standard_library.email.utils import _has_surrogates
+from future.standard_library.email.headerregistry import HeaderRegistry as HeaderRegistry
+
+__all__ = [
+ 'Compat32',
+ 'compat32',
+ 'Policy',
+ 'EmailPolicy',
+ 'default',
+ 'strict',
+ 'SMTP',
+ 'HTTP',
+ ]
+
+@_extend_docstrings
+class EmailPolicy(Policy):
+
+ """+
+ PROVISIONAL
+
+ The API extensions enabled by this policy are currently provisional.
+ Refer to the documentation for details.
+
+ This policy adds new header parsing and folding algorithms. Instead of
+ simple strings, headers are custom objects with custom attributes
+ depending on the type of the field. The folding algorithm fully
+ implements RFCs 2047 and 5322.
+
+ In addition to the settable attributes listed above that apply to
+ all Policies, this policy adds the following additional attributes:
+
+ refold_source -- if the value for a header in the Message object
+ came from the parsing of some source, this attribute
+ indicates whether or not a generator should refold
+ that value when transforming the message back into
+ stream form. The possible values are:
+
+ none -- all source values use original folding
+ long -- source values that have any line that is
+ longer than max_line_length will be
+ refolded
+ all -- all values are refolded.
+
+ The default is 'long'.
+
+ header_factory -- a callable that takes two arguments, 'name' and
+ 'value', where 'name' is a header field name and
+ 'value' is an unfolded header field value, and
+ returns a string-like object that represents that
+ header. A default header_factory is provided that
+ understands some of the RFC5322 header field types.
+ (Currently address fields and date fields have
+ special treatment, while all other fields are
+ treated as unstructured. This list will be
+ completed before the extension is marked stable.)
+ """
+
+ refold_source = 'long'
+ header_factory = HeaderRegistry()
+
+ def __init__(self, **kw):
+ # Ensure that each new instance gets a unique header factory
+ # (as opposed to clones, which share the factory).
+ if 'header_factory' not in kw:
+ object.__setattr__(self, 'header_factory', HeaderRegistry())
+ super().__init__(**kw)
+
+ def header_max_count(self, name):
+ """+
+ The implementation for this class returns the max_count attribute from
+ the specialized header class that would be used to construct a header
+ of type 'name'.
+ """
+ return self.header_factory[name].max_count
+
+ # The logic of the next three methods is chosen such that it is possible to
+ # switch a Message object between a Compat32 policy and a policy derived
+ # from this class and have the results stay consistent. This allows a
+ # Message object constructed with this policy to be passed to a library
+ # that only handles Compat32 objects, or to receive such an object and
+ # convert it to use the newer style by just changing its policy. It is
+ # also chosen because it postpones the relatively expensive full rfc5322
+ # parse until as late as possible when parsing from source, since in many
+ # applications only a few headers will actually be inspected.
+
+ def header_source_parse(self, sourcelines):
+ """+
+ The name is parsed as everything up to the ':' and returned unmodified.
+ The value is determined by stripping leading whitespace off the
+ remainder of the first line, joining all subsequent lines together, and
+ stripping any trailing carriage return or linefeed characters. (This
+ is the same as Compat32).
+
+ """
+ name, value = sourcelines[0].split(':', 1)
+ value = value.lstrip(' \t') + ''.join(sourcelines[1:])
+ return (name, value.rstrip('\r\n'))
+
+ def header_store_parse(self, name, value):
+ """+
+ The name is returned unchanged. If the input value has a 'name'
+ attribute and it matches the name ignoring case, the value is returned
+ unchanged. Otherwise the name and value are passed to header_factory
+ method, and the resulting custom header object is returned as the
+ value. In this case a ValueError is raised if the input value contains
+ CR or LF characters.
+
+ """
+ if hasattr(value, 'name') and value.name.lower() == name.lower():
+ return (name, value)
+ if isinstance(value, str) and len(value.splitlines())>1:
+ raise ValueError("Header values may not contain linefeed "
+ "or carriage return characters")
+ return (name, self.header_factory(name, value))
+
+ def header_fetch_parse(self, name, value):
+ """+
+ If the value has a 'name' attribute, it is returned to unmodified.
+ Otherwise the name and the value with any linesep characters removed
+ are passed to the header_factory method, and the resulting custom
+ header object is returned. Any surrogateescaped bytes get turned
+ into the unicode unknown-character glyph.
+
+ """
+ if hasattr(value, 'name'):
+ return value
+ return self.header_factory(name, ''.join(value.splitlines()))
+
+ def fold(self, name, value):
+ """+
+ Header folding is controlled by the refold_source policy setting. A
+ value is considered to be a 'source value' if and only if it does not
+ have a 'name' attribute (having a 'name' attribute means it is a header
+ object of some sort). If a source value needs to be refolded according
+ to the policy, it is converted into a custom header object by passing
+ the name and the value with any linesep characters removed to the
+ header_factory method. Folding of a custom header object is done by
+ calling its fold method with the current policy.
+
+ Source values are split into lines using splitlines. If the value is
+ not to be refolded, the lines are rejoined using the linesep from the
+ policy and returned. The exception is lines containing non-ascii
+ binary data. In that case the value is refolded regardless of the
+ refold_source setting, which causes the binary data to be CTE encoded
+ using the unknown-8bit charset.
+
+ """
+ return self._fold(name, value, refold_binary=True)
+
+ def fold_binary(self, name, value):
+ """+
+ The same as fold if cte_type is 7bit, except that the returned value is
+ bytes.
+
+ If cte_type is 8bit, non-ASCII binary data is converted back into
+ bytes. Headers with binary data are not refolded, regardless of the
+ refold_header setting, since there is no way to know whether the binary
+ data consists of single byte characters or multibyte characters.
+
+ """
+ folded = self._fold(name, value, refold_binary=self.cte_type=='7bit')
+ return folded.encode('ascii', 'surrogateescape')
+
+ def _fold(self, name, value, refold_binary=False):
+ if hasattr(value, 'name'):
+ return value.fold(policy=self)
+ maxlen = self.max_line_length if self.max_line_length else float('inf')
+ lines = value.splitlines()
+ refold = (self.refold_source == 'all' or
+ self.refold_source == 'long' and
+ (lines and len(lines[0])+len(name)+2 > maxlen or
+ any(len(x) > maxlen for x in lines[1:])))
+ if refold or refold_binary and _has_surrogates(value):
+ return self.header_factory(name, ''.join(lines)).fold(policy=self)
+ return name + ': ' + self.linesep.join(lines) + self.linesep
+
+
+default = EmailPolicy()
+# Make the default policy use the class default header_factory
+del default.header_factory
+strict = default.clone(raise_on_defect=True)
+SMTP = default.clone(linesep='\r\n')
+HTTP = default.clone(linesep='\r\n', max_line_length=None)
diff --git a/contrib/python/future/future/backports/email/quoprimime.py b/contrib/python/future/future/backports/email/quoprimime.py
index d295aae1a5..b69d158bc4 100644
--- a/contrib/python/future/future/backports/email/quoprimime.py
+++ b/contrib/python/future/future/backports/email/quoprimime.py
@@ -1,326 +1,326 @@
-# Copyright (C) 2001-2006 Python Software Foundation
-# Author: Ben Gertzfield
-# Contact: email-sig@python.org
-
-"""Quoted-printable content transfer encoding per RFCs 2045-2047.
-
-This module handles the content transfer encoding method defined in RFC 2045
-to encode US ASCII-like 8-bit data called `quoted-printable'. It is used to
-safely encode text that is in a character set similar to the 7-bit US ASCII
-character set, but that includes some 8-bit characters that are normally not
-allowed in email bodies or headers.
-
-Quoted-printable is very space-inefficient for encoding binary files; use the
-email.base64mime module for that instead.
-
-This module provides an interface to encode and decode both headers and bodies
-with quoted-printable encoding.
-
-RFC 2045 defines a method for including character set information in an
-`encoded-word' in a header. This method is commonly used for 8-bit real names
-in To:/From:/Cc: etc. fields, as well as Subject: lines.
-
-This module does not do the line wrapping or end-of-line character
-conversion necessary for proper internationalized headers; it only
-does dumb encoding and decoding. To deal with the various line
-wrapping issues, use the email.header module.
-"""
-from __future__ import unicode_literals
-from __future__ import division
-from __future__ import absolute_import
-from future.builtins import bytes, chr, dict, int, range, super
-
-__all__ = [
- 'body_decode',
- 'body_encode',
- 'body_length',
- 'decode',
- 'decodestring',
- 'header_decode',
- 'header_encode',
- 'header_length',
- 'quote',
- 'unquote',
- ]
-
-import re
-import io
-
-from string import ascii_letters, digits, hexdigits
-
-CRLF = '\r\n'
-NL = '\n'
-EMPTYSTRING = ''
-
-# Build a mapping of octets to the expansion of that octet. Since we're only
-# going to have 256 of these things, this isn't terribly inefficient
-# space-wise. Remember that headers and bodies have different sets of safe
-# characters. Initialize both maps with the full expansion, and then override
-# the safe bytes with the more compact form.
-_QUOPRI_HEADER_MAP = dict((c, '=%02X' % c) for c in range(256))
-_QUOPRI_BODY_MAP = _QUOPRI_HEADER_MAP.copy()
-
-# Safe header bytes which need no encoding.
-for c in bytes(b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii')):
- _QUOPRI_HEADER_MAP[c] = chr(c)
-# Headers have one other special encoding; spaces become underscores.
-_QUOPRI_HEADER_MAP[ord(' ')] = '_'
-
-# Safe body bytes which need no encoding.
-for c in bytes(b' !"#$%&\'()*+,-./0123456789:;<>'
- b'?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`'
- b'abcdefghijklmnopqrstuvwxyz{|}~\t'):
- _QUOPRI_BODY_MAP[c] = chr(c)
-
-
-
-# Helpers
-def header_check(octet):
- """Return True if the octet should be escaped with header quopri."""
- return chr(octet) != _QUOPRI_HEADER_MAP[octet]
-
-
-def body_check(octet):
- """Return True if the octet should be escaped with body quopri."""
- return chr(octet) != _QUOPRI_BODY_MAP[octet]
-
-
-def header_length(bytearray):
- """Return a header quoted-printable encoding length.
-
- Note that this does not include any RFC 2047 chrome added by
- `header_encode()`.
-
- :param bytearray: An array of bytes (a.k.a. octets).
- :return: The length in bytes of the byte array when it is encoded with
- quoted-printable for headers.
- """
- return sum(len(_QUOPRI_HEADER_MAP[octet]) for octet in bytearray)
-
-
-def body_length(bytearray):
- """Return a body quoted-printable encoding length.
-
- :param bytearray: An array of bytes (a.k.a. octets).
- :return: The length in bytes of the byte array when it is encoded with
- quoted-printable for bodies.
- """
- return sum(len(_QUOPRI_BODY_MAP[octet]) for octet in bytearray)
-
-
-def _max_append(L, s, maxlen, extra=''):
- if not isinstance(s, str):
- s = chr(s)
- if not L:
- L.append(s.lstrip())
- elif len(L[-1]) + len(s) <= maxlen:
- L[-1] += extra + s
- else:
- L.append(s.lstrip())
-
-
-def unquote(s):
- """Turn a string in the form =AB to the ASCII character with value 0xab"""
- return chr(int(s[1:3], 16))
-
-
-def quote(c):
- return '=%02X' % ord(c)
-
-
-
-def header_encode(header_bytes, charset='iso-8859-1'):
- """Encode a single header line with quoted-printable (like) encoding.
-
- Defined in RFC 2045, this `Q' encoding is similar to quoted-printable, but
- used specifically for email header fields to allow charsets with mostly 7
- bit characters (and some 8 bit) to remain more or less readable in non-RFC
- 2045 aware mail clients.
-
- charset names the character set to use in the RFC 2046 header. It
- defaults to iso-8859-1.
- """
- # Return empty headers as an empty string.
- if not header_bytes:
- return ''
- # Iterate over every byte, encoding if necessary.
- encoded = []
- for octet in header_bytes:
- encoded.append(_QUOPRI_HEADER_MAP[octet])
- # Now add the RFC chrome to each encoded chunk and glue the chunks
- # together.
- return '=?%s?q?%s?=' % (charset, EMPTYSTRING.join(encoded))
-
-
-class _body_accumulator(io.StringIO):
-
- def __init__(self, maxlinelen, eol, *args, **kw):
- super().__init__(*args, **kw)
- self.eol = eol
- self.maxlinelen = self.room = maxlinelen
-
- def write_str(self, s):
- """Add string s to the accumulated body."""
- self.write(s)
- self.room -= len(s)
-
- def newline(self):
- """Write eol, then start new line."""
- self.write_str(self.eol)
- self.room = self.maxlinelen
-
- def write_soft_break(self):
- """Write a soft break, then start a new line."""
- self.write_str('=')
- self.newline()
-
- def write_wrapped(self, s, extra_room=0):
- """Add a soft line break if needed, then write s."""
- if self.room < len(s) + extra_room:
- self.write_soft_break()
- self.write_str(s)
-
- def write_char(self, c, is_last_char):
- if not is_last_char:
- # Another character follows on this line, so we must leave
- # extra room, either for it or a soft break, and whitespace
- # need not be quoted.
- self.write_wrapped(c, extra_room=1)
- elif c not in ' \t':
- # For this and remaining cases, no more characters follow,
- # so there is no need to reserve extra room (since a hard
- # break will immediately follow).
- self.write_wrapped(c)
- elif self.room >= 3:
- # It's a whitespace character at end-of-line, and we have room
- # for the three-character quoted encoding.
- self.write(quote(c))
- elif self.room == 2:
- # There's room for the whitespace character and a soft break.
- self.write(c)
- self.write_soft_break()
- else:
- # There's room only for a soft break. The quoted whitespace
- # will be the only content on the subsequent line.
- self.write_soft_break()
- self.write(quote(c))
-
-
-def body_encode(body, maxlinelen=76, eol=NL):
- """Encode with quoted-printable, wrapping at maxlinelen characters.
-
- Each line of encoded text will end with eol, which defaults to "\\n". Set
- this to "\\r\\n" if you will be using the result of this function directly
- in an email.
-
- Each line will be wrapped at, at most, maxlinelen characters before the
- eol string (maxlinelen defaults to 76 characters, the maximum value
- permitted by RFC 2045). Long lines will have the 'soft line break'
- quoted-printable character "=" appended to them, so the decoded text will
- be identical to the original text.
-
- The minimum maxlinelen is 4 to have room for a quoted character ("=XX")
- followed by a soft line break. Smaller values will generate a
- ValueError.
-
- """
-
- if maxlinelen < 4:
- raise ValueError("maxlinelen must be at least 4")
- if not body:
- return body
-
- # The last line may or may not end in eol, but all other lines do.
- last_has_eol = (body[-1] in '\r\n')
-
- # This accumulator will make it easier to build the encoded body.
- encoded_body = _body_accumulator(maxlinelen, eol)
-
- lines = body.splitlines()
- last_line_no = len(lines) - 1
- for line_no, line in enumerate(lines):
- last_char_index = len(line) - 1
- for i, c in enumerate(line):
- if body_check(ord(c)):
- c = quote(c)
- encoded_body.write_char(c, i==last_char_index)
- # Add an eol if input line had eol. All input lines have eol except
- # possibly the last one.
- if line_no < last_line_no or last_has_eol:
- encoded_body.newline()
-
- return encoded_body.getvalue()
-
-
-
-# BAW: I'm not sure if the intent was for the signature of this function to be
-# the same as base64MIME.decode() or not...
-def decode(encoded, eol=NL):
- """Decode a quoted-printable string.
-
- Lines are separated with eol, which defaults to \\n.
- """
- if not encoded:
- return encoded
- # BAW: see comment in encode() above. Again, we're building up the
- # decoded string with string concatenation, which could be done much more
- # efficiently.
- decoded = ''
-
- for line in encoded.splitlines():
- line = line.rstrip()
- if not line:
- decoded += eol
- continue
-
- i = 0
- n = len(line)
- while i < n:
- c = line[i]
- if c != '=':
- decoded += c
- i += 1
- # Otherwise, c == "=". Are we at the end of the line? If so, add
- # a soft line break.
- elif i+1 == n:
- i += 1
- continue
- # Decode if in form =AB
- elif i+2 < n and line[i+1] in hexdigits and line[i+2] in hexdigits:
- decoded += unquote(line[i:i+3])
- i += 3
- # Otherwise, not in form =AB, pass literally
- else:
- decoded += c
- i += 1
-
- if i == n:
- decoded += eol
- # Special case if original string did not end with eol
- if encoded[-1] not in '\r\n' and decoded.endswith(eol):
- decoded = decoded[:-1]
- return decoded
-
-
-# For convenience and backwards compatibility w/ standard base64 module
-body_decode = decode
-decodestring = decode
-
-
-
-def _unquote_match(match):
- """Turn a match in the form =AB to the ASCII character with value 0xab"""
- s = match.group(0)
- return unquote(s)
-
-
-# Header decoding is done a bit differently
-def header_decode(s):
- """Decode a string encoded with RFC 2045 MIME header `Q' encoding.
-
- This function does not parse a full MIME header value encoded with
- quoted-printable (like =?iso-8895-1?q?Hello_World?=) -- please use
- the high level email.header class for that functionality.
- """
- s = s.replace('_', ' ')
- return re.sub(r'=[a-fA-F0-9]{2}', _unquote_match, s, re.ASCII)
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Ben Gertzfield
+# Contact: email-sig@python.org
+
+"""Quoted-printable content transfer encoding per RFCs 2045-2047.
+
+This module handles the content transfer encoding method defined in RFC 2045
+to encode US ASCII-like 8-bit data called `quoted-printable'. It is used to
+safely encode text that is in a character set similar to the 7-bit US ASCII
+character set, but that includes some 8-bit characters that are normally not
+allowed in email bodies or headers.
+
+Quoted-printable is very space-inefficient for encoding binary files; use the
+email.base64mime module for that instead.
+
+This module provides an interface to encode and decode both headers and bodies
+with quoted-printable encoding.
+
+RFC 2045 defines a method for including character set information in an
+`encoded-word' in a header. This method is commonly used for 8-bit real names
+in To:/From:/Cc: etc. fields, as well as Subject: lines.
+
+This module does not do the line wrapping or end-of-line character
+conversion necessary for proper internationalized headers; it only
+does dumb encoding and decoding. To deal with the various line
+wrapping issues, use the email.header module.
+"""
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import bytes, chr, dict, int, range, super
+
+__all__ = [
+ 'body_decode',
+ 'body_encode',
+ 'body_length',
+ 'decode',
+ 'decodestring',
+ 'header_decode',
+ 'header_encode',
+ 'header_length',
+ 'quote',
+ 'unquote',
+ ]
+
+import re
+import io
+
+from string import ascii_letters, digits, hexdigits
+
# Line-ending and empty-string constants.
CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''

# Build a mapping of octets to the expansion of that octet.  Since we're only
# going to have 256 of these things, this isn't terribly inefficient
# space-wise.  Remember that headers and bodies have different sets of safe
# characters.  Initialize both maps with the full expansion, and then override
# the safe bytes with the more compact form.
# Keys are the integers 0..255; values are either the '=XX' escape (upper-case
# hex) or the literal one-character string for a safe octet.
_QUOPRI_HEADER_MAP = dict((c, '=%02X' % c) for c in range(256))
_QUOPRI_BODY_MAP = _QUOPRI_HEADER_MAP.copy()

# Safe header bytes which need no encoding.
# Only '-', '!', '*', '+', '/', ASCII letters and digits pass through as-is.
for c in bytes(b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii')):
    _QUOPRI_HEADER_MAP[c] = chr(c)
# Headers have one other special encoding; spaces become underscores.
_QUOPRI_HEADER_MAP[ord(' ')] = '_'

# Safe body bytes which need no encoding.
# '=' is deliberately absent from this list (it introduces an escape), so it
# keeps its '=3D' expansion; space and tab are listed as safe here.
for c in bytes(b' !"#$%&\'()*+,-./0123456789:;<>'
               b'?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`'
               b'abcdefghijklmnopqrstuvwxyz{|}~\t'):
    _QUOPRI_BODY_MAP[c] = chr(c)
+
+
+
+# Helpers
def header_check(octet):
    """Tell whether header quopri would change this octet.

    :param octet: An integer octet value used as a key of the header map.
    :return: True when the header map's entry for the octet differs from
        the plain character chr(octet), False otherwise.
    """
    literal = chr(octet)
    return _QUOPRI_HEADER_MAP[octet] != literal
+
+
def body_check(octet):
    """Tell whether body quopri would change this octet.

    :param octet: An integer octet value used as a key of the body map.
    :return: True when the body map's entry for the octet differs from the
        plain character chr(octet), False otherwise.
    """
    literal = chr(octet)
    return _QUOPRI_BODY_MAP[octet] != literal
+
+
def header_length(bytearray):
    """Compute the size of a byte sequence once header-quopri encoded.

    The RFC 2047 chrome that `header_encode()` wraps around the payload is
    not counted.

    :param bytearray: An iterable of octets (integers usable as map keys).
    :return: The summed length of every octet's header-map expansion.
    """
    total = 0
    for octet in bytearray:
        total += len(_QUOPRI_HEADER_MAP[octet])
    return total
+
+
def body_length(bytearray):
    """Compute the size of a byte sequence once body-quopri encoded.

    :param bytearray: An iterable of octets (integers usable as map keys).
    :return: The summed length of every octet's body-map expansion.
    """
    total = 0
    for octet in bytearray:
        total += len(_QUOPRI_BODY_MAP[octet])
    return total
+
+
+def _max_append(L, s, maxlen, extra=''):
+ if not isinstance(s, str):
+ s = chr(s)
+ if not L:
+ L.append(s.lstrip())
+ elif len(L[-1]) + len(s) <= maxlen:
+ L[-1] += extra + s
+ else:
+ L.append(s.lstrip())
+
+
def unquote(s):
    """Convert an escape of the form =AB into the character with value 0xAB.

    Only s[1:3] is examined: the first character is skipped and the next
    two are parsed as a hexadecimal number.
    """
    hex_pair = s[1:3]
    return chr(int(hex_pair, 16))
+
+
def quote(c):
    """Return the escape for character c: '=' followed by its code point
    in upper-case hexadecimal, zero-padded to at least two digits."""
    return '={:02X}'.format(ord(c))
+
+
+
def header_encode(header_bytes, charset='iso-8859-1'):
    """Encode a single header line as an RFC 2047 `Q' encoded-word.

    The `Q' encoding resembles quoted-printable but is meant for header
    fields, so that charsets made of mostly 7-bit characters (plus some
    8-bit ones) stay more or less readable in mail clients that do not
    understand encoded-words.

    :param header_bytes: An iterable of octets to encode.
    :param charset: The character set name written into the encoded-word;
        defaults to iso-8859-1.
    :return: '' for empty input, otherwise '=?<charset>?q?<payload>?='.
    """
    # Empty headers stay an empty string, without any chrome.
    if not header_bytes:
        return ''
    # Expand every octet through the header map, then add the RFC chrome.
    payload = EMPTYSTRING.join(_QUOPRI_HEADER_MAP[octet]
                               for octet in header_bytes)
    return '=?%s?q?%s?=' % (charset, payload)
+
+
+class _body_accumulator(io.StringIO):
+
+ def __init__(self, maxlinelen, eol, *args, **kw):
+ super().__init__(*args, **kw)
+ self.eol = eol
+ self.maxlinelen = self.room = maxlinelen
+
+ def write_str(self, s):
+ """Add string s to the accumulated body."""
+ self.write(s)
+ self.room -= len(s)
+
+ def newline(self):
+ """Write eol, then start new line."""
+ self.write_str(self.eol)
+ self.room = self.maxlinelen
+
+ def write_soft_break(self):
+ """Write a soft break, then start a new line."""
+ self.write_str('=')
+ self.newline()
+
+ def write_wrapped(self, s, extra_room=0):
+ """Add a soft line break if needed, then write s."""
+ if self.room < len(s) + extra_room:
+ self.write_soft_break()
+ self.write_str(s)
+
+ def write_char(self, c, is_last_char):
+ if not is_last_char:
+ # Another character follows on this line, so we must leave
+ # extra room, either for it or a soft break, and whitespace
+ # need not be quoted.
+ self.write_wrapped(c, extra_room=1)
+ elif c not in ' \t':
+ # For this and remaining cases, no more characters follow,
+ # so there is no need to reserve extra room (since a hard
+ # break will immediately follow).
+ self.write_wrapped(c)
+ elif self.room >= 3:
+ # It's a whitespace character at end-of-line, and we have room
+ # for the three-character quoted encoding.
+ self.write(quote(c))
+ elif self.room == 2:
+ # There's room for the whitespace character and a soft break.
+ self.write(c)
+ self.write_soft_break()
+ else:
+ # There's room only for a soft break. The quoted whitespace
+ # will be the only content on the subsequent line.
+ self.write_soft_break()
+ self.write(quote(c))
+
+
def body_encode(body, maxlinelen=76, eol=NL):
    """Quoted-printable encode body, wrapping lines at maxlinelen characters.

    Every encoded line ends with eol (default "\\n"); pass "\\r\\n" when the
    result goes straight into an email.

    No encoded line is longer than maxlinelen characters before its eol
    (76 by default, the maximum RFC 2045 permits).  Lines that have to be
    split get the soft line break character "=" appended, so decoding
    yields the original text again.

    maxlinelen must be at least 4, which leaves room for one quoted
    character ("=XX") followed by a soft line break; smaller values raise
    ValueError.  An empty body is returned unchanged.
    """
    if maxlinelen < 4:
        raise ValueError("maxlinelen must be at least 4")
    if not body:
        return body

    # Every input line ends in an eol, except possibly the last one.
    ends_with_eol = body[-1] in '\r\n'

    out = _body_accumulator(maxlinelen, eol)
    lines = body.splitlines()
    final_line = len(lines) - 1
    for line_no, line in enumerate(lines):
        final_char = len(line) - 1
        for pos, ch in enumerate(line):
            token = quote(ch) if body_check(ord(ch)) else ch
            out.write_char(token, pos == final_char)
        # Reproduce the hard break only where the input line had one.
        if ends_with_eol or line_no < final_line:
            out.newline()

    return out.getvalue()
+
+
+
# BAW: I'm not sure if the intent was for the signature of this function to be
# the same as base64MIME.decode() or not...
def decode(encoded, eol=NL):
    """Decode a quoted-printable string.

    Each input line is stripped of trailing whitespace and decoded:
    ``=XX`` (two hex digits) becomes the character with that value, a lone
    ``=`` at the end of a line is a soft line break that joins the line to
    the next one, and any other ``=`` is passed through literally.

    :param encoded: The quoted-printable text; a falsy value is returned
        unchanged.
    :param eol: The separator written after every decoded line that does
        not end in a soft break; defaults to \\n.
    :return: The decoded text.  When the input does not end in a line
        break, one trailing eol is removed from the result again.
    """
    if not encoded:
        return encoded

    # Collect the pieces and join once instead of repeated concatenation.
    pieces = []
    for line in encoded.splitlines():
        line = line.rstrip()
        if not line:
            pieces.append(eol)
            continue

        i = 0
        n = len(line)
        soft_break = False
        while i < n:
            c = line[i]
            if c != '=':
                pieces.append(c)
                i += 1
            # Otherwise, c == "=".  Are we at the end of the line?  If so,
            # it is a soft line break: no eol is emitted for this line.
            elif i + 1 == n:
                soft_break = True
                i += 1
            # Decode if in form =AB
            elif (i + 2 < n and line[i + 1] in hexdigits
                    and line[i + 2] in hexdigits):
                pieces.append(unquote(line[i:i + 3]))
                i += 3
            # Otherwise, not in form =AB, pass literally
            else:
                pieces.append(c)
                i += 1

        if not soft_break:
            pieces.append(eol)

    decoded = EMPTYSTRING.join(pieces)
    # Special case if original string did not end with eol: drop the eol
    # appended after the last line.  Remove the whole separator (it may be
    # longer than one character) and nothing at all when eol is empty.
    if eol and encoded[-1] not in '\r\n' and decoded.endswith(eol):
        decoded = decoded[:-len(eol)]
    return decoded


# For convenience and backwards compatibility w/ standard base64 module
body_decode = decode
decodestring = decode
+
+
+
def _unquote_match(match):
    """Turn a match in the form =AB to the ASCII character with value 0xab"""
    # Skip the leading '=' and parse the two hex digits that follow it.
    return chr(int(match.group(0)[1:3], 16))


# Header decoding is done a bit differently
def header_decode(s):
    """Decode a string encoded with the RFC 2047 MIME header `Q' encoding.

    Underscores are turned into spaces first, then every ``=XX`` escape
    (two hex digits) is replaced by the character with that value.

    This function does not parse a full MIME header value encoded with
    quoted-printable (like =?iso-8859-1?q?Hello_World?=) -- please use
    the high level email.header class for that functionality.

    :param s: The `Q'-encoded payload of an encoded-word.
    :return: The decoded string.
    """
    s = s.replace('_', ' ')
    # flags has to be passed by keyword: the fourth positional parameter of
    # re.sub() is count, so a positional re.ASCII would cap the number of
    # substitutions instead of selecting ASCII-only matching.
    return re.sub(r'=[a-fA-F0-9]{2}', _unquote_match, s, flags=re.ASCII)
diff --git a/contrib/python/future/future/backports/email/utils.py b/contrib/python/future/future/backports/email/utils.py
index bbbf223354..4abebf7cb6 100644
--- a/contrib/python/future/future/backports/email/utils.py
+++ b/contrib/python/future/future/backports/email/utils.py
@@ -1,400 +1,400 @@
-# Copyright (C) 2001-2010 Python Software Foundation
-# Author: Barry Warsaw
-# Contact: email-sig@python.org
-
-"""Miscellaneous utilities."""
-
-from __future__ import unicode_literals
-from __future__ import division
-from __future__ import absolute_import
-from future import utils
-from future.builtins import bytes, int, str
-
-__all__ = [
- 'collapse_rfc2231_value',
- 'decode_params',
- 'decode_rfc2231',
- 'encode_rfc2231',
- 'formataddr',
- 'formatdate',
- 'format_datetime',
- 'getaddresses',
- 'make_msgid',
- 'mktime_tz',
- 'parseaddr',
- 'parsedate',
- 'parsedate_tz',
- 'parsedate_to_datetime',
- 'unquote',
- ]
-
-import os
-import re
-if utils.PY2:
- re.ASCII = 0
-import time
-import base64
-import random
-import socket
-from future.backports import datetime
-from future.backports.urllib.parse import quote as url_quote, unquote as url_unquote
-import warnings
-from io import StringIO
-
-from future.backports.email._parseaddr import quote
-from future.backports.email._parseaddr import AddressList as _AddressList
-from future.backports.email._parseaddr import mktime_tz
-
-from future.backports.email._parseaddr import parsedate, parsedate_tz, _parsedate_tz
-
-from quopri import decodestring as _qdecode
-
-# Intrapackage imports
-from future.backports.email.encoders import _bencode, _qencode
-from future.backports.email.charset import Charset
-
-COMMASPACE = ', '
-EMPTYSTRING = ''
-UEMPTYSTRING = ''
-CRLF = '\r\n'
-TICK = "'"
-
-specialsre = re.compile(r'[][\\()<>@,:;".]')
-escapesre = re.compile(r'[\\"]')
-
-# How to figure out if we are processing strings that come from a byte
-# source with undecodable characters.
-_has_surrogates = re.compile(
- '([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)').search
-
-# How to deal with a string containing bytes before handing it to the
-# application through the 'normal' interface.
-def _sanitize(string):
- # Turn any escaped bytes into unicode 'unknown' char.
- original_bytes = string.encode('ascii', 'surrogateescape')
- return original_bytes.decode('ascii', 'replace')
-
-
-# Helpers
-
-def formataddr(pair, charset='utf-8'):
- """The inverse of parseaddr(), this takes a 2-tuple of the form
- (realname, email_address) and returns the string value suitable
- for an RFC 2822 From, To or Cc header.
-
- If the first element of pair is false, then the second element is
- returned unmodified.
-
- Optional charset if given is the character set that is used to encode
- realname in case realname is not ASCII safe. Can be an instance of str or
- a Charset-like object which has a header_encode method. Default is
- 'utf-8'.
- """
- name, address = pair
- # The address MUST (per RFC) be ascii, so raise an UnicodeError if it isn't.
- address.encode('ascii')
- if name:
- try:
- name.encode('ascii')
- except UnicodeEncodeError:
- if isinstance(charset, str):
- charset = Charset(charset)
- encoded_name = charset.header_encode(name)
- return "%s <%s>" % (encoded_name, address)
- else:
- quotes = ''
- if specialsre.search(name):
- quotes = '"'
- name = escapesre.sub(r'\\\g<0>', name)
- return '%s%s%s <%s>' % (quotes, name, quotes, address)
- return address
-
-
-
-def getaddresses(fieldvalues):
- """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
- all = COMMASPACE.join(fieldvalues)
- a = _AddressList(all)
- return a.addresslist
-
-
-
-ecre = re.compile(r'''
- =\? # literal =?
- (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
- \? # literal ?
- (?P<encoding>[qb]) # either a "q" or a "b", case insensitive
- \? # literal ?
- (?P<atom>.*?) # non-greedy up to the next ?= is the atom
- \?= # literal ?=
- ''', re.VERBOSE | re.IGNORECASE)
-
-
-def _format_timetuple_and_zone(timetuple, zone):
- return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
- ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][timetuple[6]],
- timetuple[2],
- ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
- 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][timetuple[1] - 1],
- timetuple[0], timetuple[3], timetuple[4], timetuple[5],
- zone)
-
-def formatdate(timeval=None, localtime=False, usegmt=False):
- """Returns a date string as specified by RFC 2822, e.g.:
-
- Fri, 09 Nov 2001 01:08:47 -0000
-
- Optional timeval if given is a floating point time value as accepted by
- gmtime() and localtime(), otherwise the current time is used.
-
- Optional localtime is a flag that when True, interprets timeval, and
- returns a date relative to the local timezone instead of UTC, properly
- taking daylight savings time into account.
-
- Optional argument usegmt means that the timezone is written out as
- an ascii string, not numeric one (so "GMT" instead of "+0000"). This
- is needed for HTTP, and is only used when localtime==False.
- """
- # Note: we cannot use strftime() because that honors the locale and RFC
- # 2822 requires that day and month names be the English abbreviations.
- if timeval is None:
- timeval = time.time()
- if localtime:
- now = time.localtime(timeval)
- # Calculate timezone offset, based on whether the local zone has
- # daylight savings time, and whether DST is in effect.
- if time.daylight and now[-1]:
- offset = time.altzone
- else:
- offset = time.timezone
- hours, minutes = divmod(abs(offset), 3600)
- # Remember offset is in seconds west of UTC, but the timezone is in
- # minutes east of UTC, so the signs differ.
- if offset > 0:
- sign = '-'
- else:
- sign = '+'
- zone = '%s%02d%02d' % (sign, hours, minutes // 60)
- else:
- now = time.gmtime(timeval)
- # Timezone offset is always -0000
- if usegmt:
- zone = 'GMT'
- else:
- zone = '-0000'
- return _format_timetuple_and_zone(now, zone)
-
-def format_datetime(dt, usegmt=False):
- """Turn a datetime into a date string as specified in RFC 2822.
-
- If usegmt is True, dt must be an aware datetime with an offset of zero. In
- this case 'GMT' will be rendered instead of the normal +0000 required by
- RFC2822. This is to support HTTP headers involving date stamps.
- """
- now = dt.timetuple()
- if usegmt:
- if dt.tzinfo is None or dt.tzinfo != datetime.timezone.utc:
- raise ValueError("usegmt option requires a UTC datetime")
- zone = 'GMT'
- elif dt.tzinfo is None:
- zone = '-0000'
- else:
- zone = dt.strftime("%z")
- return _format_timetuple_and_zone(now, zone)
-
-
-def make_msgid(idstring=None, domain=None):
- """Returns a string suitable for RFC 2822 compliant Message-ID, e.g:
-
- <20020201195627.33539.96671@nightshade.la.mastaler.com>
-
- Optional idstring if given is a string used to strengthen the
- uniqueness of the message id. Optional domain if given provides the
- portion of the message id after the '@'. It defaults to the locally
- defined hostname.
- """
- timeval = time.time()
- utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(timeval))
- pid = os.getpid()
- randint = random.randrange(100000)
- if idstring is None:
- idstring = ''
- else:
- idstring = '.' + idstring
- if domain is None:
- domain = socket.getfqdn()
- msgid = '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, domain)
- return msgid
-
-
-def parsedate_to_datetime(data):
- _3to2list = list(_parsedate_tz(data))
- dtuple, tz, = [_3to2list[:-1]] + _3to2list[-1:]
- if tz is None:
- return datetime.datetime(*dtuple[:6])
- return datetime.datetime(*dtuple[:6],
- tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
-
-
-def parseaddr(addr):
- addrs = _AddressList(addr).addresslist
- if not addrs:
- return '', ''
- return addrs[0]
-
-
-# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
-def unquote(str):
- """Remove quotes from a string."""
- if len(str) > 1:
- if str.startswith('"') and str.endswith('"'):
- return str[1:-1].replace('\\\\', '\\').replace('\\"', '"')
- if str.startswith('<') and str.endswith('>'):
- return str[1:-1]
- return str
-
-
-
-# RFC2231-related functions - parameter encoding and decoding
-def decode_rfc2231(s):
- """Decode string according to RFC 2231"""
- parts = s.split(TICK, 2)
- if len(parts) <= 2:
- return None, None, s
- return parts
-
-
-def encode_rfc2231(s, charset=None, language=None):
- """Encode string according to RFC 2231.
-
- If neither charset nor language is given, then s is returned as-is. If
- charset is given but not language, the string is encoded using the empty
- string for language.
- """
- s = url_quote(s, safe='', encoding=charset or 'ascii')
- if charset is None and language is None:
- return s
- if language is None:
- language = ''
- return "%s'%s'%s" % (charset, language, s)
-
-
-rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$',
- re.ASCII)
-
-def decode_params(params):
- """Decode parameters list according to RFC 2231.
-
- params is a sequence of 2-tuples containing (param name, string value).
- """
- # Copy params so we don't mess with the original
- params = params[:]
- new_params = []
- # Map parameter's name to a list of continuations. The values are a
- # 3-tuple of the continuation number, the string value, and a flag
- # specifying whether a particular segment is %-encoded.
- rfc2231_params = {}
- name, value = params.pop(0)
- new_params.append((name, value))
- while params:
- name, value = params.pop(0)
- if name.endswith('*'):
- encoded = True
- else:
- encoded = False
- value = unquote(value)
- mo = rfc2231_continuation.match(name)
- if mo:
- name, num = mo.group('name', 'num')
- if num is not None:
- num = int(num)
- rfc2231_params.setdefault(name, []).append((num, value, encoded))
- else:
- new_params.append((name, '"%s"' % quote(value)))
- if rfc2231_params:
- for name, continuations in rfc2231_params.items():
- value = []
- extended = False
- # Sort by number
- continuations.sort()
- # And now append all values in numerical order, converting
- # %-encodings for the encoded segments. If any of the
- # continuation names ends in a *, then the entire string, after
- # decoding segments and concatenating, must have the charset and
- # language specifiers at the beginning of the string.
- for num, s, encoded in continuations:
- if encoded:
- # Decode as "latin-1", so the characters in s directly
- # represent the percent-encoded octet values.
- # collapse_rfc2231_value treats this as an octet sequence.
- s = url_unquote(s, encoding="latin-1")
- extended = True
- value.append(s)
- value = quote(EMPTYSTRING.join(value))
- if extended:
- charset, language, value = decode_rfc2231(value)
- new_params.append((name, (charset, language, '"%s"' % value)))
- else:
- new_params.append((name, '"%s"' % value))
- return new_params
-
-def collapse_rfc2231_value(value, errors='replace',
- fallback_charset='us-ascii'):
- if not isinstance(value, tuple) or len(value) != 3:
- return unquote(value)
- # While value comes to us as a unicode string, we need it to be a bytes
- # object. We do not want bytes() normal utf-8 decoder, we want a straight
- # interpretation of the string as character bytes.
- charset, language, text = value
- rawbytes = bytes(text, 'raw-unicode-escape')
- try:
- return str(rawbytes, charset, errors)
- except LookupError:
- # charset is not a known codec.
- return unquote(text)
-
-
-#
-# datetime doesn't provide a localtime function yet, so provide one. Code
-# adapted from the patch in issue 9527. This may not be perfect, but it is
-# better than not having it.
-#
-
-def localtime(dt=None, isdst=-1):
- """Return local time as an aware datetime object.
-
- If called without arguments, return current time. Otherwise *dt*
- argument should be a datetime instance, and it is converted to the
- local time zone according to the system time zone database. If *dt* is
- naive (that is, dt.tzinfo is None), it is assumed to be in local time.
- In this case, a positive or zero value for *isdst* causes localtime to
- presume initially that summer time (for example, Daylight Saving Time)
- is or is not (respectively) in effect for the specified time. A
- negative value for *isdst* causes the localtime() function to attempt
- to divine whether summer time is in effect for the specified time.
-
- """
- if dt is None:
- return datetime.datetime.now(datetime.timezone.utc).astimezone()
- if dt.tzinfo is not None:
- return dt.astimezone()
- # We have a naive datetime. Convert to a (localtime) timetuple and pass to
- # system mktime together with the isdst hint. System mktime will return
- # seconds since epoch.
- tm = dt.timetuple()[:-1] + (isdst,)
- seconds = time.mktime(tm)
- localtm = time.localtime(seconds)
- try:
- delta = datetime.timedelta(seconds=localtm.tm_gmtoff)
- tz = datetime.timezone(delta, localtm.tm_zone)
- except AttributeError:
- # Compute UTC offset and compare with the value implied by tm_isdst.
- # If the values match, use the zone name implied by tm_isdst.
- delta = dt - datetime.datetime(*time.gmtime(seconds)[:6])
- dst = time.daylight and localtm.tm_isdst > 0
- gmtoff = -(time.altzone if dst else time.timezone)
- if delta == datetime.timedelta(seconds=gmtoff):
- tz = datetime.timezone(delta, time.tzname[dst])
- else:
- tz = datetime.timezone(delta)
- return dt.replace(tzinfo=tz)
+# Copyright (C) 2001-2010 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Miscellaneous utilities."""
+
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from future import utils
+from future.builtins import bytes, int, str
+
+__all__ = [
+ 'collapse_rfc2231_value',
+ 'decode_params',
+ 'decode_rfc2231',
+ 'encode_rfc2231',
+ 'formataddr',
+ 'formatdate',
+ 'format_datetime',
+ 'getaddresses',
+ 'make_msgid',
+ 'mktime_tz',
+ 'parseaddr',
+ 'parsedate',
+ 'parsedate_tz',
+ 'parsedate_to_datetime',
+ 'unquote',
+ ]
+
+import os
+import re
+if utils.PY2:
+ re.ASCII = 0
+import time
+import base64
+import random
+import socket
+from future.backports import datetime
+from future.backports.urllib.parse import quote as url_quote, unquote as url_unquote
+import warnings
+from io import StringIO
+
+from future.backports.email._parseaddr import quote
+from future.backports.email._parseaddr import AddressList as _AddressList
+from future.backports.email._parseaddr import mktime_tz
+
+from future.backports.email._parseaddr import parsedate, parsedate_tz, _parsedate_tz
+
+from quopri import decodestring as _qdecode
+
+# Intrapackage imports
+from future.backports.email.encoders import _bencode, _qencode
+from future.backports.email.charset import Charset
+
+COMMASPACE = ', '
+EMPTYSTRING = ''
+UEMPTYSTRING = ''
+CRLF = '\r\n'
+TICK = "'"
+
+specialsre = re.compile(r'[][\\()<>@,:;".]')
+escapesre = re.compile(r'[\\"]')
+
+# How to figure out if we are processing strings that come from a byte
+# source with undecodable characters.
+_has_surrogates = re.compile(
+ '([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)').search
+
+# How to deal with a string containing bytes before handing it to the
+# application through the 'normal' interface.
+def _sanitize(string):
+ # Turn any escaped bytes into unicode 'unknown' char.
+ original_bytes = string.encode('ascii', 'surrogateescape')
+ return original_bytes.decode('ascii', 'replace')
+
+
+# Helpers
+
def formataddr(pair, charset='utf-8'):
    """Build an address header value from a (realname, email_address) pair.

    This is the inverse of parseaddr(): the result is suitable for an
    RFC 2822 From, To or Cc header.  When the realname part is false, the
    address is returned on its own, unmodified.

    *charset* is only consulted when realname is not ASCII safe.  It may be
    a str naming a character set, or a Charset-like object exposing a
    header_encode method.  It defaults to 'utf-8'.

    A non-ASCII address raises UnicodeError.
    """
    name, address = pair
    # The address MUST (per RFC) be ascii; encoding is done purely for the
    # exception it raises otherwise.
    address.encode('ascii')
    if not name:
        return address
    try:
        name.encode('ascii')
    except UnicodeEncodeError:
        # Non-ASCII realname: let the Charset produce the header encoding.
        if isinstance(charset, str):
            charset = Charset(charset)
        return "%s <%s>" % (charset.header_encode(name), address)
    # ASCII realname: wrap in quotes only when it contains specials, but
    # always backslash-escape embedded backslashes and double quotes.
    quotes = '"' if specialsre.search(name) else ''
    name = escapesre.sub(r'\\\g<0>', name)
    return '%s%s%s <%s>' % (quotes, name, quotes, address)
+
+
+
def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
    # All field values are parsed together as one comma-separated list.
    joined = COMMASPACE.join(fieldvalues)
    return _AddressList(joined).addresslist
+
+
+
# Matches one encoded word of the form =?charset?encoding?atom?= and exposes
# its three parts as named groups.
ecre = re.compile(
    r'''
    =\?                   # literal =?
    (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
    \?                    # literal ?
    (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
    \?                    # literal ?
    (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
    \?=                   # literal ?=
    ''',
    re.IGNORECASE | re.VERBOSE)
+
+
+def _format_timetuple_and_zone(timetuple, zone):
+ return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
+ ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][timetuple[6]],
+ timetuple[2],
+ ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+ 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][timetuple[1] - 1],
+ timetuple[0], timetuple[3], timetuple[4], timetuple[5],
+ zone)
+
def formatdate(timeval=None, localtime=False, usegmt=False):
    """Return a date string as specified by RFC 2822, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    *timeval*, when given, is a floating point time value as accepted by
    gmtime() and localtime(); the current time is used otherwise.

    *localtime* is a flag: when true, timeval is interpreted and rendered
    relative to the local timezone instead of UTC, taking daylight savings
    time into account.

    *usegmt* makes the zone appear as the ascii string "GMT" rather than the
    numeric "-0000".  This is needed for HTTP, and only has an effect when
    localtime is false.
    """
    # Note: strftime() cannot be used here because it honors the locale,
    # while RFC 2822 requires English day and month abbreviations.
    if timeval is None:
        timeval = time.time()
    if not localtime:
        # UTC rendering: the offset is always zero.
        zone = 'GMT' if usegmt else '-0000'
        return _format_timetuple_and_zone(time.gmtime(timeval), zone)
    now = time.localtime(timeval)
    # Pick the offset according to whether the local zone has daylight
    # savings time and whether it is in effect for this time.
    dst_in_effect = time.daylight and now[-1]
    offset = time.altzone if dst_in_effect else time.timezone
    hours, leftover_seconds = divmod(abs(offset), 3600)
    # offset counts seconds west of UTC, whereas the rendered zone is
    # expressed east of UTC, so the sign is flipped.
    sign = '-' if offset > 0 else '+'
    zone = '%s%02d%02d' % (sign, hours, leftover_seconds // 60)
    return _format_timetuple_and_zone(now, zone)
+
def format_datetime(dt, usegmt=False):
    """Turn a datetime into a date string as specified in RFC 2822.

    A naive datetime is rendered with the zone '-0000'; an aware one with
    its numeric offset.  With usegmt true, dt must be an aware datetime
    whose tzinfo equals datetime.timezone.utc, and 'GMT' is rendered instead
    of the numeric zone (this supports HTTP date stamps).

    Raises ValueError if usegmt is true and dt is not such a UTC datetime.
    """
    timetuple = dt.timetuple()
    tzinfo = dt.tzinfo
    if not usegmt:
        zone = '-0000' if tzinfo is None else dt.strftime("%z")
    elif tzinfo is None or tzinfo != datetime.timezone.utc:
        raise ValueError("usegmt option requires a UTC datetime")
    else:
        zone = 'GMT'
    return _format_timetuple_and_zone(timetuple, zone)
+
+
def make_msgid(idstring=None, domain=None):
    """Return a string suitable for an RFC 2822 compliant Message-ID, e.g:

    <20020201195627.33539.1234567890123456789@nightshade.la.mastaler.com>

    The id is built from the current UTC time (one-second resolution), the
    process id, a random number, the optional *idstring* and the *domain*.

    Optional idstring if given is a string used to strengthen the
    uniqueness of the message id.  Optional domain if given provides the
    portion of the message id after the '@'.  It defaults to the locally
    defined hostname as reported by socket.getfqdn().
    """
    utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(time.time()))
    pid = os.getpid()
    # Within one process and one second the random part is the only thing
    # that varies, so draw 64 bits rather than a number below 100000 to
    # keep collisions negligible when many ids are generated at once.
    randint = random.getrandbits(64)
    idstring = '' if idstring is None else '.' + idstring
    if domain is None:
        domain = socket.getfqdn()
    return '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, domain)
+
+
def parsedate_to_datetime(data):
    """Convert the date string *data* into a datetime via _parsedate_tz().

    The result is naive when the parsed zone offset is None; otherwise it is
    aware, with a fixed-offset timezone of that many seconds.
    """
    fields = list(_parsedate_tz(data))
    # The last parsed field is the zone offset in seconds (or None); the
    # first six of the remaining ones are year..second.
    dtuple, tz = fields[:-1], fields[-1]
    if tz is None:
        return datetime.datetime(*dtuple[:6])
    offset = datetime.timedelta(seconds=tz)
    return datetime.datetime(*dtuple[:6], tzinfo=datetime.timezone(offset))
+
+
def parseaddr(addr):
    """Parse *addr* and return its first (realname, email_address) pair.

    Returns ('', '') when no address could be parsed.
    """
    parsed = _AddressList(addr).addresslist
    return parsed[0] if parsed else ('', '')
+
+
# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
def unquote(str):
    """Remove quotes from a string.

    A value wrapped in double quotes loses them and has its backslash
    escapes for backslash and double quote undone; a value wrapped in angle
    brackets just loses the brackets.  Anything else is returned unchanged.
    """
    # A delimiter pair needs at least two characters.
    if len(str) <= 1:
        return str
    if str.startswith('"') and str.endswith('"'):
        inner = str[1:-1]
        return inner.replace('\\\\', '\\').replace('\\"', '"')
    if str.startswith('<') and str.endswith('>'):
        return str[1:-1]
    return str
+
+
+
+# RFC2231-related functions - parameter encoding and decoding
def decode_rfc2231(s):
    """Decode string according to RFC 2231.

    Splits *s* on the first two TICK separators into charset, language and
    text.  When fewer than two separators are present, (None, None, s) is
    returned instead.
    """
    pieces = s.split(TICK, 2)
    if len(pieces) > 2:
        return pieces
    return None, None, s
+
+
def encode_rfc2231(s, charset=None, language=None):
    """Encode string according to RFC 2231.

    *s* is always %-quoted (with no safe characters), using *charset* or
    'ascii' when no charset is given.  If neither charset nor language is
    given, the quoted string is returned by itself.  Otherwise the result
    is "charset'language'quoted", with an empty string standing in for a
    missing language.
    """
    quoted = url_quote(s, safe='', encoding=charset or 'ascii')
    if charset is None and language is None:
        return quoted
    lang = '' if language is None else language
    return "%s'%s'%s" % (charset, lang, quoted)
+
+
# Splits a parameter name such as "title*", "title*0" or "title*1*" into the
# bare name and the optional continuation number.
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$',
    re.ASCII)


def decode_params(params):
    """Decode parameters list according to RFC 2231.

    params is a sequence of 2-tuples containing (param name, string value).
    Its first entry is passed through unchanged, and the input sequence is
    not modified.

    Returns a new list of 2-tuples.  Ordinary parameters come back as
    (name, '"value"').  Parameters with at least one %-encoded segment come
    back as (name, (charset, language, '"value"')), ready for
    collapse_rfc2231_value().  An empty *params* yields an empty list.
    """
    if not params:
        return []
    first_name, first_value = params[0]
    new_params = [(first_name, first_value)]
    # Map parameter's name to a list of continuations.  The values are a
    # 3-tuple of the continuation number, the string value, and a flag
    # specifying whether a particular segment is %-encoded.
    rfc2231_params = {}
    # Iterate over a slice instead of popping from the front, which would
    # be quadratic and would require a mutable copy.
    for name, value in params[1:]:
        encoded = name.endswith('*')
        value = unquote(value)
        mo = rfc2231_continuation.match(name)
        if mo:
            name, num = mo.group('name', 'num')
            if num is not None:
                num = int(num)
            rfc2231_params.setdefault(name, []).append((num, value, encoded))
        else:
            new_params.append((name, '"%s"' % quote(value)))
    for name, continuations in rfc2231_params.items():
        segments = []
        extended = False
        # Sort by number
        continuations.sort()
        # And now append all values in numerical order, converting
        # %-encodings for the encoded segments.  If any of the
        # continuation names ends in a *, then the entire string, after
        # decoding segments and concatenating, must have the charset and
        # language specifiers at the beginning of the string.
        for num, s, encoded in continuations:
            if encoded:
                # Decode as "latin-1", so the characters in s directly
                # represent the percent-encoded octet values.
                # collapse_rfc2231_value treats this as an octet sequence.
                s = url_unquote(s, encoding="latin-1")
                extended = True
            segments.append(s)
        value = quote(EMPTYSTRING.join(segments))
        if extended:
            charset, language, value = decode_rfc2231(value)
            new_params.append((name, (charset, language, '"%s"' % value)))
        else:
            new_params.append((name, '"%s"' % value))
    return new_params
+
def collapse_rfc2231_value(value, errors='replace',
                           fallback_charset='us-ascii'):
    """Collapse an RFC 2231 parameter value into a plain string.

    *value* is either a plain string, which is simply unquoted, or a
    (charset, language, text) 3-tuple whose text is decoded with charset.

    *errors* is the error handler passed to the decoder.
    *fallback_charset* is used when the tuple carries no charset (None).

    If the charset names an unknown codec, the unquoted text is returned
    undecoded.
    """
    if not isinstance(value, tuple) or len(value) != 3:
        return unquote(value)
    charset, language, text = value
    if charset is None:
        # The charset/language prefix was missing; decoding with None would
        # raise TypeError, so decode with the fallback charset instead.
        charset = fallback_charset
    # While value comes to us as a unicode string, we need it to be a bytes
    # object.  We do not want bytes() normal utf-8 decoder, we want a
    # straight interpretation of the string as character bytes.
    rawbytes = bytes(text, 'raw-unicode-escape')
    try:
        return str(rawbytes, charset, errors)
    except LookupError:
        # charset is not a known codec.
        return unquote(text)
+
+
+#
+# datetime doesn't provide a localtime function yet, so provide one. Code
+# adapted from the patch in issue 9527. This may not be perfect, but it is
+# better than not having it.
+#
+
def localtime(dt=None, isdst=-1):
    """Return local time as an aware datetime object.

    With no arguments, the current time is returned.  Otherwise *dt* should
    be a datetime instance.  An aware *dt* is converted to the local time
    zone according to the system time zone database.  A naive *dt* (that is,
    dt.tzinfo is None) is assumed to already be in local time and is only
    given a tzinfo.

    For a naive *dt*, *isdst* is handed to the system mktime as a hint: a
    positive or zero value makes it presume initially that summer time (for
    example, Daylight Saving Time) is or is not (respectively) in effect for
    the specified time, while a negative value asks it to work that out
    itself.
    """
    if dt is None:
        return datetime.datetime.now(datetime.timezone.utc).astimezone()
    if dt.tzinfo is not None:
        return dt.astimezone()
    # Naive datetime: build a (localtime) timetuple carrying the isdst hint
    # and let the system mktime turn it into seconds since the epoch.
    seconds = time.mktime(dt.timetuple()[:-1] + (isdst,))
    localtm = time.localtime(seconds)
    try:
        offset = datetime.timedelta(seconds=localtm.tm_gmtoff)
        zone = datetime.timezone(offset, localtm.tm_zone)
    except AttributeError:
        # No tm_gmtoff/tm_zone available.  Compute the UTC offset and
        # compare it with the value implied by tm_isdst; only when the two
        # agree is the zone name implied by tm_isdst used.
        offset = dt - datetime.datetime(*time.gmtime(seconds)[:6])
        dst = time.daylight and localtm.tm_isdst > 0
        implied_gmtoff = -(time.altzone if dst else time.timezone)
        if offset == datetime.timedelta(seconds=implied_gmtoff):
            zone = datetime.timezone(offset, time.tzname[dst])
        else:
            zone = datetime.timezone(offset)
    return dt.replace(tzinfo=zone)
diff --git a/contrib/python/future/future/backports/html/__init__.py b/contrib/python/future/future/backports/html/__init__.py
index 5b2d91797c..58e133fd4b 100644
--- a/contrib/python/future/future/backports/html/__init__.py
+++ b/contrib/python/future/future/backports/html/__init__.py
@@ -1,27 +1,27 @@
-"""
-General functions for HTML manipulation, backported from Py3.
-
-Note that this uses Python 2.7 code with the corresponding Python 3
-module names and locations.
-"""
-
-from __future__ import unicode_literals
-
-
-_escape_map = {ord('&'): '&amp;', ord('<'): '&lt;', ord('>'): '&gt;'}
-_escape_map_full = {ord('&'): '&amp;', ord('<'): '&lt;', ord('>'): '&gt;',
- ord('"'): '&quot;', ord('\''): '&#x27;'}
-
-# NB: this is a candidate for a bytes/string polymorphic interface
-
-def escape(s, quote=True):
- """
- Replace special characters "&", "<" and ">" to HTML-safe sequences.
- If the optional flag quote is true (the default), the quotation mark
- characters, both double quote (") and single quote (') characters are also
- translated.
- """
- assert not isinstance(s, bytes), 'Pass a unicode string'
- if quote:
- return s.translate(_escape_map_full)
- return s.translate(_escape_map)
+"""
+General functions for HTML manipulation, backported from Py3.
+
+Note that this uses Python 2.7 code with the corresponding Python 3
+module names and locations.
+"""
+
+from __future__ import unicode_literals
+
+
# Translation tables keyed by code point, in the form str.translate() takes.
_escape_map = {
    ord('&'): '&amp;',
    ord('<'): '&lt;',
    ord('>'): '&gt;',
}
# Same as above, plus both kinds of quotation mark.
_escape_map_full = dict(_escape_map)
_escape_map_full.update({ord('"'): '&quot;', ord('\''): '&#x27;'})

# NB: this is a candidate for a bytes/string polymorphic interface

def escape(s, quote=True):
    """
    Replace special characters "&", "<" and ">" to HTML-safe sequences.
    If the optional flag quote is true (the default), the quotation mark
    characters, both double quote (") and single quote (') characters are also
    translated.

    s must be a unicode string, not bytes.
    """
    assert not isinstance(s, bytes), 'Pass a unicode string'
    table = _escape_map_full if quote else _escape_map
    return s.translate(table)
diff --git a/contrib/python/future/future/backports/html/entities.py b/contrib/python/future/future/backports/html/entities.py
index 75e08848d9..5c73f6923a 100644
--- a/contrib/python/future/future/backports/html/entities.py
+++ b/contrib/python/future/future/backports/html/entities.py
@@ -1,2514 +1,2514 @@
-"""HTML character entity references.
-
-Backported for python-future from Python 3.3
-"""
-
-from __future__ import (absolute_import, division,
- print_function, unicode_literals)
-from future.builtins import *
-
-
-# maps the HTML entity name to the Unicode codepoint
-name2codepoint = {
- 'AElig': 0x00c6, # latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1
- 'Aacute': 0x00c1, # latin capital letter A with acute, U+00C1 ISOlat1
- 'Acirc': 0x00c2, # latin capital letter A with circumflex, U+00C2 ISOlat1
- 'Agrave': 0x00c0, # latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1
- 'Alpha': 0x0391, # greek capital letter alpha, U+0391
- 'Aring': 0x00c5, # latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1
- 'Atilde': 0x00c3, # latin capital letter A with tilde, U+00C3 ISOlat1
- 'Auml': 0x00c4, # latin capital letter A with diaeresis, U+00C4 ISOlat1
- 'Beta': 0x0392, # greek capital letter beta, U+0392
- 'Ccedil': 0x00c7, # latin capital letter C with cedilla, U+00C7 ISOlat1
- 'Chi': 0x03a7, # greek capital letter chi, U+03A7
- 'Dagger': 0x2021, # double dagger, U+2021 ISOpub
- 'Delta': 0x0394, # greek capital letter delta, U+0394 ISOgrk3
- 'ETH': 0x00d0, # latin capital letter ETH, U+00D0 ISOlat1
- 'Eacute': 0x00c9, # latin capital letter E with acute, U+00C9 ISOlat1
- 'Ecirc': 0x00ca, # latin capital letter E with circumflex, U+00CA ISOlat1
- 'Egrave': 0x00c8, # latin capital letter E with grave, U+00C8 ISOlat1
- 'Epsilon': 0x0395, # greek capital letter epsilon, U+0395
- 'Eta': 0x0397, # greek capital letter eta, U+0397
- 'Euml': 0x00cb, # latin capital letter E with diaeresis, U+00CB ISOlat1
- 'Gamma': 0x0393, # greek capital letter gamma, U+0393 ISOgrk3
- 'Iacute': 0x00cd, # latin capital letter I with acute, U+00CD ISOlat1
- 'Icirc': 0x00ce, # latin capital letter I with circumflex, U+00CE ISOlat1
- 'Igrave': 0x00cc, # latin capital letter I with grave, U+00CC ISOlat1
- 'Iota': 0x0399, # greek capital letter iota, U+0399
- 'Iuml': 0x00cf, # latin capital letter I with diaeresis, U+00CF ISOlat1
- 'Kappa': 0x039a, # greek capital letter kappa, U+039A
- 'Lambda': 0x039b, # greek capital letter lambda, U+039B ISOgrk3
- 'Mu': 0x039c, # greek capital letter mu, U+039C
- 'Ntilde': 0x00d1, # latin capital letter N with tilde, U+00D1 ISOlat1
- 'Nu': 0x039d, # greek capital letter nu, U+039D
- 'OElig': 0x0152, # latin capital ligature OE, U+0152 ISOlat2
- 'Oacute': 0x00d3, # latin capital letter O with acute, U+00D3 ISOlat1
- 'Ocirc': 0x00d4, # latin capital letter O with circumflex, U+00D4 ISOlat1
- 'Ograve': 0x00d2, # latin capital letter O with grave, U+00D2 ISOlat1
- 'Omega': 0x03a9, # greek capital letter omega, U+03A9 ISOgrk3
- 'Omicron': 0x039f, # greek capital letter omicron, U+039F
- 'Oslash': 0x00d8, # latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1
- 'Otilde': 0x00d5, # latin capital letter O with tilde, U+00D5 ISOlat1
- 'Ouml': 0x00d6, # latin capital letter O with diaeresis, U+00D6 ISOlat1
- 'Phi': 0x03a6, # greek capital letter phi, U+03A6 ISOgrk3
- 'Pi': 0x03a0, # greek capital letter pi, U+03A0 ISOgrk3
- 'Prime': 0x2033, # double prime = seconds = inches, U+2033 ISOtech
- 'Psi': 0x03a8, # greek capital letter psi, U+03A8 ISOgrk3
- 'Rho': 0x03a1, # greek capital letter rho, U+03A1
- 'Scaron': 0x0160, # latin capital letter S with caron, U+0160 ISOlat2
- 'Sigma': 0x03a3, # greek capital letter sigma, U+03A3 ISOgrk3
- 'THORN': 0x00de, # latin capital letter THORN, U+00DE ISOlat1
- 'Tau': 0x03a4, # greek capital letter tau, U+03A4
- 'Theta': 0x0398, # greek capital letter theta, U+0398 ISOgrk3
- 'Uacute': 0x00da, # latin capital letter U with acute, U+00DA ISOlat1
- 'Ucirc': 0x00db, # latin capital letter U with circumflex, U+00DB ISOlat1
- 'Ugrave': 0x00d9, # latin capital letter U with grave, U+00D9 ISOlat1
- 'Upsilon': 0x03a5, # greek capital letter upsilon, U+03A5 ISOgrk3
- 'Uuml': 0x00dc, # latin capital letter U with diaeresis, U+00DC ISOlat1
- 'Xi': 0x039e, # greek capital letter xi, U+039E ISOgrk3
- 'Yacute': 0x00dd, # latin capital letter Y with acute, U+00DD ISOlat1
- 'Yuml': 0x0178, # latin capital letter Y with diaeresis, U+0178 ISOlat2
- 'Zeta': 0x0396, # greek capital letter zeta, U+0396
- 'aacute': 0x00e1, # latin small letter a with acute, U+00E1 ISOlat1
- 'acirc': 0x00e2, # latin small letter a with circumflex, U+00E2 ISOlat1
- 'acute': 0x00b4, # acute accent = spacing acute, U+00B4 ISOdia
- 'aelig': 0x00e6, # latin small letter ae = latin small ligature ae, U+00E6 ISOlat1
- 'agrave': 0x00e0, # latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1
- 'alefsym': 0x2135, # alef symbol = first transfinite cardinal, U+2135 NEW
- 'alpha': 0x03b1, # greek small letter alpha, U+03B1 ISOgrk3
- 'amp': 0x0026, # ampersand, U+0026 ISOnum
- 'and': 0x2227, # logical and = wedge, U+2227 ISOtech
- 'ang': 0x2220, # angle, U+2220 ISOamso
- 'aring': 0x00e5, # latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1
- 'asymp': 0x2248, # almost equal to = asymptotic to, U+2248 ISOamsr
- 'atilde': 0x00e3, # latin small letter a with tilde, U+00E3 ISOlat1
- 'auml': 0x00e4, # latin small letter a with diaeresis, U+00E4 ISOlat1
- 'bdquo': 0x201e, # double low-9 quotation mark, U+201E NEW
- 'beta': 0x03b2, # greek small letter beta, U+03B2 ISOgrk3
- 'brvbar': 0x00a6, # broken bar = broken vertical bar, U+00A6 ISOnum
- 'bull': 0x2022, # bullet = black small circle, U+2022 ISOpub
- 'cap': 0x2229, # intersection = cap, U+2229 ISOtech
- 'ccedil': 0x00e7, # latin small letter c with cedilla, U+00E7 ISOlat1
- 'cedil': 0x00b8, # cedilla = spacing cedilla, U+00B8 ISOdia
- 'cent': 0x00a2, # cent sign, U+00A2 ISOnum
- 'chi': 0x03c7, # greek small letter chi, U+03C7 ISOgrk3
- 'circ': 0x02c6, # modifier letter circumflex accent, U+02C6 ISOpub
- 'clubs': 0x2663, # black club suit = shamrock, U+2663 ISOpub
- 'cong': 0x2245, # approximately equal to, U+2245 ISOtech
- 'copy': 0x00a9, # copyright sign, U+00A9 ISOnum
- 'crarr': 0x21b5, # downwards arrow with corner leftwards = carriage return, U+21B5 NEW
- 'cup': 0x222a, # union = cup, U+222A ISOtech
- 'curren': 0x00a4, # currency sign, U+00A4 ISOnum
- 'dArr': 0x21d3, # downwards double arrow, U+21D3 ISOamsa
- 'dagger': 0x2020, # dagger, U+2020 ISOpub
- 'darr': 0x2193, # downwards arrow, U+2193 ISOnum
- 'deg': 0x00b0, # degree sign, U+00B0 ISOnum
- 'delta': 0x03b4, # greek small letter delta, U+03B4 ISOgrk3
- 'diams': 0x2666, # black diamond suit, U+2666 ISOpub
- 'divide': 0x00f7, # division sign, U+00F7 ISOnum
- 'eacute': 0x00e9, # latin small letter e with acute, U+00E9 ISOlat1
- 'ecirc': 0x00ea, # latin small letter e with circumflex, U+00EA ISOlat1
- 'egrave': 0x00e8, # latin small letter e with grave, U+00E8 ISOlat1
- 'empty': 0x2205, # empty set = null set = diameter, U+2205 ISOamso
- 'emsp': 0x2003, # em space, U+2003 ISOpub
- 'ensp': 0x2002, # en space, U+2002 ISOpub
- 'epsilon': 0x03b5, # greek small letter epsilon, U+03B5 ISOgrk3
- 'equiv': 0x2261, # identical to, U+2261 ISOtech
- 'eta': 0x03b7, # greek small letter eta, U+03B7 ISOgrk3
- 'eth': 0x00f0, # latin small letter eth, U+00F0 ISOlat1
- 'euml': 0x00eb, # latin small letter e with diaeresis, U+00EB ISOlat1
- 'euro': 0x20ac, # euro sign, U+20AC NEW
- 'exist': 0x2203, # there exists, U+2203 ISOtech
- 'fnof': 0x0192, # latin small f with hook = function = florin, U+0192 ISOtech
- 'forall': 0x2200, # for all, U+2200 ISOtech
- 'frac12': 0x00bd, # vulgar fraction one half = fraction one half, U+00BD ISOnum
- 'frac14': 0x00bc, # vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum
- 'frac34': 0x00be, # vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum
- 'frasl': 0x2044, # fraction slash, U+2044 NEW
- 'gamma': 0x03b3, # greek small letter gamma, U+03B3 ISOgrk3
- 'ge': 0x2265, # greater-than or equal to, U+2265 ISOtech
- 'gt': 0x003e, # greater-than sign, U+003E ISOnum
- 'hArr': 0x21d4, # left right double arrow, U+21D4 ISOamsa
- 'harr': 0x2194, # left right arrow, U+2194 ISOamsa
- 'hearts': 0x2665, # black heart suit = valentine, U+2665 ISOpub
- 'hellip': 0x2026, # horizontal ellipsis = three dot leader, U+2026 ISOpub
- 'iacute': 0x00ed, # latin small letter i with acute, U+00ED ISOlat1
- 'icirc': 0x00ee, # latin small letter i with circumflex, U+00EE ISOlat1
- 'iexcl': 0x00a1, # inverted exclamation mark, U+00A1 ISOnum
- 'igrave': 0x00ec, # latin small letter i with grave, U+00EC ISOlat1
- 'image': 0x2111, # blackletter capital I = imaginary part, U+2111 ISOamso
- 'infin': 0x221e, # infinity, U+221E ISOtech
- 'int': 0x222b, # integral, U+222B ISOtech
- 'iota': 0x03b9, # greek small letter iota, U+03B9 ISOgrk3
- 'iquest': 0x00bf, # inverted question mark = turned question mark, U+00BF ISOnum
- 'isin': 0x2208, # element of, U+2208 ISOtech
- 'iuml': 0x00ef, # latin small letter i with diaeresis, U+00EF ISOlat1
- 'kappa': 0x03ba, # greek small letter kappa, U+03BA ISOgrk3
- 'lArr': 0x21d0, # leftwards double arrow, U+21D0 ISOtech
- 'lambda': 0x03bb, # greek small letter lambda, U+03BB ISOgrk3
- 'lang': 0x2329, # left-pointing angle bracket = bra, U+2329 ISOtech
- 'laquo': 0x00ab, # left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum
- 'larr': 0x2190, # leftwards arrow, U+2190 ISOnum
- 'lceil': 0x2308, # left ceiling = apl upstile, U+2308 ISOamsc
- 'ldquo': 0x201c, # left double quotation mark, U+201C ISOnum
- 'le': 0x2264, # less-than or equal to, U+2264 ISOtech
- 'lfloor': 0x230a, # left floor = apl downstile, U+230A ISOamsc
- 'lowast': 0x2217, # asterisk operator, U+2217 ISOtech
- 'loz': 0x25ca, # lozenge, U+25CA ISOpub
- 'lrm': 0x200e, # left-to-right mark, U+200E NEW RFC 2070
- 'lsaquo': 0x2039, # single left-pointing angle quotation mark, U+2039 ISO proposed
- 'lsquo': 0x2018, # left single quotation mark, U+2018 ISOnum
- 'lt': 0x003c, # less-than sign, U+003C ISOnum
- 'macr': 0x00af, # macron = spacing macron = overline = APL overbar, U+00AF ISOdia
- 'mdash': 0x2014, # em dash, U+2014 ISOpub
- 'micro': 0x00b5, # micro sign, U+00B5 ISOnum
- 'middot': 0x00b7, # middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum
- 'minus': 0x2212, # minus sign, U+2212 ISOtech
- 'mu': 0x03bc, # greek small letter mu, U+03BC ISOgrk3
- 'nabla': 0x2207, # nabla = backward difference, U+2207 ISOtech
- 'nbsp': 0x00a0, # no-break space = non-breaking space, U+00A0 ISOnum
- 'ndash': 0x2013, # en dash, U+2013 ISOpub
- 'ne': 0x2260, # not equal to, U+2260 ISOtech
- 'ni': 0x220b, # contains as member, U+220B ISOtech
- 'not': 0x00ac, # not sign, U+00AC ISOnum
- 'notin': 0x2209, # not an element of, U+2209 ISOtech
- 'nsub': 0x2284, # not a subset of, U+2284 ISOamsn
- 'ntilde': 0x00f1, # latin small letter n with tilde, U+00F1 ISOlat1
- 'nu': 0x03bd, # greek small letter nu, U+03BD ISOgrk3
- 'oacute': 0x00f3, # latin small letter o with acute, U+00F3 ISOlat1
- 'ocirc': 0x00f4, # latin small letter o with circumflex, U+00F4 ISOlat1
- 'oelig': 0x0153, # latin small ligature oe, U+0153 ISOlat2
- 'ograve': 0x00f2, # latin small letter o with grave, U+00F2 ISOlat1
- 'oline': 0x203e, # overline = spacing overscore, U+203E NEW
- 'omega': 0x03c9, # greek small letter omega, U+03C9 ISOgrk3
- 'omicron': 0x03bf, # greek small letter omicron, U+03BF NEW
- 'oplus': 0x2295, # circled plus = direct sum, U+2295 ISOamsb
- 'or': 0x2228, # logical or = vee, U+2228 ISOtech
- 'ordf': 0x00aa, # feminine ordinal indicator, U+00AA ISOnum
- 'ordm': 0x00ba, # masculine ordinal indicator, U+00BA ISOnum
- 'oslash': 0x00f8, # latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1
- 'otilde': 0x00f5, # latin small letter o with tilde, U+00F5 ISOlat1
- 'otimes': 0x2297, # circled times = vector product, U+2297 ISOamsb
- 'ouml': 0x00f6, # latin small letter o with diaeresis, U+00F6 ISOlat1
- 'para': 0x00b6, # pilcrow sign = paragraph sign, U+00B6 ISOnum
- 'part': 0x2202, # partial differential, U+2202 ISOtech
- 'permil': 0x2030, # per mille sign, U+2030 ISOtech
- 'perp': 0x22a5, # up tack = orthogonal to = perpendicular, U+22A5 ISOtech
- 'phi': 0x03c6, # greek small letter phi, U+03C6 ISOgrk3
- 'pi': 0x03c0, # greek small letter pi, U+03C0 ISOgrk3
- 'piv': 0x03d6, # greek pi symbol, U+03D6 ISOgrk3
- 'plusmn': 0x00b1, # plus-minus sign = plus-or-minus sign, U+00B1 ISOnum
- 'pound': 0x00a3, # pound sign, U+00A3 ISOnum
- 'prime': 0x2032, # prime = minutes = feet, U+2032 ISOtech
- 'prod': 0x220f, # n-ary product = product sign, U+220F ISOamsb
- 'prop': 0x221d, # proportional to, U+221D ISOtech
- 'psi': 0x03c8, # greek small letter psi, U+03C8 ISOgrk3
- 'quot': 0x0022, # quotation mark = APL quote, U+0022 ISOnum
- 'rArr': 0x21d2, # rightwards double arrow, U+21D2 ISOtech
- 'radic': 0x221a, # square root = radical sign, U+221A ISOtech
- 'rang': 0x232a, # right-pointing angle bracket = ket, U+232A ISOtech
- 'raquo': 0x00bb, # right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum
- 'rarr': 0x2192, # rightwards arrow, U+2192 ISOnum
- 'rceil': 0x2309, # right ceiling, U+2309 ISOamsc
- 'rdquo': 0x201d, # right double quotation mark, U+201D ISOnum
- 'real': 0x211c, # blackletter capital R = real part symbol, U+211C ISOamso
- 'reg': 0x00ae, # registered sign = registered trade mark sign, U+00AE ISOnum
- 'rfloor': 0x230b, # right floor, U+230B ISOamsc
- 'rho': 0x03c1, # greek small letter rho, U+03C1 ISOgrk3
- 'rlm': 0x200f, # right-to-left mark, U+200F NEW RFC 2070
- 'rsaquo': 0x203a, # single right-pointing angle quotation mark, U+203A ISO proposed
- 'rsquo': 0x2019, # right single quotation mark, U+2019 ISOnum
- 'sbquo': 0x201a, # single low-9 quotation mark, U+201A NEW
- 'scaron': 0x0161, # latin small letter s with caron, U+0161 ISOlat2
- 'sdot': 0x22c5, # dot operator, U+22C5 ISOamsb
- 'sect': 0x00a7, # section sign, U+00A7 ISOnum
- 'shy': 0x00ad, # soft hyphen = discretionary hyphen, U+00AD ISOnum
- 'sigma': 0x03c3, # greek small letter sigma, U+03C3 ISOgrk3
- 'sigmaf': 0x03c2, # greek small letter final sigma, U+03C2 ISOgrk3
- 'sim': 0x223c, # tilde operator = varies with = similar to, U+223C ISOtech
- 'spades': 0x2660, # black spade suit, U+2660 ISOpub
- 'sub': 0x2282, # subset of, U+2282 ISOtech
- 'sube': 0x2286, # subset of or equal to, U+2286 ISOtech
- 'sum': 0x2211, # n-ary sumation, U+2211 ISOamsb
- 'sup': 0x2283, # superset of, U+2283 ISOtech
- 'sup1': 0x00b9, # superscript one = superscript digit one, U+00B9 ISOnum
- 'sup2': 0x00b2, # superscript two = superscript digit two = squared, U+00B2 ISOnum
- 'sup3': 0x00b3, # superscript three = superscript digit three = cubed, U+00B3 ISOnum
- 'supe': 0x2287, # superset of or equal to, U+2287 ISOtech
- 'szlig': 0x00df, # latin small letter sharp s = ess-zed, U+00DF ISOlat1
- 'tau': 0x03c4, # greek small letter tau, U+03C4 ISOgrk3
- 'there4': 0x2234, # therefore, U+2234 ISOtech
- 'theta': 0x03b8, # greek small letter theta, U+03B8 ISOgrk3
- 'thetasym': 0x03d1, # greek small letter theta symbol, U+03D1 NEW
- 'thinsp': 0x2009, # thin space, U+2009 ISOpub
- 'thorn': 0x00fe, # latin small letter thorn with, U+00FE ISOlat1
- 'tilde': 0x02dc, # small tilde, U+02DC ISOdia
- 'times': 0x00d7, # multiplication sign, U+00D7 ISOnum
- 'trade': 0x2122, # trade mark sign, U+2122 ISOnum
- 'uArr': 0x21d1, # upwards double arrow, U+21D1 ISOamsa
- 'uacute': 0x00fa, # latin small letter u with acute, U+00FA ISOlat1
- 'uarr': 0x2191, # upwards arrow, U+2191 ISOnum
- 'ucirc': 0x00fb, # latin small letter u with circumflex, U+00FB ISOlat1
- 'ugrave': 0x00f9, # latin small letter u with grave, U+00F9 ISOlat1
- 'uml': 0x00a8, # diaeresis = spacing diaeresis, U+00A8 ISOdia
- 'upsih': 0x03d2, # greek upsilon with hook symbol, U+03D2 NEW
- 'upsilon': 0x03c5, # greek small letter upsilon, U+03C5 ISOgrk3
- 'uuml': 0x00fc, # latin small letter u with diaeresis, U+00FC ISOlat1
- 'weierp': 0x2118, # script capital P = power set = Weierstrass p, U+2118 ISOamso
- 'xi': 0x03be, # greek small letter xi, U+03BE ISOgrk3
- 'yacute': 0x00fd, # latin small letter y with acute, U+00FD ISOlat1
- 'yen': 0x00a5, # yen sign = yuan sign, U+00A5 ISOnum
- 'yuml': 0x00ff, # latin small letter y with diaeresis, U+00FF ISOlat1
- 'zeta': 0x03b6, # greek small letter zeta, U+03B6 ISOgrk3
- 'zwj': 0x200d, # zero width joiner, U+200D NEW RFC 2070
- 'zwnj': 0x200c, # zero width non-joiner, U+200C NEW RFC 2070
-}
-
-
-# maps the HTML5 named character references to the equivalent Unicode character(s)
-html5 = {
- 'Aacute': '\xc1',
- 'aacute': '\xe1',
- 'Aacute;': '\xc1',
- 'aacute;': '\xe1',
- 'Abreve;': '\u0102',
- 'abreve;': '\u0103',
- 'ac;': '\u223e',
- 'acd;': '\u223f',
- 'acE;': '\u223e\u0333',
- 'Acirc': '\xc2',
- 'acirc': '\xe2',
- 'Acirc;': '\xc2',
- 'acirc;': '\xe2',
- 'acute': '\xb4',
- 'acute;': '\xb4',
- 'Acy;': '\u0410',
- 'acy;': '\u0430',
- 'AElig': '\xc6',
- 'aelig': '\xe6',
- 'AElig;': '\xc6',
- 'aelig;': '\xe6',
- 'af;': '\u2061',
- 'Afr;': '\U0001d504',
- 'afr;': '\U0001d51e',
- 'Agrave': '\xc0',
- 'agrave': '\xe0',
- 'Agrave;': '\xc0',
- 'agrave;': '\xe0',
- 'alefsym;': '\u2135',
- 'aleph;': '\u2135',
- 'Alpha;': '\u0391',
- 'alpha;': '\u03b1',
- 'Amacr;': '\u0100',
- 'amacr;': '\u0101',
- 'amalg;': '\u2a3f',
- 'AMP': '&',
- 'amp': '&',
- 'AMP;': '&',
- 'amp;': '&',
- 'And;': '\u2a53',
- 'and;': '\u2227',
- 'andand;': '\u2a55',
- 'andd;': '\u2a5c',
- 'andslope;': '\u2a58',
- 'andv;': '\u2a5a',
- 'ang;': '\u2220',
- 'ange;': '\u29a4',
- 'angle;': '\u2220',
- 'angmsd;': '\u2221',
- 'angmsdaa;': '\u29a8',
- 'angmsdab;': '\u29a9',
- 'angmsdac;': '\u29aa',
- 'angmsdad;': '\u29ab',
- 'angmsdae;': '\u29ac',
- 'angmsdaf;': '\u29ad',
- 'angmsdag;': '\u29ae',
- 'angmsdah;': '\u29af',
- 'angrt;': '\u221f',
- 'angrtvb;': '\u22be',
- 'angrtvbd;': '\u299d',
- 'angsph;': '\u2222',
- 'angst;': '\xc5',
- 'angzarr;': '\u237c',
- 'Aogon;': '\u0104',
- 'aogon;': '\u0105',
- 'Aopf;': '\U0001d538',
- 'aopf;': '\U0001d552',
- 'ap;': '\u2248',
- 'apacir;': '\u2a6f',
- 'apE;': '\u2a70',
- 'ape;': '\u224a',
- 'apid;': '\u224b',
- 'apos;': "'",
- 'ApplyFunction;': '\u2061',
- 'approx;': '\u2248',
- 'approxeq;': '\u224a',
- 'Aring': '\xc5',
- 'aring': '\xe5',
- 'Aring;': '\xc5',
- 'aring;': '\xe5',
- 'Ascr;': '\U0001d49c',
- 'ascr;': '\U0001d4b6',
- 'Assign;': '\u2254',
- 'ast;': '*',
- 'asymp;': '\u2248',
- 'asympeq;': '\u224d',
- 'Atilde': '\xc3',
- 'atilde': '\xe3',
- 'Atilde;': '\xc3',
- 'atilde;': '\xe3',
- 'Auml': '\xc4',
- 'auml': '\xe4',
- 'Auml;': '\xc4',
- 'auml;': '\xe4',
- 'awconint;': '\u2233',
- 'awint;': '\u2a11',
- 'backcong;': '\u224c',
- 'backepsilon;': '\u03f6',
- 'backprime;': '\u2035',
- 'backsim;': '\u223d',
- 'backsimeq;': '\u22cd',
- 'Backslash;': '\u2216',
- 'Barv;': '\u2ae7',
- 'barvee;': '\u22bd',
- 'Barwed;': '\u2306',
- 'barwed;': '\u2305',
- 'barwedge;': '\u2305',
- 'bbrk;': '\u23b5',
- 'bbrktbrk;': '\u23b6',
- 'bcong;': '\u224c',
- 'Bcy;': '\u0411',
- 'bcy;': '\u0431',
- 'bdquo;': '\u201e',
- 'becaus;': '\u2235',
- 'Because;': '\u2235',
- 'because;': '\u2235',
- 'bemptyv;': '\u29b0',
- 'bepsi;': '\u03f6',
- 'bernou;': '\u212c',
- 'Bernoullis;': '\u212c',
- 'Beta;': '\u0392',
- 'beta;': '\u03b2',
- 'beth;': '\u2136',
- 'between;': '\u226c',
- 'Bfr;': '\U0001d505',
- 'bfr;': '\U0001d51f',
- 'bigcap;': '\u22c2',
- 'bigcirc;': '\u25ef',
- 'bigcup;': '\u22c3',
- 'bigodot;': '\u2a00',
- 'bigoplus;': '\u2a01',
- 'bigotimes;': '\u2a02',
- 'bigsqcup;': '\u2a06',
- 'bigstar;': '\u2605',
- 'bigtriangledown;': '\u25bd',
- 'bigtriangleup;': '\u25b3',
- 'biguplus;': '\u2a04',
- 'bigvee;': '\u22c1',
- 'bigwedge;': '\u22c0',
- 'bkarow;': '\u290d',
- 'blacklozenge;': '\u29eb',
- 'blacksquare;': '\u25aa',
- 'blacktriangle;': '\u25b4',
- 'blacktriangledown;': '\u25be',
- 'blacktriangleleft;': '\u25c2',
- 'blacktriangleright;': '\u25b8',
- 'blank;': '\u2423',
- 'blk12;': '\u2592',
- 'blk14;': '\u2591',
- 'blk34;': '\u2593',
- 'block;': '\u2588',
- 'bne;': '=\u20e5',
- 'bnequiv;': '\u2261\u20e5',
- 'bNot;': '\u2aed',
- 'bnot;': '\u2310',
- 'Bopf;': '\U0001d539',
- 'bopf;': '\U0001d553',
- 'bot;': '\u22a5',
- 'bottom;': '\u22a5',
- 'bowtie;': '\u22c8',
- 'boxbox;': '\u29c9',
- 'boxDL;': '\u2557',
- 'boxDl;': '\u2556',
- 'boxdL;': '\u2555',
- 'boxdl;': '\u2510',
- 'boxDR;': '\u2554',
- 'boxDr;': '\u2553',
- 'boxdR;': '\u2552',
- 'boxdr;': '\u250c',
- 'boxH;': '\u2550',
- 'boxh;': '\u2500',
- 'boxHD;': '\u2566',
- 'boxHd;': '\u2564',
- 'boxhD;': '\u2565',
- 'boxhd;': '\u252c',
- 'boxHU;': '\u2569',
- 'boxHu;': '\u2567',
- 'boxhU;': '\u2568',
- 'boxhu;': '\u2534',
- 'boxminus;': '\u229f',
- 'boxplus;': '\u229e',
- 'boxtimes;': '\u22a0',
- 'boxUL;': '\u255d',
- 'boxUl;': '\u255c',
- 'boxuL;': '\u255b',
- 'boxul;': '\u2518',
- 'boxUR;': '\u255a',
- 'boxUr;': '\u2559',
- 'boxuR;': '\u2558',
- 'boxur;': '\u2514',
- 'boxV;': '\u2551',
- 'boxv;': '\u2502',
- 'boxVH;': '\u256c',
- 'boxVh;': '\u256b',
- 'boxvH;': '\u256a',
- 'boxvh;': '\u253c',
- 'boxVL;': '\u2563',
- 'boxVl;': '\u2562',
- 'boxvL;': '\u2561',
- 'boxvl;': '\u2524',
- 'boxVR;': '\u2560',
- 'boxVr;': '\u255f',
- 'boxvR;': '\u255e',
- 'boxvr;': '\u251c',
- 'bprime;': '\u2035',
- 'Breve;': '\u02d8',
- 'breve;': '\u02d8',
- 'brvbar': '\xa6',
- 'brvbar;': '\xa6',
- 'Bscr;': '\u212c',
- 'bscr;': '\U0001d4b7',
- 'bsemi;': '\u204f',
- 'bsim;': '\u223d',
- 'bsime;': '\u22cd',
- 'bsol;': '\\',
- 'bsolb;': '\u29c5',
- 'bsolhsub;': '\u27c8',
- 'bull;': '\u2022',
- 'bullet;': '\u2022',
- 'bump;': '\u224e',
- 'bumpE;': '\u2aae',
- 'bumpe;': '\u224f',
- 'Bumpeq;': '\u224e',
- 'bumpeq;': '\u224f',
- 'Cacute;': '\u0106',
- 'cacute;': '\u0107',
- 'Cap;': '\u22d2',
- 'cap;': '\u2229',
- 'capand;': '\u2a44',
- 'capbrcup;': '\u2a49',
- 'capcap;': '\u2a4b',
- 'capcup;': '\u2a47',
- 'capdot;': '\u2a40',
- 'CapitalDifferentialD;': '\u2145',
- 'caps;': '\u2229\ufe00',
- 'caret;': '\u2041',
- 'caron;': '\u02c7',
- 'Cayleys;': '\u212d',
- 'ccaps;': '\u2a4d',
- 'Ccaron;': '\u010c',
- 'ccaron;': '\u010d',
- 'Ccedil': '\xc7',
- 'ccedil': '\xe7',
- 'Ccedil;': '\xc7',
- 'ccedil;': '\xe7',
- 'Ccirc;': '\u0108',
- 'ccirc;': '\u0109',
- 'Cconint;': '\u2230',
- 'ccups;': '\u2a4c',
- 'ccupssm;': '\u2a50',
- 'Cdot;': '\u010a',
- 'cdot;': '\u010b',
- 'cedil': '\xb8',
- 'cedil;': '\xb8',
- 'Cedilla;': '\xb8',
- 'cemptyv;': '\u29b2',
- 'cent': '\xa2',
- 'cent;': '\xa2',
- 'CenterDot;': '\xb7',
- 'centerdot;': '\xb7',
- 'Cfr;': '\u212d',
- 'cfr;': '\U0001d520',
- 'CHcy;': '\u0427',
- 'chcy;': '\u0447',
- 'check;': '\u2713',
- 'checkmark;': '\u2713',
- 'Chi;': '\u03a7',
- 'chi;': '\u03c7',
- 'cir;': '\u25cb',
- 'circ;': '\u02c6',
- 'circeq;': '\u2257',
- 'circlearrowleft;': '\u21ba',
- 'circlearrowright;': '\u21bb',
- 'circledast;': '\u229b',
- 'circledcirc;': '\u229a',
- 'circleddash;': '\u229d',
- 'CircleDot;': '\u2299',
- 'circledR;': '\xae',
- 'circledS;': '\u24c8',
- 'CircleMinus;': '\u2296',
- 'CirclePlus;': '\u2295',
- 'CircleTimes;': '\u2297',
- 'cirE;': '\u29c3',
- 'cire;': '\u2257',
- 'cirfnint;': '\u2a10',
- 'cirmid;': '\u2aef',
- 'cirscir;': '\u29c2',
- 'ClockwiseContourIntegral;': '\u2232',
- 'CloseCurlyDoubleQuote;': '\u201d',
- 'CloseCurlyQuote;': '\u2019',
- 'clubs;': '\u2663',
- 'clubsuit;': '\u2663',
- 'Colon;': '\u2237',
- 'colon;': ':',
- 'Colone;': '\u2a74',
- 'colone;': '\u2254',
- 'coloneq;': '\u2254',
- 'comma;': ',',
- 'commat;': '@',
- 'comp;': '\u2201',
- 'compfn;': '\u2218',
- 'complement;': '\u2201',
- 'complexes;': '\u2102',
- 'cong;': '\u2245',
- 'congdot;': '\u2a6d',
- 'Congruent;': '\u2261',
- 'Conint;': '\u222f',
- 'conint;': '\u222e',
- 'ContourIntegral;': '\u222e',
- 'Copf;': '\u2102',
- 'copf;': '\U0001d554',
- 'coprod;': '\u2210',
- 'Coproduct;': '\u2210',
- 'COPY': '\xa9',
- 'copy': '\xa9',
- 'COPY;': '\xa9',
- 'copy;': '\xa9',
- 'copysr;': '\u2117',
- 'CounterClockwiseContourIntegral;': '\u2233',
- 'crarr;': '\u21b5',
- 'Cross;': '\u2a2f',
- 'cross;': '\u2717',
- 'Cscr;': '\U0001d49e',
- 'cscr;': '\U0001d4b8',
- 'csub;': '\u2acf',
- 'csube;': '\u2ad1',
- 'csup;': '\u2ad0',
- 'csupe;': '\u2ad2',
- 'ctdot;': '\u22ef',
- 'cudarrl;': '\u2938',
- 'cudarrr;': '\u2935',
- 'cuepr;': '\u22de',
- 'cuesc;': '\u22df',
- 'cularr;': '\u21b6',
- 'cularrp;': '\u293d',
- 'Cup;': '\u22d3',
- 'cup;': '\u222a',
- 'cupbrcap;': '\u2a48',
- 'CupCap;': '\u224d',
- 'cupcap;': '\u2a46',
- 'cupcup;': '\u2a4a',
- 'cupdot;': '\u228d',
- 'cupor;': '\u2a45',
- 'cups;': '\u222a\ufe00',
- 'curarr;': '\u21b7',
- 'curarrm;': '\u293c',
- 'curlyeqprec;': '\u22de',
- 'curlyeqsucc;': '\u22df',
- 'curlyvee;': '\u22ce',
- 'curlywedge;': '\u22cf',
- 'curren': '\xa4',
- 'curren;': '\xa4',
- 'curvearrowleft;': '\u21b6',
- 'curvearrowright;': '\u21b7',
- 'cuvee;': '\u22ce',
- 'cuwed;': '\u22cf',
- 'cwconint;': '\u2232',
- 'cwint;': '\u2231',
- 'cylcty;': '\u232d',
- 'Dagger;': '\u2021',
- 'dagger;': '\u2020',
- 'daleth;': '\u2138',
- 'Darr;': '\u21a1',
- 'dArr;': '\u21d3',
- 'darr;': '\u2193',
- 'dash;': '\u2010',
- 'Dashv;': '\u2ae4',
- 'dashv;': '\u22a3',
- 'dbkarow;': '\u290f',
- 'dblac;': '\u02dd',
- 'Dcaron;': '\u010e',
- 'dcaron;': '\u010f',
- 'Dcy;': '\u0414',
- 'dcy;': '\u0434',
- 'DD;': '\u2145',
- 'dd;': '\u2146',
- 'ddagger;': '\u2021',
- 'ddarr;': '\u21ca',
- 'DDotrahd;': '\u2911',
- 'ddotseq;': '\u2a77',
- 'deg': '\xb0',
- 'deg;': '\xb0',
- 'Del;': '\u2207',
- 'Delta;': '\u0394',
- 'delta;': '\u03b4',
- 'demptyv;': '\u29b1',
- 'dfisht;': '\u297f',
- 'Dfr;': '\U0001d507',
- 'dfr;': '\U0001d521',
- 'dHar;': '\u2965',
- 'dharl;': '\u21c3',
- 'dharr;': '\u21c2',
- 'DiacriticalAcute;': '\xb4',
- 'DiacriticalDot;': '\u02d9',
- 'DiacriticalDoubleAcute;': '\u02dd',
- 'DiacriticalGrave;': '`',
- 'DiacriticalTilde;': '\u02dc',
- 'diam;': '\u22c4',
- 'Diamond;': '\u22c4',
- 'diamond;': '\u22c4',
- 'diamondsuit;': '\u2666',
- 'diams;': '\u2666',
- 'die;': '\xa8',
- 'DifferentialD;': '\u2146',
- 'digamma;': '\u03dd',
- 'disin;': '\u22f2',
- 'div;': '\xf7',
- 'divide': '\xf7',
- 'divide;': '\xf7',
- 'divideontimes;': '\u22c7',
- 'divonx;': '\u22c7',
- 'DJcy;': '\u0402',
- 'djcy;': '\u0452',
- 'dlcorn;': '\u231e',
- 'dlcrop;': '\u230d',
- 'dollar;': '$',
- 'Dopf;': '\U0001d53b',
- 'dopf;': '\U0001d555',
- 'Dot;': '\xa8',
- 'dot;': '\u02d9',
- 'DotDot;': '\u20dc',
- 'doteq;': '\u2250',
- 'doteqdot;': '\u2251',
- 'DotEqual;': '\u2250',
- 'dotminus;': '\u2238',
- 'dotplus;': '\u2214',
- 'dotsquare;': '\u22a1',
- 'doublebarwedge;': '\u2306',
- 'DoubleContourIntegral;': '\u222f',
- 'DoubleDot;': '\xa8',
- 'DoubleDownArrow;': '\u21d3',
- 'DoubleLeftArrow;': '\u21d0',
- 'DoubleLeftRightArrow;': '\u21d4',
- 'DoubleLeftTee;': '\u2ae4',
- 'DoubleLongLeftArrow;': '\u27f8',
- 'DoubleLongLeftRightArrow;': '\u27fa',
- 'DoubleLongRightArrow;': '\u27f9',
- 'DoubleRightArrow;': '\u21d2',
- 'DoubleRightTee;': '\u22a8',
- 'DoubleUpArrow;': '\u21d1',
- 'DoubleUpDownArrow;': '\u21d5',
- 'DoubleVerticalBar;': '\u2225',
- 'DownArrow;': '\u2193',
- 'Downarrow;': '\u21d3',
- 'downarrow;': '\u2193',
- 'DownArrowBar;': '\u2913',
- 'DownArrowUpArrow;': '\u21f5',
- 'DownBreve;': '\u0311',
- 'downdownarrows;': '\u21ca',
- 'downharpoonleft;': '\u21c3',
- 'downharpoonright;': '\u21c2',
- 'DownLeftRightVector;': '\u2950',
- 'DownLeftTeeVector;': '\u295e',
- 'DownLeftVector;': '\u21bd',
- 'DownLeftVectorBar;': '\u2956',
- 'DownRightTeeVector;': '\u295f',
- 'DownRightVector;': '\u21c1',
- 'DownRightVectorBar;': '\u2957',
- 'DownTee;': '\u22a4',
- 'DownTeeArrow;': '\u21a7',
- 'drbkarow;': '\u2910',
- 'drcorn;': '\u231f',
- 'drcrop;': '\u230c',
- 'Dscr;': '\U0001d49f',
- 'dscr;': '\U0001d4b9',
- 'DScy;': '\u0405',
- 'dscy;': '\u0455',
- 'dsol;': '\u29f6',
- 'Dstrok;': '\u0110',
- 'dstrok;': '\u0111',
- 'dtdot;': '\u22f1',
- 'dtri;': '\u25bf',
- 'dtrif;': '\u25be',
- 'duarr;': '\u21f5',
- 'duhar;': '\u296f',
- 'dwangle;': '\u29a6',
- 'DZcy;': '\u040f',
- 'dzcy;': '\u045f',
- 'dzigrarr;': '\u27ff',
- 'Eacute': '\xc9',
- 'eacute': '\xe9',
- 'Eacute;': '\xc9',
- 'eacute;': '\xe9',
- 'easter;': '\u2a6e',
- 'Ecaron;': '\u011a',
- 'ecaron;': '\u011b',
- 'ecir;': '\u2256',
- 'Ecirc': '\xca',
- 'ecirc': '\xea',
- 'Ecirc;': '\xca',
- 'ecirc;': '\xea',
- 'ecolon;': '\u2255',
- 'Ecy;': '\u042d',
- 'ecy;': '\u044d',
- 'eDDot;': '\u2a77',
- 'Edot;': '\u0116',
- 'eDot;': '\u2251',
- 'edot;': '\u0117',
- 'ee;': '\u2147',
- 'efDot;': '\u2252',
- 'Efr;': '\U0001d508',
- 'efr;': '\U0001d522',
- 'eg;': '\u2a9a',
- 'Egrave': '\xc8',
- 'egrave': '\xe8',
- 'Egrave;': '\xc8',
- 'egrave;': '\xe8',
- 'egs;': '\u2a96',
- 'egsdot;': '\u2a98',
- 'el;': '\u2a99',
- 'Element;': '\u2208',
- 'elinters;': '\u23e7',
- 'ell;': '\u2113',
- 'els;': '\u2a95',
- 'elsdot;': '\u2a97',
- 'Emacr;': '\u0112',
- 'emacr;': '\u0113',
- 'empty;': '\u2205',
- 'emptyset;': '\u2205',
- 'EmptySmallSquare;': '\u25fb',
- 'emptyv;': '\u2205',
- 'EmptyVerySmallSquare;': '\u25ab',
- 'emsp13;': '\u2004',
- 'emsp14;': '\u2005',
- 'emsp;': '\u2003',
- 'ENG;': '\u014a',
- 'eng;': '\u014b',
- 'ensp;': '\u2002',
- 'Eogon;': '\u0118',
- 'eogon;': '\u0119',
- 'Eopf;': '\U0001d53c',
- 'eopf;': '\U0001d556',
- 'epar;': '\u22d5',
- 'eparsl;': '\u29e3',
- 'eplus;': '\u2a71',
- 'epsi;': '\u03b5',
- 'Epsilon;': '\u0395',
- 'epsilon;': '\u03b5',
- 'epsiv;': '\u03f5',
- 'eqcirc;': '\u2256',
- 'eqcolon;': '\u2255',
- 'eqsim;': '\u2242',
- 'eqslantgtr;': '\u2a96',
- 'eqslantless;': '\u2a95',
- 'Equal;': '\u2a75',
- 'equals;': '=',
- 'EqualTilde;': '\u2242',
- 'equest;': '\u225f',
- 'Equilibrium;': '\u21cc',
- 'equiv;': '\u2261',
- 'equivDD;': '\u2a78',
- 'eqvparsl;': '\u29e5',
- 'erarr;': '\u2971',
- 'erDot;': '\u2253',
- 'Escr;': '\u2130',
- 'escr;': '\u212f',
- 'esdot;': '\u2250',
- 'Esim;': '\u2a73',
- 'esim;': '\u2242',
- 'Eta;': '\u0397',
- 'eta;': '\u03b7',
- 'ETH': '\xd0',
- 'eth': '\xf0',
- 'ETH;': '\xd0',
- 'eth;': '\xf0',
- 'Euml': '\xcb',
- 'euml': '\xeb',
- 'Euml;': '\xcb',
- 'euml;': '\xeb',
- 'euro;': '\u20ac',
- 'excl;': '!',
- 'exist;': '\u2203',
- 'Exists;': '\u2203',
- 'expectation;': '\u2130',
- 'ExponentialE;': '\u2147',
- 'exponentiale;': '\u2147',
- 'fallingdotseq;': '\u2252',
- 'Fcy;': '\u0424',
- 'fcy;': '\u0444',
- 'female;': '\u2640',
- 'ffilig;': '\ufb03',
- 'fflig;': '\ufb00',
- 'ffllig;': '\ufb04',
- 'Ffr;': '\U0001d509',
- 'ffr;': '\U0001d523',
- 'filig;': '\ufb01',
- 'FilledSmallSquare;': '\u25fc',
- 'FilledVerySmallSquare;': '\u25aa',
- 'fjlig;': 'fj',
- 'flat;': '\u266d',
- 'fllig;': '\ufb02',
- 'fltns;': '\u25b1',
- 'fnof;': '\u0192',
- 'Fopf;': '\U0001d53d',
- 'fopf;': '\U0001d557',
- 'ForAll;': '\u2200',
- 'forall;': '\u2200',
- 'fork;': '\u22d4',
- 'forkv;': '\u2ad9',
- 'Fouriertrf;': '\u2131',
- 'fpartint;': '\u2a0d',
- 'frac12': '\xbd',
- 'frac12;': '\xbd',
- 'frac13;': '\u2153',
- 'frac14': '\xbc',
- 'frac14;': '\xbc',
- 'frac15;': '\u2155',
- 'frac16;': '\u2159',
- 'frac18;': '\u215b',
- 'frac23;': '\u2154',
- 'frac25;': '\u2156',
- 'frac34': '\xbe',
- 'frac34;': '\xbe',
- 'frac35;': '\u2157',
- 'frac38;': '\u215c',
- 'frac45;': '\u2158',
- 'frac56;': '\u215a',
- 'frac58;': '\u215d',
- 'frac78;': '\u215e',
- 'frasl;': '\u2044',
- 'frown;': '\u2322',
- 'Fscr;': '\u2131',
- 'fscr;': '\U0001d4bb',
- 'gacute;': '\u01f5',
- 'Gamma;': '\u0393',
- 'gamma;': '\u03b3',
- 'Gammad;': '\u03dc',
- 'gammad;': '\u03dd',
- 'gap;': '\u2a86',
- 'Gbreve;': '\u011e',
- 'gbreve;': '\u011f',
- 'Gcedil;': '\u0122',
- 'Gcirc;': '\u011c',
- 'gcirc;': '\u011d',
- 'Gcy;': '\u0413',
- 'gcy;': '\u0433',
- 'Gdot;': '\u0120',
- 'gdot;': '\u0121',
- 'gE;': '\u2267',
- 'ge;': '\u2265',
- 'gEl;': '\u2a8c',
- 'gel;': '\u22db',
- 'geq;': '\u2265',
- 'geqq;': '\u2267',
- 'geqslant;': '\u2a7e',
- 'ges;': '\u2a7e',
- 'gescc;': '\u2aa9',
- 'gesdot;': '\u2a80',
- 'gesdoto;': '\u2a82',
- 'gesdotol;': '\u2a84',
- 'gesl;': '\u22db\ufe00',
- 'gesles;': '\u2a94',
- 'Gfr;': '\U0001d50a',
- 'gfr;': '\U0001d524',
- 'Gg;': '\u22d9',
- 'gg;': '\u226b',
- 'ggg;': '\u22d9',
- 'gimel;': '\u2137',
- 'GJcy;': '\u0403',
- 'gjcy;': '\u0453',
- 'gl;': '\u2277',
- 'gla;': '\u2aa5',
- 'glE;': '\u2a92',
- 'glj;': '\u2aa4',
- 'gnap;': '\u2a8a',
- 'gnapprox;': '\u2a8a',
- 'gnE;': '\u2269',
- 'gne;': '\u2a88',
- 'gneq;': '\u2a88',
- 'gneqq;': '\u2269',
- 'gnsim;': '\u22e7',
- 'Gopf;': '\U0001d53e',
- 'gopf;': '\U0001d558',
- 'grave;': '`',
- 'GreaterEqual;': '\u2265',
- 'GreaterEqualLess;': '\u22db',
- 'GreaterFullEqual;': '\u2267',
- 'GreaterGreater;': '\u2aa2',
- 'GreaterLess;': '\u2277',
- 'GreaterSlantEqual;': '\u2a7e',
- 'GreaterTilde;': '\u2273',
- 'Gscr;': '\U0001d4a2',
- 'gscr;': '\u210a',
- 'gsim;': '\u2273',
- 'gsime;': '\u2a8e',
- 'gsiml;': '\u2a90',
- 'GT': '>',
- 'gt': '>',
- 'GT;': '>',
- 'Gt;': '\u226b',
- 'gt;': '>',
- 'gtcc;': '\u2aa7',
- 'gtcir;': '\u2a7a',
- 'gtdot;': '\u22d7',
- 'gtlPar;': '\u2995',
- 'gtquest;': '\u2a7c',
- 'gtrapprox;': '\u2a86',
- 'gtrarr;': '\u2978',
- 'gtrdot;': '\u22d7',
- 'gtreqless;': '\u22db',
- 'gtreqqless;': '\u2a8c',
- 'gtrless;': '\u2277',
- 'gtrsim;': '\u2273',
- 'gvertneqq;': '\u2269\ufe00',
- 'gvnE;': '\u2269\ufe00',
- 'Hacek;': '\u02c7',
- 'hairsp;': '\u200a',
- 'half;': '\xbd',
- 'hamilt;': '\u210b',
- 'HARDcy;': '\u042a',
- 'hardcy;': '\u044a',
- 'hArr;': '\u21d4',
- 'harr;': '\u2194',
- 'harrcir;': '\u2948',
- 'harrw;': '\u21ad',
- 'Hat;': '^',
- 'hbar;': '\u210f',
- 'Hcirc;': '\u0124',
- 'hcirc;': '\u0125',
- 'hearts;': '\u2665',
- 'heartsuit;': '\u2665',
- 'hellip;': '\u2026',
- 'hercon;': '\u22b9',
- 'Hfr;': '\u210c',
- 'hfr;': '\U0001d525',
- 'HilbertSpace;': '\u210b',
- 'hksearow;': '\u2925',
- 'hkswarow;': '\u2926',
- 'hoarr;': '\u21ff',
- 'homtht;': '\u223b',
- 'hookleftarrow;': '\u21a9',
- 'hookrightarrow;': '\u21aa',
- 'Hopf;': '\u210d',
- 'hopf;': '\U0001d559',
- 'horbar;': '\u2015',
- 'HorizontalLine;': '\u2500',
- 'Hscr;': '\u210b',
- 'hscr;': '\U0001d4bd',
- 'hslash;': '\u210f',
- 'Hstrok;': '\u0126',
- 'hstrok;': '\u0127',
- 'HumpDownHump;': '\u224e',
- 'HumpEqual;': '\u224f',
- 'hybull;': '\u2043',
- 'hyphen;': '\u2010',
- 'Iacute': '\xcd',
- 'iacute': '\xed',
- 'Iacute;': '\xcd',
- 'iacute;': '\xed',
- 'ic;': '\u2063',
- 'Icirc': '\xce',
- 'icirc': '\xee',
- 'Icirc;': '\xce',
- 'icirc;': '\xee',
- 'Icy;': '\u0418',
- 'icy;': '\u0438',
- 'Idot;': '\u0130',
- 'IEcy;': '\u0415',
- 'iecy;': '\u0435',
- 'iexcl': '\xa1',
- 'iexcl;': '\xa1',
- 'iff;': '\u21d4',
- 'Ifr;': '\u2111',
- 'ifr;': '\U0001d526',
- 'Igrave': '\xcc',
- 'igrave': '\xec',
- 'Igrave;': '\xcc',
- 'igrave;': '\xec',
- 'ii;': '\u2148',
- 'iiiint;': '\u2a0c',
- 'iiint;': '\u222d',
- 'iinfin;': '\u29dc',
- 'iiota;': '\u2129',
- 'IJlig;': '\u0132',
- 'ijlig;': '\u0133',
- 'Im;': '\u2111',
- 'Imacr;': '\u012a',
- 'imacr;': '\u012b',
- 'image;': '\u2111',
- 'ImaginaryI;': '\u2148',
- 'imagline;': '\u2110',
- 'imagpart;': '\u2111',
- 'imath;': '\u0131',
- 'imof;': '\u22b7',
- 'imped;': '\u01b5',
- 'Implies;': '\u21d2',
- 'in;': '\u2208',
- 'incare;': '\u2105',
- 'infin;': '\u221e',
- 'infintie;': '\u29dd',
- 'inodot;': '\u0131',
- 'Int;': '\u222c',
- 'int;': '\u222b',
- 'intcal;': '\u22ba',
- 'integers;': '\u2124',
- 'Integral;': '\u222b',
- 'intercal;': '\u22ba',
- 'Intersection;': '\u22c2',
- 'intlarhk;': '\u2a17',
- 'intprod;': '\u2a3c',
- 'InvisibleComma;': '\u2063',
- 'InvisibleTimes;': '\u2062',
- 'IOcy;': '\u0401',
- 'iocy;': '\u0451',
- 'Iogon;': '\u012e',
- 'iogon;': '\u012f',
- 'Iopf;': '\U0001d540',
- 'iopf;': '\U0001d55a',
- 'Iota;': '\u0399',
- 'iota;': '\u03b9',
- 'iprod;': '\u2a3c',
- 'iquest': '\xbf',
- 'iquest;': '\xbf',
- 'Iscr;': '\u2110',
- 'iscr;': '\U0001d4be',
- 'isin;': '\u2208',
- 'isindot;': '\u22f5',
- 'isinE;': '\u22f9',
- 'isins;': '\u22f4',
- 'isinsv;': '\u22f3',
- 'isinv;': '\u2208',
- 'it;': '\u2062',
- 'Itilde;': '\u0128',
- 'itilde;': '\u0129',
- 'Iukcy;': '\u0406',
- 'iukcy;': '\u0456',
- 'Iuml': '\xcf',
- 'iuml': '\xef',
- 'Iuml;': '\xcf',
- 'iuml;': '\xef',
- 'Jcirc;': '\u0134',
- 'jcirc;': '\u0135',
- 'Jcy;': '\u0419',
- 'jcy;': '\u0439',
- 'Jfr;': '\U0001d50d',
- 'jfr;': '\U0001d527',
- 'jmath;': '\u0237',
- 'Jopf;': '\U0001d541',
- 'jopf;': '\U0001d55b',
- 'Jscr;': '\U0001d4a5',
- 'jscr;': '\U0001d4bf',
- 'Jsercy;': '\u0408',
- 'jsercy;': '\u0458',
- 'Jukcy;': '\u0404',
- 'jukcy;': '\u0454',
- 'Kappa;': '\u039a',
- 'kappa;': '\u03ba',
- 'kappav;': '\u03f0',
- 'Kcedil;': '\u0136',
- 'kcedil;': '\u0137',
- 'Kcy;': '\u041a',
- 'kcy;': '\u043a',
- 'Kfr;': '\U0001d50e',
- 'kfr;': '\U0001d528',
- 'kgreen;': '\u0138',
- 'KHcy;': '\u0425',
- 'khcy;': '\u0445',
- 'KJcy;': '\u040c',
- 'kjcy;': '\u045c',
- 'Kopf;': '\U0001d542',
- 'kopf;': '\U0001d55c',
- 'Kscr;': '\U0001d4a6',
- 'kscr;': '\U0001d4c0',
- 'lAarr;': '\u21da',
- 'Lacute;': '\u0139',
- 'lacute;': '\u013a',
- 'laemptyv;': '\u29b4',
- 'lagran;': '\u2112',
- 'Lambda;': '\u039b',
- 'lambda;': '\u03bb',
- 'Lang;': '\u27ea',
- 'lang;': '\u27e8',
- 'langd;': '\u2991',
- 'langle;': '\u27e8',
- 'lap;': '\u2a85',
- 'Laplacetrf;': '\u2112',
- 'laquo': '\xab',
- 'laquo;': '\xab',
- 'Larr;': '\u219e',
- 'lArr;': '\u21d0',
- 'larr;': '\u2190',
- 'larrb;': '\u21e4',
- 'larrbfs;': '\u291f',
- 'larrfs;': '\u291d',
- 'larrhk;': '\u21a9',
- 'larrlp;': '\u21ab',
- 'larrpl;': '\u2939',
- 'larrsim;': '\u2973',
- 'larrtl;': '\u21a2',
- 'lat;': '\u2aab',
- 'lAtail;': '\u291b',
- 'latail;': '\u2919',
- 'late;': '\u2aad',
- 'lates;': '\u2aad\ufe00',
- 'lBarr;': '\u290e',
- 'lbarr;': '\u290c',
- 'lbbrk;': '\u2772',
- 'lbrace;': '{',
- 'lbrack;': '[',
- 'lbrke;': '\u298b',
- 'lbrksld;': '\u298f',
- 'lbrkslu;': '\u298d',
- 'Lcaron;': '\u013d',
- 'lcaron;': '\u013e',
- 'Lcedil;': '\u013b',
- 'lcedil;': '\u013c',
- 'lceil;': '\u2308',
- 'lcub;': '{',
- 'Lcy;': '\u041b',
- 'lcy;': '\u043b',
- 'ldca;': '\u2936',
- 'ldquo;': '\u201c',
- 'ldquor;': '\u201e',
- 'ldrdhar;': '\u2967',
- 'ldrushar;': '\u294b',
- 'ldsh;': '\u21b2',
- 'lE;': '\u2266',
- 'le;': '\u2264',
- 'LeftAngleBracket;': '\u27e8',
- 'LeftArrow;': '\u2190',
- 'Leftarrow;': '\u21d0',
- 'leftarrow;': '\u2190',
- 'LeftArrowBar;': '\u21e4',
- 'LeftArrowRightArrow;': '\u21c6',
- 'leftarrowtail;': '\u21a2',
- 'LeftCeiling;': '\u2308',
- 'LeftDoubleBracket;': '\u27e6',
- 'LeftDownTeeVector;': '\u2961',
- 'LeftDownVector;': '\u21c3',
- 'LeftDownVectorBar;': '\u2959',
- 'LeftFloor;': '\u230a',
- 'leftharpoondown;': '\u21bd',
- 'leftharpoonup;': '\u21bc',
- 'leftleftarrows;': '\u21c7',
- 'LeftRightArrow;': '\u2194',
- 'Leftrightarrow;': '\u21d4',
- 'leftrightarrow;': '\u2194',
- 'leftrightarrows;': '\u21c6',
- 'leftrightharpoons;': '\u21cb',
- 'leftrightsquigarrow;': '\u21ad',
- 'LeftRightVector;': '\u294e',
- 'LeftTee;': '\u22a3',
- 'LeftTeeArrow;': '\u21a4',
- 'LeftTeeVector;': '\u295a',
- 'leftthreetimes;': '\u22cb',
- 'LeftTriangle;': '\u22b2',
- 'LeftTriangleBar;': '\u29cf',
- 'LeftTriangleEqual;': '\u22b4',
- 'LeftUpDownVector;': '\u2951',
- 'LeftUpTeeVector;': '\u2960',
- 'LeftUpVector;': '\u21bf',
- 'LeftUpVectorBar;': '\u2958',
- 'LeftVector;': '\u21bc',
- 'LeftVectorBar;': '\u2952',
- 'lEg;': '\u2a8b',
- 'leg;': '\u22da',
- 'leq;': '\u2264',
- 'leqq;': '\u2266',
- 'leqslant;': '\u2a7d',
- 'les;': '\u2a7d',
- 'lescc;': '\u2aa8',
- 'lesdot;': '\u2a7f',
- 'lesdoto;': '\u2a81',
- 'lesdotor;': '\u2a83',
- 'lesg;': '\u22da\ufe00',
- 'lesges;': '\u2a93',
- 'lessapprox;': '\u2a85',
- 'lessdot;': '\u22d6',
- 'lesseqgtr;': '\u22da',
- 'lesseqqgtr;': '\u2a8b',
- 'LessEqualGreater;': '\u22da',
- 'LessFullEqual;': '\u2266',
- 'LessGreater;': '\u2276',
- 'lessgtr;': '\u2276',
- 'LessLess;': '\u2aa1',
- 'lesssim;': '\u2272',
- 'LessSlantEqual;': '\u2a7d',
- 'LessTilde;': '\u2272',
- 'lfisht;': '\u297c',
- 'lfloor;': '\u230a',
- 'Lfr;': '\U0001d50f',
- 'lfr;': '\U0001d529',
- 'lg;': '\u2276',
- 'lgE;': '\u2a91',
- 'lHar;': '\u2962',
- 'lhard;': '\u21bd',
- 'lharu;': '\u21bc',
- 'lharul;': '\u296a',
- 'lhblk;': '\u2584',
- 'LJcy;': '\u0409',
- 'ljcy;': '\u0459',
- 'Ll;': '\u22d8',
- 'll;': '\u226a',
- 'llarr;': '\u21c7',
- 'llcorner;': '\u231e',
- 'Lleftarrow;': '\u21da',
- 'llhard;': '\u296b',
- 'lltri;': '\u25fa',
- 'Lmidot;': '\u013f',
- 'lmidot;': '\u0140',
- 'lmoust;': '\u23b0',
- 'lmoustache;': '\u23b0',
- 'lnap;': '\u2a89',
- 'lnapprox;': '\u2a89',
- 'lnE;': '\u2268',
- 'lne;': '\u2a87',
- 'lneq;': '\u2a87',
- 'lneqq;': '\u2268',
- 'lnsim;': '\u22e6',
- 'loang;': '\u27ec',
- 'loarr;': '\u21fd',
- 'lobrk;': '\u27e6',
- 'LongLeftArrow;': '\u27f5',
- 'Longleftarrow;': '\u27f8',
- 'longleftarrow;': '\u27f5',
- 'LongLeftRightArrow;': '\u27f7',
- 'Longleftrightarrow;': '\u27fa',
- 'longleftrightarrow;': '\u27f7',
- 'longmapsto;': '\u27fc',
- 'LongRightArrow;': '\u27f6',
- 'Longrightarrow;': '\u27f9',
- 'longrightarrow;': '\u27f6',
- 'looparrowleft;': '\u21ab',
- 'looparrowright;': '\u21ac',
- 'lopar;': '\u2985',
- 'Lopf;': '\U0001d543',
- 'lopf;': '\U0001d55d',
- 'loplus;': '\u2a2d',
- 'lotimes;': '\u2a34',
- 'lowast;': '\u2217',
- 'lowbar;': '_',
- 'LowerLeftArrow;': '\u2199',
- 'LowerRightArrow;': '\u2198',
- 'loz;': '\u25ca',
- 'lozenge;': '\u25ca',
- 'lozf;': '\u29eb',
- 'lpar;': '(',
- 'lparlt;': '\u2993',
- 'lrarr;': '\u21c6',
- 'lrcorner;': '\u231f',
- 'lrhar;': '\u21cb',
- 'lrhard;': '\u296d',
- 'lrm;': '\u200e',
- 'lrtri;': '\u22bf',
- 'lsaquo;': '\u2039',
- 'Lscr;': '\u2112',
- 'lscr;': '\U0001d4c1',
- 'Lsh;': '\u21b0',
- 'lsh;': '\u21b0',
- 'lsim;': '\u2272',
- 'lsime;': '\u2a8d',
- 'lsimg;': '\u2a8f',
- 'lsqb;': '[',
- 'lsquo;': '\u2018',
- 'lsquor;': '\u201a',
- 'Lstrok;': '\u0141',
- 'lstrok;': '\u0142',
- 'LT': '<',
- 'lt': '<',
- 'LT;': '<',
- 'Lt;': '\u226a',
- 'lt;': '<',
- 'ltcc;': '\u2aa6',
- 'ltcir;': '\u2a79',
- 'ltdot;': '\u22d6',
- 'lthree;': '\u22cb',
- 'ltimes;': '\u22c9',
- 'ltlarr;': '\u2976',
- 'ltquest;': '\u2a7b',
- 'ltri;': '\u25c3',
- 'ltrie;': '\u22b4',
- 'ltrif;': '\u25c2',
- 'ltrPar;': '\u2996',
- 'lurdshar;': '\u294a',
- 'luruhar;': '\u2966',
- 'lvertneqq;': '\u2268\ufe00',
- 'lvnE;': '\u2268\ufe00',
- 'macr': '\xaf',
- 'macr;': '\xaf',
- 'male;': '\u2642',
- 'malt;': '\u2720',
- 'maltese;': '\u2720',
- 'Map;': '\u2905',
- 'map;': '\u21a6',
- 'mapsto;': '\u21a6',
- 'mapstodown;': '\u21a7',
- 'mapstoleft;': '\u21a4',
- 'mapstoup;': '\u21a5',
- 'marker;': '\u25ae',
- 'mcomma;': '\u2a29',
- 'Mcy;': '\u041c',
- 'mcy;': '\u043c',
- 'mdash;': '\u2014',
- 'mDDot;': '\u223a',
- 'measuredangle;': '\u2221',
- 'MediumSpace;': '\u205f',
- 'Mellintrf;': '\u2133',
- 'Mfr;': '\U0001d510',
- 'mfr;': '\U0001d52a',
- 'mho;': '\u2127',
- 'micro': '\xb5',
- 'micro;': '\xb5',
- 'mid;': '\u2223',
- 'midast;': '*',
- 'midcir;': '\u2af0',
- 'middot': '\xb7',
- 'middot;': '\xb7',
- 'minus;': '\u2212',
- 'minusb;': '\u229f',
- 'minusd;': '\u2238',
- 'minusdu;': '\u2a2a',
- 'MinusPlus;': '\u2213',
- 'mlcp;': '\u2adb',
- 'mldr;': '\u2026',
- 'mnplus;': '\u2213',
- 'models;': '\u22a7',
- 'Mopf;': '\U0001d544',
- 'mopf;': '\U0001d55e',
- 'mp;': '\u2213',
- 'Mscr;': '\u2133',
- 'mscr;': '\U0001d4c2',
- 'mstpos;': '\u223e',
- 'Mu;': '\u039c',
- 'mu;': '\u03bc',
- 'multimap;': '\u22b8',
- 'mumap;': '\u22b8',
- 'nabla;': '\u2207',
- 'Nacute;': '\u0143',
- 'nacute;': '\u0144',
- 'nang;': '\u2220\u20d2',
- 'nap;': '\u2249',
- 'napE;': '\u2a70\u0338',
- 'napid;': '\u224b\u0338',
- 'napos;': '\u0149',
- 'napprox;': '\u2249',
- 'natur;': '\u266e',
- 'natural;': '\u266e',
- 'naturals;': '\u2115',
- 'nbsp': '\xa0',
- 'nbsp;': '\xa0',
- 'nbump;': '\u224e\u0338',
- 'nbumpe;': '\u224f\u0338',
- 'ncap;': '\u2a43',
- 'Ncaron;': '\u0147',
- 'ncaron;': '\u0148',
- 'Ncedil;': '\u0145',
- 'ncedil;': '\u0146',
- 'ncong;': '\u2247',
- 'ncongdot;': '\u2a6d\u0338',
- 'ncup;': '\u2a42',
- 'Ncy;': '\u041d',
- 'ncy;': '\u043d',
- 'ndash;': '\u2013',
- 'ne;': '\u2260',
- 'nearhk;': '\u2924',
- 'neArr;': '\u21d7',
- 'nearr;': '\u2197',
- 'nearrow;': '\u2197',
- 'nedot;': '\u2250\u0338',
- 'NegativeMediumSpace;': '\u200b',
- 'NegativeThickSpace;': '\u200b',
- 'NegativeThinSpace;': '\u200b',
- 'NegativeVeryThinSpace;': '\u200b',
- 'nequiv;': '\u2262',
- 'nesear;': '\u2928',
- 'nesim;': '\u2242\u0338',
- 'NestedGreaterGreater;': '\u226b',
- 'NestedLessLess;': '\u226a',
- 'NewLine;': '\n',
- 'nexist;': '\u2204',
- 'nexists;': '\u2204',
- 'Nfr;': '\U0001d511',
- 'nfr;': '\U0001d52b',
- 'ngE;': '\u2267\u0338',
- 'nge;': '\u2271',
- 'ngeq;': '\u2271',
- 'ngeqq;': '\u2267\u0338',
- 'ngeqslant;': '\u2a7e\u0338',
- 'nges;': '\u2a7e\u0338',
- 'nGg;': '\u22d9\u0338',
- 'ngsim;': '\u2275',
- 'nGt;': '\u226b\u20d2',
- 'ngt;': '\u226f',
- 'ngtr;': '\u226f',
- 'nGtv;': '\u226b\u0338',
- 'nhArr;': '\u21ce',
- 'nharr;': '\u21ae',
- 'nhpar;': '\u2af2',
- 'ni;': '\u220b',
- 'nis;': '\u22fc',
- 'nisd;': '\u22fa',
- 'niv;': '\u220b',
- 'NJcy;': '\u040a',
- 'njcy;': '\u045a',
- 'nlArr;': '\u21cd',
- 'nlarr;': '\u219a',
- 'nldr;': '\u2025',
- 'nlE;': '\u2266\u0338',
- 'nle;': '\u2270',
- 'nLeftarrow;': '\u21cd',
- 'nleftarrow;': '\u219a',
- 'nLeftrightarrow;': '\u21ce',
- 'nleftrightarrow;': '\u21ae',
- 'nleq;': '\u2270',
- 'nleqq;': '\u2266\u0338',
- 'nleqslant;': '\u2a7d\u0338',
- 'nles;': '\u2a7d\u0338',
- 'nless;': '\u226e',
- 'nLl;': '\u22d8\u0338',
- 'nlsim;': '\u2274',
- 'nLt;': '\u226a\u20d2',
- 'nlt;': '\u226e',
- 'nltri;': '\u22ea',
- 'nltrie;': '\u22ec',
- 'nLtv;': '\u226a\u0338',
- 'nmid;': '\u2224',
- 'NoBreak;': '\u2060',
- 'NonBreakingSpace;': '\xa0',
- 'Nopf;': '\u2115',
- 'nopf;': '\U0001d55f',
- 'not': '\xac',
- 'Not;': '\u2aec',
- 'not;': '\xac',
- 'NotCongruent;': '\u2262',
- 'NotCupCap;': '\u226d',
- 'NotDoubleVerticalBar;': '\u2226',
- 'NotElement;': '\u2209',
- 'NotEqual;': '\u2260',
- 'NotEqualTilde;': '\u2242\u0338',
- 'NotExists;': '\u2204',
- 'NotGreater;': '\u226f',
- 'NotGreaterEqual;': '\u2271',
- 'NotGreaterFullEqual;': '\u2267\u0338',
- 'NotGreaterGreater;': '\u226b\u0338',
- 'NotGreaterLess;': '\u2279',
- 'NotGreaterSlantEqual;': '\u2a7e\u0338',
- 'NotGreaterTilde;': '\u2275',
- 'NotHumpDownHump;': '\u224e\u0338',
- 'NotHumpEqual;': '\u224f\u0338',
- 'notin;': '\u2209',
- 'notindot;': '\u22f5\u0338',
- 'notinE;': '\u22f9\u0338',
- 'notinva;': '\u2209',
- 'notinvb;': '\u22f7',
- 'notinvc;': '\u22f6',
- 'NotLeftTriangle;': '\u22ea',
- 'NotLeftTriangleBar;': '\u29cf\u0338',
- 'NotLeftTriangleEqual;': '\u22ec',
- 'NotLess;': '\u226e',
- 'NotLessEqual;': '\u2270',
- 'NotLessGreater;': '\u2278',
- 'NotLessLess;': '\u226a\u0338',
- 'NotLessSlantEqual;': '\u2a7d\u0338',
- 'NotLessTilde;': '\u2274',
- 'NotNestedGreaterGreater;': '\u2aa2\u0338',
- 'NotNestedLessLess;': '\u2aa1\u0338',
- 'notni;': '\u220c',
- 'notniva;': '\u220c',
- 'notnivb;': '\u22fe',
- 'notnivc;': '\u22fd',
- 'NotPrecedes;': '\u2280',
- 'NotPrecedesEqual;': '\u2aaf\u0338',
- 'NotPrecedesSlantEqual;': '\u22e0',
- 'NotReverseElement;': '\u220c',
- 'NotRightTriangle;': '\u22eb',
- 'NotRightTriangleBar;': '\u29d0\u0338',
- 'NotRightTriangleEqual;': '\u22ed',
- 'NotSquareSubset;': '\u228f\u0338',
- 'NotSquareSubsetEqual;': '\u22e2',
- 'NotSquareSuperset;': '\u2290\u0338',
- 'NotSquareSupersetEqual;': '\u22e3',
- 'NotSubset;': '\u2282\u20d2',
- 'NotSubsetEqual;': '\u2288',
- 'NotSucceeds;': '\u2281',
- 'NotSucceedsEqual;': '\u2ab0\u0338',
- 'NotSucceedsSlantEqual;': '\u22e1',
- 'NotSucceedsTilde;': '\u227f\u0338',
- 'NotSuperset;': '\u2283\u20d2',
- 'NotSupersetEqual;': '\u2289',
- 'NotTilde;': '\u2241',
- 'NotTildeEqual;': '\u2244',
- 'NotTildeFullEqual;': '\u2247',
- 'NotTildeTilde;': '\u2249',
- 'NotVerticalBar;': '\u2224',
- 'npar;': '\u2226',
- 'nparallel;': '\u2226',
- 'nparsl;': '\u2afd\u20e5',
- 'npart;': '\u2202\u0338',
- 'npolint;': '\u2a14',
- 'npr;': '\u2280',
- 'nprcue;': '\u22e0',
- 'npre;': '\u2aaf\u0338',
- 'nprec;': '\u2280',
- 'npreceq;': '\u2aaf\u0338',
- 'nrArr;': '\u21cf',
- 'nrarr;': '\u219b',
- 'nrarrc;': '\u2933\u0338',
- 'nrarrw;': '\u219d\u0338',
- 'nRightarrow;': '\u21cf',
- 'nrightarrow;': '\u219b',
- 'nrtri;': '\u22eb',
- 'nrtrie;': '\u22ed',
- 'nsc;': '\u2281',
- 'nsccue;': '\u22e1',
- 'nsce;': '\u2ab0\u0338',
- 'Nscr;': '\U0001d4a9',
- 'nscr;': '\U0001d4c3',
- 'nshortmid;': '\u2224',
- 'nshortparallel;': '\u2226',
- 'nsim;': '\u2241',
- 'nsime;': '\u2244',
- 'nsimeq;': '\u2244',
- 'nsmid;': '\u2224',
- 'nspar;': '\u2226',
- 'nsqsube;': '\u22e2',
- 'nsqsupe;': '\u22e3',
- 'nsub;': '\u2284',
- 'nsubE;': '\u2ac5\u0338',
- 'nsube;': '\u2288',
- 'nsubset;': '\u2282\u20d2',
- 'nsubseteq;': '\u2288',
- 'nsubseteqq;': '\u2ac5\u0338',
- 'nsucc;': '\u2281',
- 'nsucceq;': '\u2ab0\u0338',
- 'nsup;': '\u2285',
- 'nsupE;': '\u2ac6\u0338',
- 'nsupe;': '\u2289',
- 'nsupset;': '\u2283\u20d2',
- 'nsupseteq;': '\u2289',
- 'nsupseteqq;': '\u2ac6\u0338',
- 'ntgl;': '\u2279',
- 'Ntilde': '\xd1',
- 'ntilde': '\xf1',
- 'Ntilde;': '\xd1',
- 'ntilde;': '\xf1',
- 'ntlg;': '\u2278',
- 'ntriangleleft;': '\u22ea',
- 'ntrianglelefteq;': '\u22ec',
- 'ntriangleright;': '\u22eb',
- 'ntrianglerighteq;': '\u22ed',
- 'Nu;': '\u039d',
- 'nu;': '\u03bd',
- 'num;': '#',
- 'numero;': '\u2116',
- 'numsp;': '\u2007',
- 'nvap;': '\u224d\u20d2',
- 'nVDash;': '\u22af',
- 'nVdash;': '\u22ae',
- 'nvDash;': '\u22ad',
- 'nvdash;': '\u22ac',
- 'nvge;': '\u2265\u20d2',
- 'nvgt;': '>\u20d2',
- 'nvHarr;': '\u2904',
- 'nvinfin;': '\u29de',
- 'nvlArr;': '\u2902',
- 'nvle;': '\u2264\u20d2',
- 'nvlt;': '<\u20d2',
- 'nvltrie;': '\u22b4\u20d2',
- 'nvrArr;': '\u2903',
- 'nvrtrie;': '\u22b5\u20d2',
- 'nvsim;': '\u223c\u20d2',
- 'nwarhk;': '\u2923',
- 'nwArr;': '\u21d6',
- 'nwarr;': '\u2196',
- 'nwarrow;': '\u2196',
- 'nwnear;': '\u2927',
- 'Oacute': '\xd3',
- 'oacute': '\xf3',
- 'Oacute;': '\xd3',
- 'oacute;': '\xf3',
- 'oast;': '\u229b',
- 'ocir;': '\u229a',
- 'Ocirc': '\xd4',
- 'ocirc': '\xf4',
- 'Ocirc;': '\xd4',
- 'ocirc;': '\xf4',
- 'Ocy;': '\u041e',
- 'ocy;': '\u043e',
- 'odash;': '\u229d',
- 'Odblac;': '\u0150',
- 'odblac;': '\u0151',
- 'odiv;': '\u2a38',
- 'odot;': '\u2299',
- 'odsold;': '\u29bc',
- 'OElig;': '\u0152',
- 'oelig;': '\u0153',
- 'ofcir;': '\u29bf',
- 'Ofr;': '\U0001d512',
- 'ofr;': '\U0001d52c',
- 'ogon;': '\u02db',
- 'Ograve': '\xd2',
- 'ograve': '\xf2',
- 'Ograve;': '\xd2',
- 'ograve;': '\xf2',
- 'ogt;': '\u29c1',
- 'ohbar;': '\u29b5',
- 'ohm;': '\u03a9',
- 'oint;': '\u222e',
- 'olarr;': '\u21ba',
- 'olcir;': '\u29be',
- 'olcross;': '\u29bb',
- 'oline;': '\u203e',
- 'olt;': '\u29c0',
- 'Omacr;': '\u014c',
- 'omacr;': '\u014d',
- 'Omega;': '\u03a9',
- 'omega;': '\u03c9',
- 'Omicron;': '\u039f',
- 'omicron;': '\u03bf',
- 'omid;': '\u29b6',
- 'ominus;': '\u2296',
- 'Oopf;': '\U0001d546',
- 'oopf;': '\U0001d560',
- 'opar;': '\u29b7',
- 'OpenCurlyDoubleQuote;': '\u201c',
- 'OpenCurlyQuote;': '\u2018',
- 'operp;': '\u29b9',
- 'oplus;': '\u2295',
- 'Or;': '\u2a54',
- 'or;': '\u2228',
- 'orarr;': '\u21bb',
- 'ord;': '\u2a5d',
- 'order;': '\u2134',
- 'orderof;': '\u2134',
- 'ordf': '\xaa',
- 'ordf;': '\xaa',
- 'ordm': '\xba',
- 'ordm;': '\xba',
- 'origof;': '\u22b6',
- 'oror;': '\u2a56',
- 'orslope;': '\u2a57',
- 'orv;': '\u2a5b',
- 'oS;': '\u24c8',
- 'Oscr;': '\U0001d4aa',
- 'oscr;': '\u2134',
- 'Oslash': '\xd8',
- 'oslash': '\xf8',
- 'Oslash;': '\xd8',
- 'oslash;': '\xf8',
- 'osol;': '\u2298',
- 'Otilde': '\xd5',
- 'otilde': '\xf5',
- 'Otilde;': '\xd5',
- 'otilde;': '\xf5',
- 'Otimes;': '\u2a37',
- 'otimes;': '\u2297',
- 'otimesas;': '\u2a36',
- 'Ouml': '\xd6',
- 'ouml': '\xf6',
- 'Ouml;': '\xd6',
- 'ouml;': '\xf6',
- 'ovbar;': '\u233d',
- 'OverBar;': '\u203e',
- 'OverBrace;': '\u23de',
- 'OverBracket;': '\u23b4',
- 'OverParenthesis;': '\u23dc',
- 'par;': '\u2225',
- 'para': '\xb6',
- 'para;': '\xb6',
- 'parallel;': '\u2225',
- 'parsim;': '\u2af3',
- 'parsl;': '\u2afd',
- 'part;': '\u2202',
- 'PartialD;': '\u2202',
- 'Pcy;': '\u041f',
- 'pcy;': '\u043f',
- 'percnt;': '%',
- 'period;': '.',
- 'permil;': '\u2030',
- 'perp;': '\u22a5',
- 'pertenk;': '\u2031',
- 'Pfr;': '\U0001d513',
- 'pfr;': '\U0001d52d',
- 'Phi;': '\u03a6',
- 'phi;': '\u03c6',
- 'phiv;': '\u03d5',
- 'phmmat;': '\u2133',
- 'phone;': '\u260e',
- 'Pi;': '\u03a0',
- 'pi;': '\u03c0',
- 'pitchfork;': '\u22d4',
- 'piv;': '\u03d6',
- 'planck;': '\u210f',
- 'planckh;': '\u210e',
- 'plankv;': '\u210f',
- 'plus;': '+',
- 'plusacir;': '\u2a23',
- 'plusb;': '\u229e',
- 'pluscir;': '\u2a22',
- 'plusdo;': '\u2214',
- 'plusdu;': '\u2a25',
- 'pluse;': '\u2a72',
- 'PlusMinus;': '\xb1',
- 'plusmn': '\xb1',
- 'plusmn;': '\xb1',
- 'plussim;': '\u2a26',
- 'plustwo;': '\u2a27',
- 'pm;': '\xb1',
- 'Poincareplane;': '\u210c',
- 'pointint;': '\u2a15',
- 'Popf;': '\u2119',
- 'popf;': '\U0001d561',
- 'pound': '\xa3',
- 'pound;': '\xa3',
- 'Pr;': '\u2abb',
- 'pr;': '\u227a',
- 'prap;': '\u2ab7',
- 'prcue;': '\u227c',
- 'prE;': '\u2ab3',
- 'pre;': '\u2aaf',
- 'prec;': '\u227a',
- 'precapprox;': '\u2ab7',
- 'preccurlyeq;': '\u227c',
- 'Precedes;': '\u227a',
- 'PrecedesEqual;': '\u2aaf',
- 'PrecedesSlantEqual;': '\u227c',
- 'PrecedesTilde;': '\u227e',
- 'preceq;': '\u2aaf',
- 'precnapprox;': '\u2ab9',
- 'precneqq;': '\u2ab5',
- 'precnsim;': '\u22e8',
- 'precsim;': '\u227e',
- 'Prime;': '\u2033',
- 'prime;': '\u2032',
- 'primes;': '\u2119',
- 'prnap;': '\u2ab9',
- 'prnE;': '\u2ab5',
- 'prnsim;': '\u22e8',
- 'prod;': '\u220f',
- 'Product;': '\u220f',
- 'profalar;': '\u232e',
- 'profline;': '\u2312',
- 'profsurf;': '\u2313',
- 'prop;': '\u221d',
- 'Proportion;': '\u2237',
- 'Proportional;': '\u221d',
- 'propto;': '\u221d',
- 'prsim;': '\u227e',
- 'prurel;': '\u22b0',
- 'Pscr;': '\U0001d4ab',
- 'pscr;': '\U0001d4c5',
- 'Psi;': '\u03a8',
- 'psi;': '\u03c8',
- 'puncsp;': '\u2008',
- 'Qfr;': '\U0001d514',
- 'qfr;': '\U0001d52e',
- 'qint;': '\u2a0c',
- 'Qopf;': '\u211a',
- 'qopf;': '\U0001d562',
- 'qprime;': '\u2057',
- 'Qscr;': '\U0001d4ac',
- 'qscr;': '\U0001d4c6',
- 'quaternions;': '\u210d',
- 'quatint;': '\u2a16',
- 'quest;': '?',
- 'questeq;': '\u225f',
- 'QUOT': '"',
- 'quot': '"',
- 'QUOT;': '"',
- 'quot;': '"',
- 'rAarr;': '\u21db',
- 'race;': '\u223d\u0331',
- 'Racute;': '\u0154',
- 'racute;': '\u0155',
- 'radic;': '\u221a',
- 'raemptyv;': '\u29b3',
- 'Rang;': '\u27eb',
- 'rang;': '\u27e9',
- 'rangd;': '\u2992',
- 'range;': '\u29a5',
- 'rangle;': '\u27e9',
- 'raquo': '\xbb',
- 'raquo;': '\xbb',
- 'Rarr;': '\u21a0',
- 'rArr;': '\u21d2',
- 'rarr;': '\u2192',
- 'rarrap;': '\u2975',
- 'rarrb;': '\u21e5',
- 'rarrbfs;': '\u2920',
- 'rarrc;': '\u2933',
- 'rarrfs;': '\u291e',
- 'rarrhk;': '\u21aa',
- 'rarrlp;': '\u21ac',
- 'rarrpl;': '\u2945',
- 'rarrsim;': '\u2974',
- 'Rarrtl;': '\u2916',
- 'rarrtl;': '\u21a3',
- 'rarrw;': '\u219d',
- 'rAtail;': '\u291c',
- 'ratail;': '\u291a',
- 'ratio;': '\u2236',
- 'rationals;': '\u211a',
- 'RBarr;': '\u2910',
- 'rBarr;': '\u290f',
- 'rbarr;': '\u290d',
- 'rbbrk;': '\u2773',
- 'rbrace;': '}',
- 'rbrack;': ']',
- 'rbrke;': '\u298c',
- 'rbrksld;': '\u298e',
- 'rbrkslu;': '\u2990',
- 'Rcaron;': '\u0158',
- 'rcaron;': '\u0159',
- 'Rcedil;': '\u0156',
- 'rcedil;': '\u0157',
- 'rceil;': '\u2309',
- 'rcub;': '}',
- 'Rcy;': '\u0420',
- 'rcy;': '\u0440',
- 'rdca;': '\u2937',
- 'rdldhar;': '\u2969',
- 'rdquo;': '\u201d',
- 'rdquor;': '\u201d',
- 'rdsh;': '\u21b3',
- 'Re;': '\u211c',
- 'real;': '\u211c',
- 'realine;': '\u211b',
- 'realpart;': '\u211c',
- 'reals;': '\u211d',
- 'rect;': '\u25ad',
- 'REG': '\xae',
- 'reg': '\xae',
- 'REG;': '\xae',
- 'reg;': '\xae',
- 'ReverseElement;': '\u220b',
- 'ReverseEquilibrium;': '\u21cb',
- 'ReverseUpEquilibrium;': '\u296f',
- 'rfisht;': '\u297d',
- 'rfloor;': '\u230b',
- 'Rfr;': '\u211c',
- 'rfr;': '\U0001d52f',
- 'rHar;': '\u2964',
- 'rhard;': '\u21c1',
- 'rharu;': '\u21c0',
- 'rharul;': '\u296c',
- 'Rho;': '\u03a1',
- 'rho;': '\u03c1',
- 'rhov;': '\u03f1',
- 'RightAngleBracket;': '\u27e9',
- 'RightArrow;': '\u2192',
- 'Rightarrow;': '\u21d2',
- 'rightarrow;': '\u2192',
- 'RightArrowBar;': '\u21e5',
- 'RightArrowLeftArrow;': '\u21c4',
- 'rightarrowtail;': '\u21a3',
- 'RightCeiling;': '\u2309',
- 'RightDoubleBracket;': '\u27e7',
- 'RightDownTeeVector;': '\u295d',
- 'RightDownVector;': '\u21c2',
- 'RightDownVectorBar;': '\u2955',
- 'RightFloor;': '\u230b',
- 'rightharpoondown;': '\u21c1',
- 'rightharpoonup;': '\u21c0',
- 'rightleftarrows;': '\u21c4',
- 'rightleftharpoons;': '\u21cc',
- 'rightrightarrows;': '\u21c9',
- 'rightsquigarrow;': '\u219d',
- 'RightTee;': '\u22a2',
- 'RightTeeArrow;': '\u21a6',
- 'RightTeeVector;': '\u295b',
- 'rightthreetimes;': '\u22cc',
- 'RightTriangle;': '\u22b3',
- 'RightTriangleBar;': '\u29d0',
- 'RightTriangleEqual;': '\u22b5',
- 'RightUpDownVector;': '\u294f',
- 'RightUpTeeVector;': '\u295c',
- 'RightUpVector;': '\u21be',
- 'RightUpVectorBar;': '\u2954',
- 'RightVector;': '\u21c0',
- 'RightVectorBar;': '\u2953',
- 'ring;': '\u02da',
- 'risingdotseq;': '\u2253',
- 'rlarr;': '\u21c4',
- 'rlhar;': '\u21cc',
- 'rlm;': '\u200f',
- 'rmoust;': '\u23b1',
- 'rmoustache;': '\u23b1',
- 'rnmid;': '\u2aee',
- 'roang;': '\u27ed',
- 'roarr;': '\u21fe',
- 'robrk;': '\u27e7',
- 'ropar;': '\u2986',
- 'Ropf;': '\u211d',
- 'ropf;': '\U0001d563',
- 'roplus;': '\u2a2e',
- 'rotimes;': '\u2a35',
- 'RoundImplies;': '\u2970',
- 'rpar;': ')',
- 'rpargt;': '\u2994',
- 'rppolint;': '\u2a12',
- 'rrarr;': '\u21c9',
- 'Rrightarrow;': '\u21db',
- 'rsaquo;': '\u203a',
- 'Rscr;': '\u211b',
- 'rscr;': '\U0001d4c7',
- 'Rsh;': '\u21b1',
- 'rsh;': '\u21b1',
- 'rsqb;': ']',
- 'rsquo;': '\u2019',
- 'rsquor;': '\u2019',
- 'rthree;': '\u22cc',
- 'rtimes;': '\u22ca',
- 'rtri;': '\u25b9',
- 'rtrie;': '\u22b5',
- 'rtrif;': '\u25b8',
- 'rtriltri;': '\u29ce',
- 'RuleDelayed;': '\u29f4',
- 'ruluhar;': '\u2968',
- 'rx;': '\u211e',
- 'Sacute;': '\u015a',
- 'sacute;': '\u015b',
- 'sbquo;': '\u201a',
- 'Sc;': '\u2abc',
- 'sc;': '\u227b',
- 'scap;': '\u2ab8',
- 'Scaron;': '\u0160',
- 'scaron;': '\u0161',
- 'sccue;': '\u227d',
- 'scE;': '\u2ab4',
- 'sce;': '\u2ab0',
- 'Scedil;': '\u015e',
- 'scedil;': '\u015f',
- 'Scirc;': '\u015c',
- 'scirc;': '\u015d',
- 'scnap;': '\u2aba',
- 'scnE;': '\u2ab6',
- 'scnsim;': '\u22e9',
- 'scpolint;': '\u2a13',
- 'scsim;': '\u227f',
- 'Scy;': '\u0421',
- 'scy;': '\u0441',
- 'sdot;': '\u22c5',
- 'sdotb;': '\u22a1',
- 'sdote;': '\u2a66',
- 'searhk;': '\u2925',
- 'seArr;': '\u21d8',
- 'searr;': '\u2198',
- 'searrow;': '\u2198',
- 'sect': '\xa7',
- 'sect;': '\xa7',
- 'semi;': ';',
- 'seswar;': '\u2929',
- 'setminus;': '\u2216',
- 'setmn;': '\u2216',
- 'sext;': '\u2736',
- 'Sfr;': '\U0001d516',
- 'sfr;': '\U0001d530',
- 'sfrown;': '\u2322',
- 'sharp;': '\u266f',
- 'SHCHcy;': '\u0429',
- 'shchcy;': '\u0449',
- 'SHcy;': '\u0428',
- 'shcy;': '\u0448',
- 'ShortDownArrow;': '\u2193',
- 'ShortLeftArrow;': '\u2190',
- 'shortmid;': '\u2223',
- 'shortparallel;': '\u2225',
- 'ShortRightArrow;': '\u2192',
- 'ShortUpArrow;': '\u2191',
- 'shy': '\xad',
- 'shy;': '\xad',
- 'Sigma;': '\u03a3',
- 'sigma;': '\u03c3',
- 'sigmaf;': '\u03c2',
- 'sigmav;': '\u03c2',
- 'sim;': '\u223c',
- 'simdot;': '\u2a6a',
- 'sime;': '\u2243',
- 'simeq;': '\u2243',
- 'simg;': '\u2a9e',
- 'simgE;': '\u2aa0',
- 'siml;': '\u2a9d',
- 'simlE;': '\u2a9f',
- 'simne;': '\u2246',
- 'simplus;': '\u2a24',
- 'simrarr;': '\u2972',
- 'slarr;': '\u2190',
- 'SmallCircle;': '\u2218',
- 'smallsetminus;': '\u2216',
- 'smashp;': '\u2a33',
- 'smeparsl;': '\u29e4',
- 'smid;': '\u2223',
- 'smile;': '\u2323',
- 'smt;': '\u2aaa',
- 'smte;': '\u2aac',
- 'smtes;': '\u2aac\ufe00',
- 'SOFTcy;': '\u042c',
- 'softcy;': '\u044c',
- 'sol;': '/',
- 'solb;': '\u29c4',
- 'solbar;': '\u233f',
- 'Sopf;': '\U0001d54a',
- 'sopf;': '\U0001d564',
- 'spades;': '\u2660',
- 'spadesuit;': '\u2660',
- 'spar;': '\u2225',
- 'sqcap;': '\u2293',
- 'sqcaps;': '\u2293\ufe00',
- 'sqcup;': '\u2294',
- 'sqcups;': '\u2294\ufe00',
- 'Sqrt;': '\u221a',
- 'sqsub;': '\u228f',
- 'sqsube;': '\u2291',
- 'sqsubset;': '\u228f',
- 'sqsubseteq;': '\u2291',
- 'sqsup;': '\u2290',
- 'sqsupe;': '\u2292',
- 'sqsupset;': '\u2290',
- 'sqsupseteq;': '\u2292',
- 'squ;': '\u25a1',
- 'Square;': '\u25a1',
- 'square;': '\u25a1',
- 'SquareIntersection;': '\u2293',
- 'SquareSubset;': '\u228f',
- 'SquareSubsetEqual;': '\u2291',
- 'SquareSuperset;': '\u2290',
- 'SquareSupersetEqual;': '\u2292',
- 'SquareUnion;': '\u2294',
- 'squarf;': '\u25aa',
- 'squf;': '\u25aa',
- 'srarr;': '\u2192',
- 'Sscr;': '\U0001d4ae',
- 'sscr;': '\U0001d4c8',
- 'ssetmn;': '\u2216',
- 'ssmile;': '\u2323',
- 'sstarf;': '\u22c6',
- 'Star;': '\u22c6',
- 'star;': '\u2606',
- 'starf;': '\u2605',
- 'straightepsilon;': '\u03f5',
- 'straightphi;': '\u03d5',
- 'strns;': '\xaf',
- 'Sub;': '\u22d0',
- 'sub;': '\u2282',
- 'subdot;': '\u2abd',
- 'subE;': '\u2ac5',
- 'sube;': '\u2286',
- 'subedot;': '\u2ac3',
- 'submult;': '\u2ac1',
- 'subnE;': '\u2acb',
- 'subne;': '\u228a',
- 'subplus;': '\u2abf',
- 'subrarr;': '\u2979',
- 'Subset;': '\u22d0',
- 'subset;': '\u2282',
- 'subseteq;': '\u2286',
- 'subseteqq;': '\u2ac5',
- 'SubsetEqual;': '\u2286',
- 'subsetneq;': '\u228a',
- 'subsetneqq;': '\u2acb',
- 'subsim;': '\u2ac7',
- 'subsub;': '\u2ad5',
- 'subsup;': '\u2ad3',
- 'succ;': '\u227b',
- 'succapprox;': '\u2ab8',
- 'succcurlyeq;': '\u227d',
- 'Succeeds;': '\u227b',
- 'SucceedsEqual;': '\u2ab0',
- 'SucceedsSlantEqual;': '\u227d',
- 'SucceedsTilde;': '\u227f',
- 'succeq;': '\u2ab0',
- 'succnapprox;': '\u2aba',
- 'succneqq;': '\u2ab6',
- 'succnsim;': '\u22e9',
- 'succsim;': '\u227f',
- 'SuchThat;': '\u220b',
- 'Sum;': '\u2211',
- 'sum;': '\u2211',
- 'sung;': '\u266a',
- 'sup1': '\xb9',
- 'sup1;': '\xb9',
- 'sup2': '\xb2',
- 'sup2;': '\xb2',
- 'sup3': '\xb3',
- 'sup3;': '\xb3',
- 'Sup;': '\u22d1',
- 'sup;': '\u2283',
- 'supdot;': '\u2abe',
- 'supdsub;': '\u2ad8',
- 'supE;': '\u2ac6',
- 'supe;': '\u2287',
- 'supedot;': '\u2ac4',
- 'Superset;': '\u2283',
- 'SupersetEqual;': '\u2287',
- 'suphsol;': '\u27c9',
- 'suphsub;': '\u2ad7',
- 'suplarr;': '\u297b',
- 'supmult;': '\u2ac2',
- 'supnE;': '\u2acc',
- 'supne;': '\u228b',
- 'supplus;': '\u2ac0',
- 'Supset;': '\u22d1',
- 'supset;': '\u2283',
- 'supseteq;': '\u2287',
- 'supseteqq;': '\u2ac6',
- 'supsetneq;': '\u228b',
- 'supsetneqq;': '\u2acc',
- 'supsim;': '\u2ac8',
- 'supsub;': '\u2ad4',
- 'supsup;': '\u2ad6',
- 'swarhk;': '\u2926',
- 'swArr;': '\u21d9',
- 'swarr;': '\u2199',
- 'swarrow;': '\u2199',
- 'swnwar;': '\u292a',
- 'szlig': '\xdf',
- 'szlig;': '\xdf',
- 'Tab;': '\t',
- 'target;': '\u2316',
- 'Tau;': '\u03a4',
- 'tau;': '\u03c4',
- 'tbrk;': '\u23b4',
- 'Tcaron;': '\u0164',
- 'tcaron;': '\u0165',
- 'Tcedil;': '\u0162',
- 'tcedil;': '\u0163',
- 'Tcy;': '\u0422',
- 'tcy;': '\u0442',
- 'tdot;': '\u20db',
- 'telrec;': '\u2315',
- 'Tfr;': '\U0001d517',
- 'tfr;': '\U0001d531',
- 'there4;': '\u2234',
- 'Therefore;': '\u2234',
- 'therefore;': '\u2234',
- 'Theta;': '\u0398',
- 'theta;': '\u03b8',
- 'thetasym;': '\u03d1',
- 'thetav;': '\u03d1',
- 'thickapprox;': '\u2248',
- 'thicksim;': '\u223c',
- 'ThickSpace;': '\u205f\u200a',
- 'thinsp;': '\u2009',
- 'ThinSpace;': '\u2009',
- 'thkap;': '\u2248',
- 'thksim;': '\u223c',
- 'THORN': '\xde',
- 'thorn': '\xfe',
- 'THORN;': '\xde',
- 'thorn;': '\xfe',
- 'Tilde;': '\u223c',
- 'tilde;': '\u02dc',
- 'TildeEqual;': '\u2243',
- 'TildeFullEqual;': '\u2245',
- 'TildeTilde;': '\u2248',
- 'times': '\xd7',
- 'times;': '\xd7',
- 'timesb;': '\u22a0',
- 'timesbar;': '\u2a31',
- 'timesd;': '\u2a30',
- 'tint;': '\u222d',
- 'toea;': '\u2928',
- 'top;': '\u22a4',
- 'topbot;': '\u2336',
- 'topcir;': '\u2af1',
- 'Topf;': '\U0001d54b',
- 'topf;': '\U0001d565',
- 'topfork;': '\u2ada',
- 'tosa;': '\u2929',
- 'tprime;': '\u2034',
- 'TRADE;': '\u2122',
- 'trade;': '\u2122',
- 'triangle;': '\u25b5',
- 'triangledown;': '\u25bf',
- 'triangleleft;': '\u25c3',
- 'trianglelefteq;': '\u22b4',
- 'triangleq;': '\u225c',
- 'triangleright;': '\u25b9',
- 'trianglerighteq;': '\u22b5',
- 'tridot;': '\u25ec',
- 'trie;': '\u225c',
- 'triminus;': '\u2a3a',
- 'TripleDot;': '\u20db',
- 'triplus;': '\u2a39',
- 'trisb;': '\u29cd',
- 'tritime;': '\u2a3b',
- 'trpezium;': '\u23e2',
- 'Tscr;': '\U0001d4af',
- 'tscr;': '\U0001d4c9',
- 'TScy;': '\u0426',
- 'tscy;': '\u0446',
- 'TSHcy;': '\u040b',
- 'tshcy;': '\u045b',
- 'Tstrok;': '\u0166',
- 'tstrok;': '\u0167',
- 'twixt;': '\u226c',
- 'twoheadleftarrow;': '\u219e',
- 'twoheadrightarrow;': '\u21a0',
- 'Uacute': '\xda',
- 'uacute': '\xfa',
- 'Uacute;': '\xda',
- 'uacute;': '\xfa',
- 'Uarr;': '\u219f',
- 'uArr;': '\u21d1',
- 'uarr;': '\u2191',
- 'Uarrocir;': '\u2949',
- 'Ubrcy;': '\u040e',
- 'ubrcy;': '\u045e',
- 'Ubreve;': '\u016c',
- 'ubreve;': '\u016d',
- 'Ucirc': '\xdb',
- 'ucirc': '\xfb',
- 'Ucirc;': '\xdb',
- 'ucirc;': '\xfb',
- 'Ucy;': '\u0423',
- 'ucy;': '\u0443',
- 'udarr;': '\u21c5',
- 'Udblac;': '\u0170',
- 'udblac;': '\u0171',
- 'udhar;': '\u296e',
- 'ufisht;': '\u297e',
- 'Ufr;': '\U0001d518',
- 'ufr;': '\U0001d532',
- 'Ugrave': '\xd9',
- 'ugrave': '\xf9',
- 'Ugrave;': '\xd9',
- 'ugrave;': '\xf9',
- 'uHar;': '\u2963',
- 'uharl;': '\u21bf',
- 'uharr;': '\u21be',
- 'uhblk;': '\u2580',
- 'ulcorn;': '\u231c',
- 'ulcorner;': '\u231c',
- 'ulcrop;': '\u230f',
- 'ultri;': '\u25f8',
- 'Umacr;': '\u016a',
- 'umacr;': '\u016b',
- 'uml': '\xa8',
- 'uml;': '\xa8',
- 'UnderBar;': '_',
- 'UnderBrace;': '\u23df',
- 'UnderBracket;': '\u23b5',
- 'UnderParenthesis;': '\u23dd',
- 'Union;': '\u22c3',
- 'UnionPlus;': '\u228e',
- 'Uogon;': '\u0172',
- 'uogon;': '\u0173',
- 'Uopf;': '\U0001d54c',
- 'uopf;': '\U0001d566',
- 'UpArrow;': '\u2191',
- 'Uparrow;': '\u21d1',
- 'uparrow;': '\u2191',
- 'UpArrowBar;': '\u2912',
- 'UpArrowDownArrow;': '\u21c5',
- 'UpDownArrow;': '\u2195',
- 'Updownarrow;': '\u21d5',
- 'updownarrow;': '\u2195',
- 'UpEquilibrium;': '\u296e',
- 'upharpoonleft;': '\u21bf',
- 'upharpoonright;': '\u21be',
- 'uplus;': '\u228e',
- 'UpperLeftArrow;': '\u2196',
- 'UpperRightArrow;': '\u2197',
- 'Upsi;': '\u03d2',
- 'upsi;': '\u03c5',
- 'upsih;': '\u03d2',
- 'Upsilon;': '\u03a5',
- 'upsilon;': '\u03c5',
- 'UpTee;': '\u22a5',
- 'UpTeeArrow;': '\u21a5',
- 'upuparrows;': '\u21c8',
- 'urcorn;': '\u231d',
- 'urcorner;': '\u231d',
- 'urcrop;': '\u230e',
- 'Uring;': '\u016e',
- 'uring;': '\u016f',
- 'urtri;': '\u25f9',
- 'Uscr;': '\U0001d4b0',
- 'uscr;': '\U0001d4ca',
- 'utdot;': '\u22f0',
- 'Utilde;': '\u0168',
- 'utilde;': '\u0169',
- 'utri;': '\u25b5',
- 'utrif;': '\u25b4',
- 'uuarr;': '\u21c8',
- 'Uuml': '\xdc',
- 'uuml': '\xfc',
- 'Uuml;': '\xdc',
- 'uuml;': '\xfc',
- 'uwangle;': '\u29a7',
- 'vangrt;': '\u299c',
- 'varepsilon;': '\u03f5',
- 'varkappa;': '\u03f0',
- 'varnothing;': '\u2205',
- 'varphi;': '\u03d5',
- 'varpi;': '\u03d6',
- 'varpropto;': '\u221d',
- 'vArr;': '\u21d5',
- 'varr;': '\u2195',
- 'varrho;': '\u03f1',
- 'varsigma;': '\u03c2',
- 'varsubsetneq;': '\u228a\ufe00',
- 'varsubsetneqq;': '\u2acb\ufe00',
- 'varsupsetneq;': '\u228b\ufe00',
- 'varsupsetneqq;': '\u2acc\ufe00',
- 'vartheta;': '\u03d1',
- 'vartriangleleft;': '\u22b2',
- 'vartriangleright;': '\u22b3',
- 'Vbar;': '\u2aeb',
- 'vBar;': '\u2ae8',
- 'vBarv;': '\u2ae9',
- 'Vcy;': '\u0412',
- 'vcy;': '\u0432',
- 'VDash;': '\u22ab',
- 'Vdash;': '\u22a9',
- 'vDash;': '\u22a8',
- 'vdash;': '\u22a2',
- 'Vdashl;': '\u2ae6',
- 'Vee;': '\u22c1',
- 'vee;': '\u2228',
- 'veebar;': '\u22bb',
- 'veeeq;': '\u225a',
- 'vellip;': '\u22ee',
- 'Verbar;': '\u2016',
- 'verbar;': '|',
- 'Vert;': '\u2016',
- 'vert;': '|',
- 'VerticalBar;': '\u2223',
- 'VerticalLine;': '|',
- 'VerticalSeparator;': '\u2758',
- 'VerticalTilde;': '\u2240',
- 'VeryThinSpace;': '\u200a',
- 'Vfr;': '\U0001d519',
- 'vfr;': '\U0001d533',
- 'vltri;': '\u22b2',
- 'vnsub;': '\u2282\u20d2',
- 'vnsup;': '\u2283\u20d2',
- 'Vopf;': '\U0001d54d',
- 'vopf;': '\U0001d567',
- 'vprop;': '\u221d',
- 'vrtri;': '\u22b3',
- 'Vscr;': '\U0001d4b1',
- 'vscr;': '\U0001d4cb',
- 'vsubnE;': '\u2acb\ufe00',
- 'vsubne;': '\u228a\ufe00',
- 'vsupnE;': '\u2acc\ufe00',
- 'vsupne;': '\u228b\ufe00',
- 'Vvdash;': '\u22aa',
- 'vzigzag;': '\u299a',
- 'Wcirc;': '\u0174',
- 'wcirc;': '\u0175',
- 'wedbar;': '\u2a5f',
- 'Wedge;': '\u22c0',
- 'wedge;': '\u2227',
- 'wedgeq;': '\u2259',
- 'weierp;': '\u2118',
- 'Wfr;': '\U0001d51a',
- 'wfr;': '\U0001d534',
- 'Wopf;': '\U0001d54e',
- 'wopf;': '\U0001d568',
- 'wp;': '\u2118',
- 'wr;': '\u2240',
- 'wreath;': '\u2240',
- 'Wscr;': '\U0001d4b2',
- 'wscr;': '\U0001d4cc',
- 'xcap;': '\u22c2',
- 'xcirc;': '\u25ef',
- 'xcup;': '\u22c3',
- 'xdtri;': '\u25bd',
- 'Xfr;': '\U0001d51b',
- 'xfr;': '\U0001d535',
- 'xhArr;': '\u27fa',
- 'xharr;': '\u27f7',
- 'Xi;': '\u039e',
- 'xi;': '\u03be',
- 'xlArr;': '\u27f8',
- 'xlarr;': '\u27f5',
- 'xmap;': '\u27fc',
- 'xnis;': '\u22fb',
- 'xodot;': '\u2a00',
- 'Xopf;': '\U0001d54f',
- 'xopf;': '\U0001d569',
- 'xoplus;': '\u2a01',
- 'xotime;': '\u2a02',
- 'xrArr;': '\u27f9',
- 'xrarr;': '\u27f6',
- 'Xscr;': '\U0001d4b3',
- 'xscr;': '\U0001d4cd',
- 'xsqcup;': '\u2a06',
- 'xuplus;': '\u2a04',
- 'xutri;': '\u25b3',
- 'xvee;': '\u22c1',
- 'xwedge;': '\u22c0',
- 'Yacute': '\xdd',
- 'yacute': '\xfd',
- 'Yacute;': '\xdd',
- 'yacute;': '\xfd',
- 'YAcy;': '\u042f',
- 'yacy;': '\u044f',
- 'Ycirc;': '\u0176',
- 'ycirc;': '\u0177',
- 'Ycy;': '\u042b',
- 'ycy;': '\u044b',
- 'yen': '\xa5',
- 'yen;': '\xa5',
- 'Yfr;': '\U0001d51c',
- 'yfr;': '\U0001d536',
- 'YIcy;': '\u0407',
- 'yicy;': '\u0457',
- 'Yopf;': '\U0001d550',
- 'yopf;': '\U0001d56a',
- 'Yscr;': '\U0001d4b4',
- 'yscr;': '\U0001d4ce',
- 'YUcy;': '\u042e',
- 'yucy;': '\u044e',
- 'yuml': '\xff',
- 'Yuml;': '\u0178',
- 'yuml;': '\xff',
- 'Zacute;': '\u0179',
- 'zacute;': '\u017a',
- 'Zcaron;': '\u017d',
- 'zcaron;': '\u017e',
- 'Zcy;': '\u0417',
- 'zcy;': '\u0437',
- 'Zdot;': '\u017b',
- 'zdot;': '\u017c',
- 'zeetrf;': '\u2128',
- 'ZeroWidthSpace;': '\u200b',
- 'Zeta;': '\u0396',
- 'zeta;': '\u03b6',
- 'Zfr;': '\u2128',
- 'zfr;': '\U0001d537',
- 'ZHcy;': '\u0416',
- 'zhcy;': '\u0436',
- 'zigrarr;': '\u21dd',
- 'Zopf;': '\u2124',
- 'zopf;': '\U0001d56b',
- 'Zscr;': '\U0001d4b5',
- 'zscr;': '\U0001d4cf',
- 'zwj;': '\u200d',
- 'zwnj;': '\u200c',
-}
-
-# maps the Unicode codepoint to the HTML entity name
-codepoint2name = {}
-
-# maps the HTML entity name to the character
-# (or a character reference if the character is outside the Latin-1 range)
-entitydefs = {}
-
-for (name, codepoint) in name2codepoint.items():
- codepoint2name[codepoint] = name
- entitydefs[name] = chr(codepoint)
-
-del name, codepoint
+"""HTML character entity references.
+
+Backported for python-future from Python 3.3
+"""
+
+from __future__ import (absolute_import, division,
+ print_function, unicode_literals)
+from future.builtins import *
+
+
+# maps the HTML entity name to the Unicode codepoint
+name2codepoint = {
+ 'AElig': 0x00c6, # latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1
+ 'Aacute': 0x00c1, # latin capital letter A with acute, U+00C1 ISOlat1
+ 'Acirc': 0x00c2, # latin capital letter A with circumflex, U+00C2 ISOlat1
+ 'Agrave': 0x00c0, # latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1
+ 'Alpha': 0x0391, # greek capital letter alpha, U+0391
+ 'Aring': 0x00c5, # latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1
+ 'Atilde': 0x00c3, # latin capital letter A with tilde, U+00C3 ISOlat1
+ 'Auml': 0x00c4, # latin capital letter A with diaeresis, U+00C4 ISOlat1
+ 'Beta': 0x0392, # greek capital letter beta, U+0392
+ 'Ccedil': 0x00c7, # latin capital letter C with cedilla, U+00C7 ISOlat1
+ 'Chi': 0x03a7, # greek capital letter chi, U+03A7
+ 'Dagger': 0x2021, # double dagger, U+2021 ISOpub
+ 'Delta': 0x0394, # greek capital letter delta, U+0394 ISOgrk3
+ 'ETH': 0x00d0, # latin capital letter ETH, U+00D0 ISOlat1
+ 'Eacute': 0x00c9, # latin capital letter E with acute, U+00C9 ISOlat1
+ 'Ecirc': 0x00ca, # latin capital letter E with circumflex, U+00CA ISOlat1
+ 'Egrave': 0x00c8, # latin capital letter E with grave, U+00C8 ISOlat1
+ 'Epsilon': 0x0395, # greek capital letter epsilon, U+0395
+ 'Eta': 0x0397, # greek capital letter eta, U+0397
+ 'Euml': 0x00cb, # latin capital letter E with diaeresis, U+00CB ISOlat1
+ 'Gamma': 0x0393, # greek capital letter gamma, U+0393 ISOgrk3
+ 'Iacute': 0x00cd, # latin capital letter I with acute, U+00CD ISOlat1
+ 'Icirc': 0x00ce, # latin capital letter I with circumflex, U+00CE ISOlat1
+ 'Igrave': 0x00cc, # latin capital letter I with grave, U+00CC ISOlat1
+ 'Iota': 0x0399, # greek capital letter iota, U+0399
+ 'Iuml': 0x00cf, # latin capital letter I with diaeresis, U+00CF ISOlat1
+ 'Kappa': 0x039a, # greek capital letter kappa, U+039A
+ 'Lambda': 0x039b, # greek capital letter lambda, U+039B ISOgrk3
+ 'Mu': 0x039c, # greek capital letter mu, U+039C
+ 'Ntilde': 0x00d1, # latin capital letter N with tilde, U+00D1 ISOlat1
+ 'Nu': 0x039d, # greek capital letter nu, U+039D
+ 'OElig': 0x0152, # latin capital ligature OE, U+0152 ISOlat2
+ 'Oacute': 0x00d3, # latin capital letter O with acute, U+00D3 ISOlat1
+ 'Ocirc': 0x00d4, # latin capital letter O with circumflex, U+00D4 ISOlat1
+ 'Ograve': 0x00d2, # latin capital letter O with grave, U+00D2 ISOlat1
+ 'Omega': 0x03a9, # greek capital letter omega, U+03A9 ISOgrk3
+ 'Omicron': 0x039f, # greek capital letter omicron, U+039F
+ 'Oslash': 0x00d8, # latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1
+ 'Otilde': 0x00d5, # latin capital letter O with tilde, U+00D5 ISOlat1
+ 'Ouml': 0x00d6, # latin capital letter O with diaeresis, U+00D6 ISOlat1
+ 'Phi': 0x03a6, # greek capital letter phi, U+03A6 ISOgrk3
+ 'Pi': 0x03a0, # greek capital letter pi, U+03A0 ISOgrk3
+ 'Prime': 0x2033, # double prime = seconds = inches, U+2033 ISOtech
+ 'Psi': 0x03a8, # greek capital letter psi, U+03A8 ISOgrk3
+ 'Rho': 0x03a1, # greek capital letter rho, U+03A1
+ 'Scaron': 0x0160, # latin capital letter S with caron, U+0160 ISOlat2
+ 'Sigma': 0x03a3, # greek capital letter sigma, U+03A3 ISOgrk3
+ 'THORN': 0x00de, # latin capital letter THORN, U+00DE ISOlat1
+ 'Tau': 0x03a4, # greek capital letter tau, U+03A4
+ 'Theta': 0x0398, # greek capital letter theta, U+0398 ISOgrk3
+ 'Uacute': 0x00da, # latin capital letter U with acute, U+00DA ISOlat1
+ 'Ucirc': 0x00db, # latin capital letter U with circumflex, U+00DB ISOlat1
+ 'Ugrave': 0x00d9, # latin capital letter U with grave, U+00D9 ISOlat1
+ 'Upsilon': 0x03a5, # greek capital letter upsilon, U+03A5 ISOgrk3
+ 'Uuml': 0x00dc, # latin capital letter U with diaeresis, U+00DC ISOlat1
+ 'Xi': 0x039e, # greek capital letter xi, U+039E ISOgrk3
+ 'Yacute': 0x00dd, # latin capital letter Y with acute, U+00DD ISOlat1
+ 'Yuml': 0x0178, # latin capital letter Y with diaeresis, U+0178 ISOlat2
+ 'Zeta': 0x0396, # greek capital letter zeta, U+0396
+ 'aacute': 0x00e1, # latin small letter a with acute, U+00E1 ISOlat1
+ 'acirc': 0x00e2, # latin small letter a with circumflex, U+00E2 ISOlat1
+ 'acute': 0x00b4, # acute accent = spacing acute, U+00B4 ISOdia
+ 'aelig': 0x00e6, # latin small letter ae = latin small ligature ae, U+00E6 ISOlat1
+ 'agrave': 0x00e0, # latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1
+ 'alefsym': 0x2135, # alef symbol = first transfinite cardinal, U+2135 NEW
+ 'alpha': 0x03b1, # greek small letter alpha, U+03B1 ISOgrk3
+ 'amp': 0x0026, # ampersand, U+0026 ISOnum
+ 'and': 0x2227, # logical and = wedge, U+2227 ISOtech
+ 'ang': 0x2220, # angle, U+2220 ISOamso
+ 'aring': 0x00e5, # latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1
+ 'asymp': 0x2248, # almost equal to = asymptotic to, U+2248 ISOamsr
+ 'atilde': 0x00e3, # latin small letter a with tilde, U+00E3 ISOlat1
+ 'auml': 0x00e4, # latin small letter a with diaeresis, U+00E4 ISOlat1
+ 'bdquo': 0x201e, # double low-9 quotation mark, U+201E NEW
+ 'beta': 0x03b2, # greek small letter beta, U+03B2 ISOgrk3
+ 'brvbar': 0x00a6, # broken bar = broken vertical bar, U+00A6 ISOnum
+ 'bull': 0x2022, # bullet = black small circle, U+2022 ISOpub
+ 'cap': 0x2229, # intersection = cap, U+2229 ISOtech
+ 'ccedil': 0x00e7, # latin small letter c with cedilla, U+00E7 ISOlat1
+ 'cedil': 0x00b8, # cedilla = spacing cedilla, U+00B8 ISOdia
+ 'cent': 0x00a2, # cent sign, U+00A2 ISOnum
+ 'chi': 0x03c7, # greek small letter chi, U+03C7 ISOgrk3
+ 'circ': 0x02c6, # modifier letter circumflex accent, U+02C6 ISOpub
+ 'clubs': 0x2663, # black club suit = shamrock, U+2663 ISOpub
+ 'cong': 0x2245, # approximately equal to, U+2245 ISOtech
+ 'copy': 0x00a9, # copyright sign, U+00A9 ISOnum
+ 'crarr': 0x21b5, # downwards arrow with corner leftwards = carriage return, U+21B5 NEW
+ 'cup': 0x222a, # union = cup, U+222A ISOtech
+ 'curren': 0x00a4, # currency sign, U+00A4 ISOnum
+ 'dArr': 0x21d3, # downwards double arrow, U+21D3 ISOamsa
+ 'dagger': 0x2020, # dagger, U+2020 ISOpub
+ 'darr': 0x2193, # downwards arrow, U+2193 ISOnum
+ 'deg': 0x00b0, # degree sign, U+00B0 ISOnum
+ 'delta': 0x03b4, # greek small letter delta, U+03B4 ISOgrk3
+ 'diams': 0x2666, # black diamond suit, U+2666 ISOpub
+ 'divide': 0x00f7, # division sign, U+00F7 ISOnum
+ 'eacute': 0x00e9, # latin small letter e with acute, U+00E9 ISOlat1
+ 'ecirc': 0x00ea, # latin small letter e with circumflex, U+00EA ISOlat1
+ 'egrave': 0x00e8, # latin small letter e with grave, U+00E8 ISOlat1
+ 'empty': 0x2205, # empty set = null set = diameter, U+2205 ISOamso
+ 'emsp': 0x2003, # em space, U+2003 ISOpub
+ 'ensp': 0x2002, # en space, U+2002 ISOpub
+ 'epsilon': 0x03b5, # greek small letter epsilon, U+03B5 ISOgrk3
+ 'equiv': 0x2261, # identical to, U+2261 ISOtech
+ 'eta': 0x03b7, # greek small letter eta, U+03B7 ISOgrk3
+ 'eth': 0x00f0, # latin small letter eth, U+00F0 ISOlat1
+ 'euml': 0x00eb, # latin small letter e with diaeresis, U+00EB ISOlat1
+ 'euro': 0x20ac, # euro sign, U+20AC NEW
+ 'exist': 0x2203, # there exists, U+2203 ISOtech
+ 'fnof': 0x0192, # latin small f with hook = function = florin, U+0192 ISOtech
+ 'forall': 0x2200, # for all, U+2200 ISOtech
+ 'frac12': 0x00bd, # vulgar fraction one half = fraction one half, U+00BD ISOnum
+ 'frac14': 0x00bc, # vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum
+ 'frac34': 0x00be, # vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum
+ 'frasl': 0x2044, # fraction slash, U+2044 NEW
+ 'gamma': 0x03b3, # greek small letter gamma, U+03B3 ISOgrk3
+ 'ge': 0x2265, # greater-than or equal to, U+2265 ISOtech
+ 'gt': 0x003e, # greater-than sign, U+003E ISOnum
+ 'hArr': 0x21d4, # left right double arrow, U+21D4 ISOamsa
+ 'harr': 0x2194, # left right arrow, U+2194 ISOamsa
+ 'hearts': 0x2665, # black heart suit = valentine, U+2665 ISOpub
+ 'hellip': 0x2026, # horizontal ellipsis = three dot leader, U+2026 ISOpub
+ 'iacute': 0x00ed, # latin small letter i with acute, U+00ED ISOlat1
+ 'icirc': 0x00ee, # latin small letter i with circumflex, U+00EE ISOlat1
+ 'iexcl': 0x00a1, # inverted exclamation mark, U+00A1 ISOnum
+ 'igrave': 0x00ec, # latin small letter i with grave, U+00EC ISOlat1
+ 'image': 0x2111, # blackletter capital I = imaginary part, U+2111 ISOamso
+ 'infin': 0x221e, # infinity, U+221E ISOtech
+ 'int': 0x222b, # integral, U+222B ISOtech
+ 'iota': 0x03b9, # greek small letter iota, U+03B9 ISOgrk3
+ 'iquest': 0x00bf, # inverted question mark = turned question mark, U+00BF ISOnum
+ 'isin': 0x2208, # element of, U+2208 ISOtech
+ 'iuml': 0x00ef, # latin small letter i with diaeresis, U+00EF ISOlat1
+ 'kappa': 0x03ba, # greek small letter kappa, U+03BA ISOgrk3
+ 'lArr': 0x21d0, # leftwards double arrow, U+21D0 ISOtech
+ 'lambda': 0x03bb, # greek small letter lambda, U+03BB ISOgrk3
+ 'lang': 0x2329, # left-pointing angle bracket = bra, U+2329 ISOtech
+ 'laquo': 0x00ab, # left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum
+ 'larr': 0x2190, # leftwards arrow, U+2190 ISOnum
+ 'lceil': 0x2308, # left ceiling = apl upstile, U+2308 ISOamsc
+ 'ldquo': 0x201c, # left double quotation mark, U+201C ISOnum
+ 'le': 0x2264, # less-than or equal to, U+2264 ISOtech
+ 'lfloor': 0x230a, # left floor = apl downstile, U+230A ISOamsc
+ 'lowast': 0x2217, # asterisk operator, U+2217 ISOtech
+ 'loz': 0x25ca, # lozenge, U+25CA ISOpub
+ 'lrm': 0x200e, # left-to-right mark, U+200E NEW RFC 2070
+ 'lsaquo': 0x2039, # single left-pointing angle quotation mark, U+2039 ISO proposed
+ 'lsquo': 0x2018, # left single quotation mark, U+2018 ISOnum
+ 'lt': 0x003c, # less-than sign, U+003C ISOnum
+ 'macr': 0x00af, # macron = spacing macron = overline = APL overbar, U+00AF ISOdia
+ 'mdash': 0x2014, # em dash, U+2014 ISOpub
+ 'micro': 0x00b5, # micro sign, U+00B5 ISOnum
+ 'middot': 0x00b7, # middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum
+ 'minus': 0x2212, # minus sign, U+2212 ISOtech
+ 'mu': 0x03bc, # greek small letter mu, U+03BC ISOgrk3
+ 'nabla': 0x2207, # nabla = backward difference, U+2207 ISOtech
+ 'nbsp': 0x00a0, # no-break space = non-breaking space, U+00A0 ISOnum
+ 'ndash': 0x2013, # en dash, U+2013 ISOpub
+ 'ne': 0x2260, # not equal to, U+2260 ISOtech
+ 'ni': 0x220b, # contains as member, U+220B ISOtech
+ 'not': 0x00ac, # not sign, U+00AC ISOnum
+ 'notin': 0x2209, # not an element of, U+2209 ISOtech
+ 'nsub': 0x2284, # not a subset of, U+2284 ISOamsn
+ 'ntilde': 0x00f1, # latin small letter n with tilde, U+00F1 ISOlat1
+ 'nu': 0x03bd, # greek small letter nu, U+03BD ISOgrk3
+ 'oacute': 0x00f3, # latin small letter o with acute, U+00F3 ISOlat1
+ 'ocirc': 0x00f4, # latin small letter o with circumflex, U+00F4 ISOlat1
+ 'oelig': 0x0153, # latin small ligature oe, U+0153 ISOlat2
+ 'ograve': 0x00f2, # latin small letter o with grave, U+00F2 ISOlat1
+ 'oline': 0x203e, # overline = spacing overscore, U+203E NEW
+ 'omega': 0x03c9, # greek small letter omega, U+03C9 ISOgrk3
+ 'omicron': 0x03bf, # greek small letter omicron, U+03BF NEW
+ 'oplus': 0x2295, # circled plus = direct sum, U+2295 ISOamsb
+ 'or': 0x2228, # logical or = vee, U+2228 ISOtech
+ 'ordf': 0x00aa, # feminine ordinal indicator, U+00AA ISOnum
+ 'ordm': 0x00ba, # masculine ordinal indicator, U+00BA ISOnum
+ 'oslash': 0x00f8, # latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1
+ 'otilde': 0x00f5, # latin small letter o with tilde, U+00F5 ISOlat1
+ 'otimes': 0x2297, # circled times = vector product, U+2297 ISOamsb
+ 'ouml': 0x00f6, # latin small letter o with diaeresis, U+00F6 ISOlat1
+ 'para': 0x00b6, # pilcrow sign = paragraph sign, U+00B6 ISOnum
+ 'part': 0x2202, # partial differential, U+2202 ISOtech
+ 'permil': 0x2030, # per mille sign, U+2030 ISOtech
+ 'perp': 0x22a5, # up tack = orthogonal to = perpendicular, U+22A5 ISOtech
+ 'phi': 0x03c6, # greek small letter phi, U+03C6 ISOgrk3
+ 'pi': 0x03c0, # greek small letter pi, U+03C0 ISOgrk3
+ 'piv': 0x03d6, # greek pi symbol, U+03D6 ISOgrk3
+ 'plusmn': 0x00b1, # plus-minus sign = plus-or-minus sign, U+00B1 ISOnum
+ 'pound': 0x00a3, # pound sign, U+00A3 ISOnum
+ 'prime': 0x2032, # prime = minutes = feet, U+2032 ISOtech
+ 'prod': 0x220f, # n-ary product = product sign, U+220F ISOamsb
+ 'prop': 0x221d, # proportional to, U+221D ISOtech
+ 'psi': 0x03c8, # greek small letter psi, U+03C8 ISOgrk3
+ 'quot': 0x0022, # quotation mark = APL quote, U+0022 ISOnum
+ 'rArr': 0x21d2, # rightwards double arrow, U+21D2 ISOtech
+ 'radic': 0x221a, # square root = radical sign, U+221A ISOtech
+ 'rang': 0x232a, # right-pointing angle bracket = ket, U+232A ISOtech
+ 'raquo': 0x00bb, # right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum
+ 'rarr': 0x2192, # rightwards arrow, U+2192 ISOnum
+ 'rceil': 0x2309, # right ceiling, U+2309 ISOamsc
+ 'rdquo': 0x201d, # right double quotation mark, U+201D ISOnum
+ 'real': 0x211c, # blackletter capital R = real part symbol, U+211C ISOamso
+ 'reg': 0x00ae, # registered sign = registered trade mark sign, U+00AE ISOnum
+ 'rfloor': 0x230b, # right floor, U+230B ISOamsc
+ 'rho': 0x03c1, # greek small letter rho, U+03C1 ISOgrk3
+ 'rlm': 0x200f, # right-to-left mark, U+200F NEW RFC 2070
+ 'rsaquo': 0x203a, # single right-pointing angle quotation mark, U+203A ISO proposed
+ 'rsquo': 0x2019, # right single quotation mark, U+2019 ISOnum
+ 'sbquo': 0x201a, # single low-9 quotation mark, U+201A NEW
+ 'scaron': 0x0161, # latin small letter s with caron, U+0161 ISOlat2
+ 'sdot': 0x22c5, # dot operator, U+22C5 ISOamsb
+ 'sect': 0x00a7, # section sign, U+00A7 ISOnum
+ 'shy': 0x00ad, # soft hyphen = discretionary hyphen, U+00AD ISOnum
+ 'sigma': 0x03c3, # greek small letter sigma, U+03C3 ISOgrk3
+ 'sigmaf': 0x03c2, # greek small letter final sigma, U+03C2 ISOgrk3
+ 'sim': 0x223c, # tilde operator = varies with = similar to, U+223C ISOtech
+ 'spades': 0x2660, # black spade suit, U+2660 ISOpub
+ 'sub': 0x2282, # subset of, U+2282 ISOtech
+ 'sube': 0x2286, # subset of or equal to, U+2286 ISOtech
+ 'sum': 0x2211, # n-ary summation, U+2211 ISOamsb
+ 'sup': 0x2283, # superset of, U+2283 ISOtech
+ 'sup1': 0x00b9, # superscript one = superscript digit one, U+00B9 ISOnum
+ 'sup2': 0x00b2, # superscript two = superscript digit two = squared, U+00B2 ISOnum
+ 'sup3': 0x00b3, # superscript three = superscript digit three = cubed, U+00B3 ISOnum
+ 'supe': 0x2287, # superset of or equal to, U+2287 ISOtech
+ 'szlig': 0x00df, # latin small letter sharp s = ess-zed, U+00DF ISOlat1
+ 'tau': 0x03c4, # greek small letter tau, U+03C4 ISOgrk3
+ 'there4': 0x2234, # therefore, U+2234 ISOtech
+ 'theta': 0x03b8, # greek small letter theta, U+03B8 ISOgrk3
+ 'thetasym': 0x03d1, # greek small letter theta symbol, U+03D1 NEW
+ 'thinsp': 0x2009, # thin space, U+2009 ISOpub
+ 'thorn': 0x00fe, # latin small letter thorn, U+00FE ISOlat1
+ 'tilde': 0x02dc, # small tilde, U+02DC ISOdia
+ 'times': 0x00d7, # multiplication sign, U+00D7 ISOnum
+ 'trade': 0x2122, # trade mark sign, U+2122 ISOnum
+ 'uArr': 0x21d1, # upwards double arrow, U+21D1 ISOamsa
+ 'uacute': 0x00fa, # latin small letter u with acute, U+00FA ISOlat1
+ 'uarr': 0x2191, # upwards arrow, U+2191 ISOnum
+ 'ucirc': 0x00fb, # latin small letter u with circumflex, U+00FB ISOlat1
+ 'ugrave': 0x00f9, # latin small letter u with grave, U+00F9 ISOlat1
+ 'uml': 0x00a8, # diaeresis = spacing diaeresis, U+00A8 ISOdia
+ 'upsih': 0x03d2, # greek upsilon with hook symbol, U+03D2 NEW
+ 'upsilon': 0x03c5, # greek small letter upsilon, U+03C5 ISOgrk3
+ 'uuml': 0x00fc, # latin small letter u with diaeresis, U+00FC ISOlat1
+ 'weierp': 0x2118, # script capital P = power set = Weierstrass p, U+2118 ISOamso
+ 'xi': 0x03be, # greek small letter xi, U+03BE ISOgrk3
+ 'yacute': 0x00fd, # latin small letter y with acute, U+00FD ISOlat1
+ 'yen': 0x00a5, # yen sign = yuan sign, U+00A5 ISOnum
+ 'yuml': 0x00ff, # latin small letter y with diaeresis, U+00FF ISOlat1
+ 'zeta': 0x03b6, # greek small letter zeta, U+03B6 ISOgrk3
+ 'zwj': 0x200d, # zero width joiner, U+200D NEW RFC 2070
+ 'zwnj': 0x200c, # zero width non-joiner, U+200C NEW RFC 2070
+}
+
+
+# maps the HTML5 named character references to the equivalent Unicode character(s)
+html5 = {
+ 'Aacute': '\xc1',
+ 'aacute': '\xe1',
+ 'Aacute;': '\xc1',
+ 'aacute;': '\xe1',
+ 'Abreve;': '\u0102',
+ 'abreve;': '\u0103',
+ 'ac;': '\u223e',
+ 'acd;': '\u223f',
+ 'acE;': '\u223e\u0333',
+ 'Acirc': '\xc2',
+ 'acirc': '\xe2',
+ 'Acirc;': '\xc2',
+ 'acirc;': '\xe2',
+ 'acute': '\xb4',
+ 'acute;': '\xb4',
+ 'Acy;': '\u0410',
+ 'acy;': '\u0430',
+ 'AElig': '\xc6',
+ 'aelig': '\xe6',
+ 'AElig;': '\xc6',
+ 'aelig;': '\xe6',
+ 'af;': '\u2061',
+ 'Afr;': '\U0001d504',
+ 'afr;': '\U0001d51e',
+ 'Agrave': '\xc0',
+ 'agrave': '\xe0',
+ 'Agrave;': '\xc0',
+ 'agrave;': '\xe0',
+ 'alefsym;': '\u2135',
+ 'aleph;': '\u2135',
+ 'Alpha;': '\u0391',
+ 'alpha;': '\u03b1',
+ 'Amacr;': '\u0100',
+ 'amacr;': '\u0101',
+ 'amalg;': '\u2a3f',
+ 'AMP': '&',
+ 'amp': '&',
+ 'AMP;': '&',
+ 'amp;': '&',
+ 'And;': '\u2a53',
+ 'and;': '\u2227',
+ 'andand;': '\u2a55',
+ 'andd;': '\u2a5c',
+ 'andslope;': '\u2a58',
+ 'andv;': '\u2a5a',
+ 'ang;': '\u2220',
+ 'ange;': '\u29a4',
+ 'angle;': '\u2220',
+ 'angmsd;': '\u2221',
+ 'angmsdaa;': '\u29a8',
+ 'angmsdab;': '\u29a9',
+ 'angmsdac;': '\u29aa',
+ 'angmsdad;': '\u29ab',
+ 'angmsdae;': '\u29ac',
+ 'angmsdaf;': '\u29ad',
+ 'angmsdag;': '\u29ae',
+ 'angmsdah;': '\u29af',
+ 'angrt;': '\u221f',
+ 'angrtvb;': '\u22be',
+ 'angrtvbd;': '\u299d',
+ 'angsph;': '\u2222',
+ 'angst;': '\xc5',
+ 'angzarr;': '\u237c',
+ 'Aogon;': '\u0104',
+ 'aogon;': '\u0105',
+ 'Aopf;': '\U0001d538',
+ 'aopf;': '\U0001d552',
+ 'ap;': '\u2248',
+ 'apacir;': '\u2a6f',
+ 'apE;': '\u2a70',
+ 'ape;': '\u224a',
+ 'apid;': '\u224b',
+ 'apos;': "'",
+ 'ApplyFunction;': '\u2061',
+ 'approx;': '\u2248',
+ 'approxeq;': '\u224a',
+ 'Aring': '\xc5',
+ 'aring': '\xe5',
+ 'Aring;': '\xc5',
+ 'aring;': '\xe5',
+ 'Ascr;': '\U0001d49c',
+ 'ascr;': '\U0001d4b6',
+ 'Assign;': '\u2254',
+ 'ast;': '*',
+ 'asymp;': '\u2248',
+ 'asympeq;': '\u224d',
+ 'Atilde': '\xc3',
+ 'atilde': '\xe3',
+ 'Atilde;': '\xc3',
+ 'atilde;': '\xe3',
+ 'Auml': '\xc4',
+ 'auml': '\xe4',
+ 'Auml;': '\xc4',
+ 'auml;': '\xe4',
+ 'awconint;': '\u2233',
+ 'awint;': '\u2a11',
+ 'backcong;': '\u224c',
+ 'backepsilon;': '\u03f6',
+ 'backprime;': '\u2035',
+ 'backsim;': '\u223d',
+ 'backsimeq;': '\u22cd',
+ 'Backslash;': '\u2216',
+ 'Barv;': '\u2ae7',
+ 'barvee;': '\u22bd',
+ 'Barwed;': '\u2306',
+ 'barwed;': '\u2305',
+ 'barwedge;': '\u2305',
+ 'bbrk;': '\u23b5',
+ 'bbrktbrk;': '\u23b6',
+ 'bcong;': '\u224c',
+ 'Bcy;': '\u0411',
+ 'bcy;': '\u0431',
+ 'bdquo;': '\u201e',
+ 'becaus;': '\u2235',
+ 'Because;': '\u2235',
+ 'because;': '\u2235',
+ 'bemptyv;': '\u29b0',
+ 'bepsi;': '\u03f6',
+ 'bernou;': '\u212c',
+ 'Bernoullis;': '\u212c',
+ 'Beta;': '\u0392',
+ 'beta;': '\u03b2',
+ 'beth;': '\u2136',
+ 'between;': '\u226c',
+ 'Bfr;': '\U0001d505',
+ 'bfr;': '\U0001d51f',
+ 'bigcap;': '\u22c2',
+ 'bigcirc;': '\u25ef',
+ 'bigcup;': '\u22c3',
+ 'bigodot;': '\u2a00',
+ 'bigoplus;': '\u2a01',
+ 'bigotimes;': '\u2a02',
+ 'bigsqcup;': '\u2a06',
+ 'bigstar;': '\u2605',
+ 'bigtriangledown;': '\u25bd',
+ 'bigtriangleup;': '\u25b3',
+ 'biguplus;': '\u2a04',
+ 'bigvee;': '\u22c1',
+ 'bigwedge;': '\u22c0',
+ 'bkarow;': '\u290d',
+ 'blacklozenge;': '\u29eb',
+ 'blacksquare;': '\u25aa',
+ 'blacktriangle;': '\u25b4',
+ 'blacktriangledown;': '\u25be',
+ 'blacktriangleleft;': '\u25c2',
+ 'blacktriangleright;': '\u25b8',
+ 'blank;': '\u2423',
+ 'blk12;': '\u2592',
+ 'blk14;': '\u2591',
+ 'blk34;': '\u2593',
+ 'block;': '\u2588',
+ 'bne;': '=\u20e5',
+ 'bnequiv;': '\u2261\u20e5',
+ 'bNot;': '\u2aed',
+ 'bnot;': '\u2310',
+ 'Bopf;': '\U0001d539',
+ 'bopf;': '\U0001d553',
+ 'bot;': '\u22a5',
+ 'bottom;': '\u22a5',
+ 'bowtie;': '\u22c8',
+ 'boxbox;': '\u29c9',
+ 'boxDL;': '\u2557',
+ 'boxDl;': '\u2556',
+ 'boxdL;': '\u2555',
+ 'boxdl;': '\u2510',
+ 'boxDR;': '\u2554',
+ 'boxDr;': '\u2553',
+ 'boxdR;': '\u2552',
+ 'boxdr;': '\u250c',
+ 'boxH;': '\u2550',
+ 'boxh;': '\u2500',
+ 'boxHD;': '\u2566',
+ 'boxHd;': '\u2564',
+ 'boxhD;': '\u2565',
+ 'boxhd;': '\u252c',
+ 'boxHU;': '\u2569',
+ 'boxHu;': '\u2567',
+ 'boxhU;': '\u2568',
+ 'boxhu;': '\u2534',
+ 'boxminus;': '\u229f',
+ 'boxplus;': '\u229e',
+ 'boxtimes;': '\u22a0',
+ 'boxUL;': '\u255d',
+ 'boxUl;': '\u255c',
+ 'boxuL;': '\u255b',
+ 'boxul;': '\u2518',
+ 'boxUR;': '\u255a',
+ 'boxUr;': '\u2559',
+ 'boxuR;': '\u2558',
+ 'boxur;': '\u2514',
+ 'boxV;': '\u2551',
+ 'boxv;': '\u2502',
+ 'boxVH;': '\u256c',
+ 'boxVh;': '\u256b',
+ 'boxvH;': '\u256a',
+ 'boxvh;': '\u253c',
+ 'boxVL;': '\u2563',
+ 'boxVl;': '\u2562',
+ 'boxvL;': '\u2561',
+ 'boxvl;': '\u2524',
+ 'boxVR;': '\u2560',
+ 'boxVr;': '\u255f',
+ 'boxvR;': '\u255e',
+ 'boxvr;': '\u251c',
+ 'bprime;': '\u2035',
+ 'Breve;': '\u02d8',
+ 'breve;': '\u02d8',
+ 'brvbar': '\xa6',
+ 'brvbar;': '\xa6',
+ 'Bscr;': '\u212c',
+ 'bscr;': '\U0001d4b7',
+ 'bsemi;': '\u204f',
+ 'bsim;': '\u223d',
+ 'bsime;': '\u22cd',
+ 'bsol;': '\\',
+ 'bsolb;': '\u29c5',
+ 'bsolhsub;': '\u27c8',
+ 'bull;': '\u2022',
+ 'bullet;': '\u2022',
+ 'bump;': '\u224e',
+ 'bumpE;': '\u2aae',
+ 'bumpe;': '\u224f',
+ 'Bumpeq;': '\u224e',
+ 'bumpeq;': '\u224f',
+ 'Cacute;': '\u0106',
+ 'cacute;': '\u0107',
+ 'Cap;': '\u22d2',
+ 'cap;': '\u2229',
+ 'capand;': '\u2a44',
+ 'capbrcup;': '\u2a49',
+ 'capcap;': '\u2a4b',
+ 'capcup;': '\u2a47',
+ 'capdot;': '\u2a40',
+ 'CapitalDifferentialD;': '\u2145',
+ 'caps;': '\u2229\ufe00',
+ 'caret;': '\u2041',
+ 'caron;': '\u02c7',
+ 'Cayleys;': '\u212d',
+ 'ccaps;': '\u2a4d',
+ 'Ccaron;': '\u010c',
+ 'ccaron;': '\u010d',
+ 'Ccedil': '\xc7',
+ 'ccedil': '\xe7',
+ 'Ccedil;': '\xc7',
+ 'ccedil;': '\xe7',
+ 'Ccirc;': '\u0108',
+ 'ccirc;': '\u0109',
+ 'Cconint;': '\u2230',
+ 'ccups;': '\u2a4c',
+ 'ccupssm;': '\u2a50',
+ 'Cdot;': '\u010a',
+ 'cdot;': '\u010b',
+ 'cedil': '\xb8',
+ 'cedil;': '\xb8',
+ 'Cedilla;': '\xb8',
+ 'cemptyv;': '\u29b2',
+ 'cent': '\xa2',
+ 'cent;': '\xa2',
+ 'CenterDot;': '\xb7',
+ 'centerdot;': '\xb7',
+ 'Cfr;': '\u212d',
+ 'cfr;': '\U0001d520',
+ 'CHcy;': '\u0427',
+ 'chcy;': '\u0447',
+ 'check;': '\u2713',
+ 'checkmark;': '\u2713',
+ 'Chi;': '\u03a7',
+ 'chi;': '\u03c7',
+ 'cir;': '\u25cb',
+ 'circ;': '\u02c6',
+ 'circeq;': '\u2257',
+ 'circlearrowleft;': '\u21ba',
+ 'circlearrowright;': '\u21bb',
+ 'circledast;': '\u229b',
+ 'circledcirc;': '\u229a',
+ 'circleddash;': '\u229d',
+ 'CircleDot;': '\u2299',
+ 'circledR;': '\xae',
+ 'circledS;': '\u24c8',
+ 'CircleMinus;': '\u2296',
+ 'CirclePlus;': '\u2295',
+ 'CircleTimes;': '\u2297',
+ 'cirE;': '\u29c3',
+ 'cire;': '\u2257',
+ 'cirfnint;': '\u2a10',
+ 'cirmid;': '\u2aef',
+ 'cirscir;': '\u29c2',
+ 'ClockwiseContourIntegral;': '\u2232',
+ 'CloseCurlyDoubleQuote;': '\u201d',
+ 'CloseCurlyQuote;': '\u2019',
+ 'clubs;': '\u2663',
+ 'clubsuit;': '\u2663',
+ 'Colon;': '\u2237',
+ 'colon;': ':',
+ 'Colone;': '\u2a74',
+ 'colone;': '\u2254',
+ 'coloneq;': '\u2254',
+ 'comma;': ',',
+ 'commat;': '@',
+ 'comp;': '\u2201',
+ 'compfn;': '\u2218',
+ 'complement;': '\u2201',
+ 'complexes;': '\u2102',
+ 'cong;': '\u2245',
+ 'congdot;': '\u2a6d',
+ 'Congruent;': '\u2261',
+ 'Conint;': '\u222f',
+ 'conint;': '\u222e',
+ 'ContourIntegral;': '\u222e',
+ 'Copf;': '\u2102',
+ 'copf;': '\U0001d554',
+ 'coprod;': '\u2210',
+ 'Coproduct;': '\u2210',
+ 'COPY': '\xa9',
+ 'copy': '\xa9',
+ 'COPY;': '\xa9',
+ 'copy;': '\xa9',
+ 'copysr;': '\u2117',
+ 'CounterClockwiseContourIntegral;': '\u2233',
+ 'crarr;': '\u21b5',
+ 'Cross;': '\u2a2f',
+ 'cross;': '\u2717',
+ 'Cscr;': '\U0001d49e',
+ 'cscr;': '\U0001d4b8',
+ 'csub;': '\u2acf',
+ 'csube;': '\u2ad1',
+ 'csup;': '\u2ad0',
+ 'csupe;': '\u2ad2',
+ 'ctdot;': '\u22ef',
+ 'cudarrl;': '\u2938',
+ 'cudarrr;': '\u2935',
+ 'cuepr;': '\u22de',
+ 'cuesc;': '\u22df',
+ 'cularr;': '\u21b6',
+ 'cularrp;': '\u293d',
+ 'Cup;': '\u22d3',
+ 'cup;': '\u222a',
+ 'cupbrcap;': '\u2a48',
+ 'CupCap;': '\u224d',
+ 'cupcap;': '\u2a46',
+ 'cupcup;': '\u2a4a',
+ 'cupdot;': '\u228d',
+ 'cupor;': '\u2a45',
+ 'cups;': '\u222a\ufe00',
+ 'curarr;': '\u21b7',
+ 'curarrm;': '\u293c',
+ 'curlyeqprec;': '\u22de',
+ 'curlyeqsucc;': '\u22df',
+ 'curlyvee;': '\u22ce',
+ 'curlywedge;': '\u22cf',
+ 'curren': '\xa4',
+ 'curren;': '\xa4',
+ 'curvearrowleft;': '\u21b6',
+ 'curvearrowright;': '\u21b7',
+ 'cuvee;': '\u22ce',
+ 'cuwed;': '\u22cf',
+ 'cwconint;': '\u2232',
+ 'cwint;': '\u2231',
+ 'cylcty;': '\u232d',
+ 'Dagger;': '\u2021',
+ 'dagger;': '\u2020',
+ 'daleth;': '\u2138',
+ 'Darr;': '\u21a1',
+ 'dArr;': '\u21d3',
+ 'darr;': '\u2193',
+ 'dash;': '\u2010',
+ 'Dashv;': '\u2ae4',
+ 'dashv;': '\u22a3',
+ 'dbkarow;': '\u290f',
+ 'dblac;': '\u02dd',
+ 'Dcaron;': '\u010e',
+ 'dcaron;': '\u010f',
+ 'Dcy;': '\u0414',
+ 'dcy;': '\u0434',
+ 'DD;': '\u2145',
+ 'dd;': '\u2146',
+ 'ddagger;': '\u2021',
+ 'ddarr;': '\u21ca',
+ 'DDotrahd;': '\u2911',
+ 'ddotseq;': '\u2a77',
+ 'deg': '\xb0',
+ 'deg;': '\xb0',
+ 'Del;': '\u2207',
+ 'Delta;': '\u0394',
+ 'delta;': '\u03b4',
+ 'demptyv;': '\u29b1',
+ 'dfisht;': '\u297f',
+ 'Dfr;': '\U0001d507',
+ 'dfr;': '\U0001d521',
+ 'dHar;': '\u2965',
+ 'dharl;': '\u21c3',
+ 'dharr;': '\u21c2',
+ 'DiacriticalAcute;': '\xb4',
+ 'DiacriticalDot;': '\u02d9',
+ 'DiacriticalDoubleAcute;': '\u02dd',
+ 'DiacriticalGrave;': '`',
+ 'DiacriticalTilde;': '\u02dc',
+ 'diam;': '\u22c4',
+ 'Diamond;': '\u22c4',
+ 'diamond;': '\u22c4',
+ 'diamondsuit;': '\u2666',
+ 'diams;': '\u2666',
+ 'die;': '\xa8',
+ 'DifferentialD;': '\u2146',
+ 'digamma;': '\u03dd',
+ 'disin;': '\u22f2',
+ 'div;': '\xf7',
+ 'divide': '\xf7',
+ 'divide;': '\xf7',
+ 'divideontimes;': '\u22c7',
+ 'divonx;': '\u22c7',
+ 'DJcy;': '\u0402',
+ 'djcy;': '\u0452',
+ 'dlcorn;': '\u231e',
+ 'dlcrop;': '\u230d',
+ 'dollar;': '$',
+ 'Dopf;': '\U0001d53b',
+ 'dopf;': '\U0001d555',
+ 'Dot;': '\xa8',
+ 'dot;': '\u02d9',
+ 'DotDot;': '\u20dc',
+ 'doteq;': '\u2250',
+ 'doteqdot;': '\u2251',
+ 'DotEqual;': '\u2250',
+ 'dotminus;': '\u2238',
+ 'dotplus;': '\u2214',
+ 'dotsquare;': '\u22a1',
+ 'doublebarwedge;': '\u2306',
+ 'DoubleContourIntegral;': '\u222f',
+ 'DoubleDot;': '\xa8',
+ 'DoubleDownArrow;': '\u21d3',
+ 'DoubleLeftArrow;': '\u21d0',
+ 'DoubleLeftRightArrow;': '\u21d4',
+ 'DoubleLeftTee;': '\u2ae4',
+ 'DoubleLongLeftArrow;': '\u27f8',
+ 'DoubleLongLeftRightArrow;': '\u27fa',
+ 'DoubleLongRightArrow;': '\u27f9',
+ 'DoubleRightArrow;': '\u21d2',
+ 'DoubleRightTee;': '\u22a8',
+ 'DoubleUpArrow;': '\u21d1',
+ 'DoubleUpDownArrow;': '\u21d5',
+ 'DoubleVerticalBar;': '\u2225',
+ 'DownArrow;': '\u2193',
+ 'Downarrow;': '\u21d3',
+ 'downarrow;': '\u2193',
+ 'DownArrowBar;': '\u2913',
+ 'DownArrowUpArrow;': '\u21f5',
+ 'DownBreve;': '\u0311',
+ 'downdownarrows;': '\u21ca',
+ 'downharpoonleft;': '\u21c3',
+ 'downharpoonright;': '\u21c2',
+ 'DownLeftRightVector;': '\u2950',
+ 'DownLeftTeeVector;': '\u295e',
+ 'DownLeftVector;': '\u21bd',
+ 'DownLeftVectorBar;': '\u2956',
+ 'DownRightTeeVector;': '\u295f',
+ 'DownRightVector;': '\u21c1',
+ 'DownRightVectorBar;': '\u2957',
+ 'DownTee;': '\u22a4',
+ 'DownTeeArrow;': '\u21a7',
+ 'drbkarow;': '\u2910',
+ 'drcorn;': '\u231f',
+ 'drcrop;': '\u230c',
+ 'Dscr;': '\U0001d49f',
+ 'dscr;': '\U0001d4b9',
+ 'DScy;': '\u0405',
+ 'dscy;': '\u0455',
+ 'dsol;': '\u29f6',
+ 'Dstrok;': '\u0110',
+ 'dstrok;': '\u0111',
+ 'dtdot;': '\u22f1',
+ 'dtri;': '\u25bf',
+ 'dtrif;': '\u25be',
+ 'duarr;': '\u21f5',
+ 'duhar;': '\u296f',
+ 'dwangle;': '\u29a6',
+ 'DZcy;': '\u040f',
+ 'dzcy;': '\u045f',
+ 'dzigrarr;': '\u27ff',
+ 'Eacute': '\xc9',
+ 'eacute': '\xe9',
+ 'Eacute;': '\xc9',
+ 'eacute;': '\xe9',
+ 'easter;': '\u2a6e',
+ 'Ecaron;': '\u011a',
+ 'ecaron;': '\u011b',
+ 'ecir;': '\u2256',
+ 'Ecirc': '\xca',
+ 'ecirc': '\xea',
+ 'Ecirc;': '\xca',
+ 'ecirc;': '\xea',
+ 'ecolon;': '\u2255',
+ 'Ecy;': '\u042d',
+ 'ecy;': '\u044d',
+ 'eDDot;': '\u2a77',
+ 'Edot;': '\u0116',
+ 'eDot;': '\u2251',
+ 'edot;': '\u0117',
+ 'ee;': '\u2147',
+ 'efDot;': '\u2252',
+ 'Efr;': '\U0001d508',
+ 'efr;': '\U0001d522',
+ 'eg;': '\u2a9a',
+ 'Egrave': '\xc8',
+ 'egrave': '\xe8',
+ 'Egrave;': '\xc8',
+ 'egrave;': '\xe8',
+ 'egs;': '\u2a96',
+ 'egsdot;': '\u2a98',
+ 'el;': '\u2a99',
+ 'Element;': '\u2208',
+ 'elinters;': '\u23e7',
+ 'ell;': '\u2113',
+ 'els;': '\u2a95',
+ 'elsdot;': '\u2a97',
+ 'Emacr;': '\u0112',
+ 'emacr;': '\u0113',
+ 'empty;': '\u2205',
+ 'emptyset;': '\u2205',
+ 'EmptySmallSquare;': '\u25fb',
+ 'emptyv;': '\u2205',
+ 'EmptyVerySmallSquare;': '\u25ab',
+ 'emsp13;': '\u2004',
+ 'emsp14;': '\u2005',
+ 'emsp;': '\u2003',
+ 'ENG;': '\u014a',
+ 'eng;': '\u014b',
+ 'ensp;': '\u2002',
+ 'Eogon;': '\u0118',
+ 'eogon;': '\u0119',
+ 'Eopf;': '\U0001d53c',
+ 'eopf;': '\U0001d556',
+ 'epar;': '\u22d5',
+ 'eparsl;': '\u29e3',
+ 'eplus;': '\u2a71',
+ 'epsi;': '\u03b5',
+ 'Epsilon;': '\u0395',
+ 'epsilon;': '\u03b5',
+ 'epsiv;': '\u03f5',
+ 'eqcirc;': '\u2256',
+ 'eqcolon;': '\u2255',
+ 'eqsim;': '\u2242',
+ 'eqslantgtr;': '\u2a96',
+ 'eqslantless;': '\u2a95',
+ 'Equal;': '\u2a75',
+ 'equals;': '=',
+ 'EqualTilde;': '\u2242',
+ 'equest;': '\u225f',
+ 'Equilibrium;': '\u21cc',
+ 'equiv;': '\u2261',
+ 'equivDD;': '\u2a78',
+ 'eqvparsl;': '\u29e5',
+ 'erarr;': '\u2971',
+ 'erDot;': '\u2253',
+ 'Escr;': '\u2130',
+ 'escr;': '\u212f',
+ 'esdot;': '\u2250',
+ 'Esim;': '\u2a73',
+ 'esim;': '\u2242',
+ 'Eta;': '\u0397',
+ 'eta;': '\u03b7',
+ 'ETH': '\xd0',
+ 'eth': '\xf0',
+ 'ETH;': '\xd0',
+ 'eth;': '\xf0',
+ 'Euml': '\xcb',
+ 'euml': '\xeb',
+ 'Euml;': '\xcb',
+ 'euml;': '\xeb',
+ 'euro;': '\u20ac',
+ 'excl;': '!',
+ 'exist;': '\u2203',
+ 'Exists;': '\u2203',
+ 'expectation;': '\u2130',
+ 'ExponentialE;': '\u2147',
+ 'exponentiale;': '\u2147',
+ 'fallingdotseq;': '\u2252',
+ 'Fcy;': '\u0424',
+ 'fcy;': '\u0444',
+ 'female;': '\u2640',
+ 'ffilig;': '\ufb03',
+ 'fflig;': '\ufb00',
+ 'ffllig;': '\ufb04',
+ 'Ffr;': '\U0001d509',
+ 'ffr;': '\U0001d523',
+ 'filig;': '\ufb01',
+ 'FilledSmallSquare;': '\u25fc',
+ 'FilledVerySmallSquare;': '\u25aa',
+ 'fjlig;': 'fj',
+ 'flat;': '\u266d',
+ 'fllig;': '\ufb02',
+ 'fltns;': '\u25b1',
+ 'fnof;': '\u0192',
+ 'Fopf;': '\U0001d53d',
+ 'fopf;': '\U0001d557',
+ 'ForAll;': '\u2200',
+ 'forall;': '\u2200',
+ 'fork;': '\u22d4',
+ 'forkv;': '\u2ad9',
+ 'Fouriertrf;': '\u2131',
+ 'fpartint;': '\u2a0d',
+ 'frac12': '\xbd',
+ 'frac12;': '\xbd',
+ 'frac13;': '\u2153',
+ 'frac14': '\xbc',
+ 'frac14;': '\xbc',
+ 'frac15;': '\u2155',
+ 'frac16;': '\u2159',
+ 'frac18;': '\u215b',
+ 'frac23;': '\u2154',
+ 'frac25;': '\u2156',
+ 'frac34': '\xbe',
+ 'frac34;': '\xbe',
+ 'frac35;': '\u2157',
+ 'frac38;': '\u215c',
+ 'frac45;': '\u2158',
+ 'frac56;': '\u215a',
+ 'frac58;': '\u215d',
+ 'frac78;': '\u215e',
+ 'frasl;': '\u2044',
+ 'frown;': '\u2322',
+ 'Fscr;': '\u2131',
+ 'fscr;': '\U0001d4bb',
+ 'gacute;': '\u01f5',
+ 'Gamma;': '\u0393',
+ 'gamma;': '\u03b3',
+ 'Gammad;': '\u03dc',
+ 'gammad;': '\u03dd',
+ 'gap;': '\u2a86',
+ 'Gbreve;': '\u011e',
+ 'gbreve;': '\u011f',
+ 'Gcedil;': '\u0122',
+ 'Gcirc;': '\u011c',
+ 'gcirc;': '\u011d',
+ 'Gcy;': '\u0413',
+ 'gcy;': '\u0433',
+ 'Gdot;': '\u0120',
+ 'gdot;': '\u0121',
+ 'gE;': '\u2267',
+ 'ge;': '\u2265',
+ 'gEl;': '\u2a8c',
+ 'gel;': '\u22db',
+ 'geq;': '\u2265',
+ 'geqq;': '\u2267',
+ 'geqslant;': '\u2a7e',
+ 'ges;': '\u2a7e',
+ 'gescc;': '\u2aa9',
+ 'gesdot;': '\u2a80',
+ 'gesdoto;': '\u2a82',
+ 'gesdotol;': '\u2a84',
+ 'gesl;': '\u22db\ufe00',
+ 'gesles;': '\u2a94',
+ 'Gfr;': '\U0001d50a',
+ 'gfr;': '\U0001d524',
+ 'Gg;': '\u22d9',
+ 'gg;': '\u226b',
+ 'ggg;': '\u22d9',
+ 'gimel;': '\u2137',
+ 'GJcy;': '\u0403',
+ 'gjcy;': '\u0453',
+ 'gl;': '\u2277',
+ 'gla;': '\u2aa5',
+ 'glE;': '\u2a92',
+ 'glj;': '\u2aa4',
+ 'gnap;': '\u2a8a',
+ 'gnapprox;': '\u2a8a',
+ 'gnE;': '\u2269',
+ 'gne;': '\u2a88',
+ 'gneq;': '\u2a88',
+ 'gneqq;': '\u2269',
+ 'gnsim;': '\u22e7',
+ 'Gopf;': '\U0001d53e',
+ 'gopf;': '\U0001d558',
+ 'grave;': '`',
+ 'GreaterEqual;': '\u2265',
+ 'GreaterEqualLess;': '\u22db',
+ 'GreaterFullEqual;': '\u2267',
+ 'GreaterGreater;': '\u2aa2',
+ 'GreaterLess;': '\u2277',
+ 'GreaterSlantEqual;': '\u2a7e',
+ 'GreaterTilde;': '\u2273',
+ 'Gscr;': '\U0001d4a2',
+ 'gscr;': '\u210a',
+ 'gsim;': '\u2273',
+ 'gsime;': '\u2a8e',
+ 'gsiml;': '\u2a90',
+ 'GT': '>',
+ 'gt': '>',
+ 'GT;': '>',
+ 'Gt;': '\u226b',
+ 'gt;': '>',
+ 'gtcc;': '\u2aa7',
+ 'gtcir;': '\u2a7a',
+ 'gtdot;': '\u22d7',
+ 'gtlPar;': '\u2995',
+ 'gtquest;': '\u2a7c',
+ 'gtrapprox;': '\u2a86',
+ 'gtrarr;': '\u2978',
+ 'gtrdot;': '\u22d7',
+ 'gtreqless;': '\u22db',
+ 'gtreqqless;': '\u2a8c',
+ 'gtrless;': '\u2277',
+ 'gtrsim;': '\u2273',
+ 'gvertneqq;': '\u2269\ufe00',
+ 'gvnE;': '\u2269\ufe00',
+ 'Hacek;': '\u02c7',
+ 'hairsp;': '\u200a',
+ 'half;': '\xbd',
+ 'hamilt;': '\u210b',
+ 'HARDcy;': '\u042a',
+ 'hardcy;': '\u044a',
+ 'hArr;': '\u21d4',
+ 'harr;': '\u2194',
+ 'harrcir;': '\u2948',
+ 'harrw;': '\u21ad',
+ 'Hat;': '^',
+ 'hbar;': '\u210f',
+ 'Hcirc;': '\u0124',
+ 'hcirc;': '\u0125',
+ 'hearts;': '\u2665',
+ 'heartsuit;': '\u2665',
+ 'hellip;': '\u2026',
+ 'hercon;': '\u22b9',
+ 'Hfr;': '\u210c',
+ 'hfr;': '\U0001d525',
+ 'HilbertSpace;': '\u210b',
+ 'hksearow;': '\u2925',
+ 'hkswarow;': '\u2926',
+ 'hoarr;': '\u21ff',
+ 'homtht;': '\u223b',
+ 'hookleftarrow;': '\u21a9',
+ 'hookrightarrow;': '\u21aa',
+ 'Hopf;': '\u210d',
+ 'hopf;': '\U0001d559',
+ 'horbar;': '\u2015',
+ 'HorizontalLine;': '\u2500',
+ 'Hscr;': '\u210b',
+ 'hscr;': '\U0001d4bd',
+ 'hslash;': '\u210f',
+ 'Hstrok;': '\u0126',
+ 'hstrok;': '\u0127',
+ 'HumpDownHump;': '\u224e',
+ 'HumpEqual;': '\u224f',
+ 'hybull;': '\u2043',
+ 'hyphen;': '\u2010',
+ 'Iacute': '\xcd',
+ 'iacute': '\xed',
+ 'Iacute;': '\xcd',
+ 'iacute;': '\xed',
+ 'ic;': '\u2063',
+ 'Icirc': '\xce',
+ 'icirc': '\xee',
+ 'Icirc;': '\xce',
+ 'icirc;': '\xee',
+ 'Icy;': '\u0418',
+ 'icy;': '\u0438',
+ 'Idot;': '\u0130',
+ 'IEcy;': '\u0415',
+ 'iecy;': '\u0435',
+ 'iexcl': '\xa1',
+ 'iexcl;': '\xa1',
+ 'iff;': '\u21d4',
+ 'Ifr;': '\u2111',
+ 'ifr;': '\U0001d526',
+ 'Igrave': '\xcc',
+ 'igrave': '\xec',
+ 'Igrave;': '\xcc',
+ 'igrave;': '\xec',
+ 'ii;': '\u2148',
+ 'iiiint;': '\u2a0c',
+ 'iiint;': '\u222d',
+ 'iinfin;': '\u29dc',
+ 'iiota;': '\u2129',
+ 'IJlig;': '\u0132',
+ 'ijlig;': '\u0133',
+ 'Im;': '\u2111',
+ 'Imacr;': '\u012a',
+ 'imacr;': '\u012b',
+ 'image;': '\u2111',
+ 'ImaginaryI;': '\u2148',
+ 'imagline;': '\u2110',
+ 'imagpart;': '\u2111',
+ 'imath;': '\u0131',
+ 'imof;': '\u22b7',
+ 'imped;': '\u01b5',
+ 'Implies;': '\u21d2',
+ 'in;': '\u2208',
+ 'incare;': '\u2105',
+ 'infin;': '\u221e',
+ 'infintie;': '\u29dd',
+ 'inodot;': '\u0131',
+ 'Int;': '\u222c',
+ 'int;': '\u222b',
+ 'intcal;': '\u22ba',
+ 'integers;': '\u2124',
+ 'Integral;': '\u222b',
+ 'intercal;': '\u22ba',
+ 'Intersection;': '\u22c2',
+ 'intlarhk;': '\u2a17',
+ 'intprod;': '\u2a3c',
+ 'InvisibleComma;': '\u2063',
+ 'InvisibleTimes;': '\u2062',
+ 'IOcy;': '\u0401',
+ 'iocy;': '\u0451',
+ 'Iogon;': '\u012e',
+ 'iogon;': '\u012f',
+ 'Iopf;': '\U0001d540',
+ 'iopf;': '\U0001d55a',
+ 'Iota;': '\u0399',
+ 'iota;': '\u03b9',
+ 'iprod;': '\u2a3c',
+ 'iquest': '\xbf',
+ 'iquest;': '\xbf',
+ 'Iscr;': '\u2110',
+ 'iscr;': '\U0001d4be',
+ 'isin;': '\u2208',
+ 'isindot;': '\u22f5',
+ 'isinE;': '\u22f9',
+ 'isins;': '\u22f4',
+ 'isinsv;': '\u22f3',
+ 'isinv;': '\u2208',
+ 'it;': '\u2062',
+ 'Itilde;': '\u0128',
+ 'itilde;': '\u0129',
+ 'Iukcy;': '\u0406',
+ 'iukcy;': '\u0456',
+ 'Iuml': '\xcf',
+ 'iuml': '\xef',
+ 'Iuml;': '\xcf',
+ 'iuml;': '\xef',
+ 'Jcirc;': '\u0134',
+ 'jcirc;': '\u0135',
+ 'Jcy;': '\u0419',
+ 'jcy;': '\u0439',
+ 'Jfr;': '\U0001d50d',
+ 'jfr;': '\U0001d527',
+ 'jmath;': '\u0237',
+ 'Jopf;': '\U0001d541',
+ 'jopf;': '\U0001d55b',
+ 'Jscr;': '\U0001d4a5',
+ 'jscr;': '\U0001d4bf',
+ 'Jsercy;': '\u0408',
+ 'jsercy;': '\u0458',
+ 'Jukcy;': '\u0404',
+ 'jukcy;': '\u0454',
+ 'Kappa;': '\u039a',
+ 'kappa;': '\u03ba',
+ 'kappav;': '\u03f0',
+ 'Kcedil;': '\u0136',
+ 'kcedil;': '\u0137',
+ 'Kcy;': '\u041a',
+ 'kcy;': '\u043a',
+ 'Kfr;': '\U0001d50e',
+ 'kfr;': '\U0001d528',
+ 'kgreen;': '\u0138',
+ 'KHcy;': '\u0425',
+ 'khcy;': '\u0445',
+ 'KJcy;': '\u040c',
+ 'kjcy;': '\u045c',
+ 'Kopf;': '\U0001d542',
+ 'kopf;': '\U0001d55c',
+ 'Kscr;': '\U0001d4a6',
+ 'kscr;': '\U0001d4c0',
+ 'lAarr;': '\u21da',
+ 'Lacute;': '\u0139',
+ 'lacute;': '\u013a',
+ 'laemptyv;': '\u29b4',
+ 'lagran;': '\u2112',
+ 'Lambda;': '\u039b',
+ 'lambda;': '\u03bb',
+ 'Lang;': '\u27ea',
+ 'lang;': '\u27e8',
+ 'langd;': '\u2991',
+ 'langle;': '\u27e8',
+ 'lap;': '\u2a85',
+ 'Laplacetrf;': '\u2112',
+ 'laquo': '\xab',
+ 'laquo;': '\xab',
+ 'Larr;': '\u219e',
+ 'lArr;': '\u21d0',
+ 'larr;': '\u2190',
+ 'larrb;': '\u21e4',
+ 'larrbfs;': '\u291f',
+ 'larrfs;': '\u291d',
+ 'larrhk;': '\u21a9',
+ 'larrlp;': '\u21ab',
+ 'larrpl;': '\u2939',
+ 'larrsim;': '\u2973',
+ 'larrtl;': '\u21a2',
+ 'lat;': '\u2aab',
+ 'lAtail;': '\u291b',
+ 'latail;': '\u2919',
+ 'late;': '\u2aad',
+ 'lates;': '\u2aad\ufe00',
+ 'lBarr;': '\u290e',
+ 'lbarr;': '\u290c',
+ 'lbbrk;': '\u2772',
+ 'lbrace;': '{',
+ 'lbrack;': '[',
+ 'lbrke;': '\u298b',
+ 'lbrksld;': '\u298f',
+ 'lbrkslu;': '\u298d',
+ 'Lcaron;': '\u013d',
+ 'lcaron;': '\u013e',
+ 'Lcedil;': '\u013b',
+ 'lcedil;': '\u013c',
+ 'lceil;': '\u2308',
+ 'lcub;': '{',
+ 'Lcy;': '\u041b',
+ 'lcy;': '\u043b',
+ 'ldca;': '\u2936',
+ 'ldquo;': '\u201c',
+ 'ldquor;': '\u201e',
+ 'ldrdhar;': '\u2967',
+ 'ldrushar;': '\u294b',
+ 'ldsh;': '\u21b2',
+ 'lE;': '\u2266',
+ 'le;': '\u2264',
+ 'LeftAngleBracket;': '\u27e8',
+ 'LeftArrow;': '\u2190',
+ 'Leftarrow;': '\u21d0',
+ 'leftarrow;': '\u2190',
+ 'LeftArrowBar;': '\u21e4',
+ 'LeftArrowRightArrow;': '\u21c6',
+ 'leftarrowtail;': '\u21a2',
+ 'LeftCeiling;': '\u2308',
+ 'LeftDoubleBracket;': '\u27e6',
+ 'LeftDownTeeVector;': '\u2961',
+ 'LeftDownVector;': '\u21c3',
+ 'LeftDownVectorBar;': '\u2959',
+ 'LeftFloor;': '\u230a',
+ 'leftharpoondown;': '\u21bd',
+ 'leftharpoonup;': '\u21bc',
+ 'leftleftarrows;': '\u21c7',
+ 'LeftRightArrow;': '\u2194',
+ 'Leftrightarrow;': '\u21d4',
+ 'leftrightarrow;': '\u2194',
+ 'leftrightarrows;': '\u21c6',
+ 'leftrightharpoons;': '\u21cb',
+ 'leftrightsquigarrow;': '\u21ad',
+ 'LeftRightVector;': '\u294e',
+ 'LeftTee;': '\u22a3',
+ 'LeftTeeArrow;': '\u21a4',
+ 'LeftTeeVector;': '\u295a',
+ 'leftthreetimes;': '\u22cb',
+ 'LeftTriangle;': '\u22b2',
+ 'LeftTriangleBar;': '\u29cf',
+ 'LeftTriangleEqual;': '\u22b4',
+ 'LeftUpDownVector;': '\u2951',
+ 'LeftUpTeeVector;': '\u2960',
+ 'LeftUpVector;': '\u21bf',
+ 'LeftUpVectorBar;': '\u2958',
+ 'LeftVector;': '\u21bc',
+ 'LeftVectorBar;': '\u2952',
+ 'lEg;': '\u2a8b',
+ 'leg;': '\u22da',
+ 'leq;': '\u2264',
+ 'leqq;': '\u2266',
+ 'leqslant;': '\u2a7d',
+ 'les;': '\u2a7d',
+ 'lescc;': '\u2aa8',
+ 'lesdot;': '\u2a7f',
+ 'lesdoto;': '\u2a81',
+ 'lesdotor;': '\u2a83',
+ 'lesg;': '\u22da\ufe00',
+ 'lesges;': '\u2a93',
+ 'lessapprox;': '\u2a85',
+ 'lessdot;': '\u22d6',
+ 'lesseqgtr;': '\u22da',
+ 'lesseqqgtr;': '\u2a8b',
+ 'LessEqualGreater;': '\u22da',
+ 'LessFullEqual;': '\u2266',
+ 'LessGreater;': '\u2276',
+ 'lessgtr;': '\u2276',
+ 'LessLess;': '\u2aa1',
+ 'lesssim;': '\u2272',
+ 'LessSlantEqual;': '\u2a7d',
+ 'LessTilde;': '\u2272',
+ 'lfisht;': '\u297c',
+ 'lfloor;': '\u230a',
+ 'Lfr;': '\U0001d50f',
+ 'lfr;': '\U0001d529',
+ 'lg;': '\u2276',
+ 'lgE;': '\u2a91',
+ 'lHar;': '\u2962',
+ 'lhard;': '\u21bd',
+ 'lharu;': '\u21bc',
+ 'lharul;': '\u296a',
+ 'lhblk;': '\u2584',
+ 'LJcy;': '\u0409',
+ 'ljcy;': '\u0459',
+ 'Ll;': '\u22d8',
+ 'll;': '\u226a',
+ 'llarr;': '\u21c7',
+ 'llcorner;': '\u231e',
+ 'Lleftarrow;': '\u21da',
+ 'llhard;': '\u296b',
+ 'lltri;': '\u25fa',
+ 'Lmidot;': '\u013f',
+ 'lmidot;': '\u0140',
+ 'lmoust;': '\u23b0',
+ 'lmoustache;': '\u23b0',
+ 'lnap;': '\u2a89',
+ 'lnapprox;': '\u2a89',
+ 'lnE;': '\u2268',
+ 'lne;': '\u2a87',
+ 'lneq;': '\u2a87',
+ 'lneqq;': '\u2268',
+ 'lnsim;': '\u22e6',
+ 'loang;': '\u27ec',
+ 'loarr;': '\u21fd',
+ 'lobrk;': '\u27e6',
+ 'LongLeftArrow;': '\u27f5',
+ 'Longleftarrow;': '\u27f8',
+ 'longleftarrow;': '\u27f5',
+ 'LongLeftRightArrow;': '\u27f7',
+ 'Longleftrightarrow;': '\u27fa',
+ 'longleftrightarrow;': '\u27f7',
+ 'longmapsto;': '\u27fc',
+ 'LongRightArrow;': '\u27f6',
+ 'Longrightarrow;': '\u27f9',
+ 'longrightarrow;': '\u27f6',
+ 'looparrowleft;': '\u21ab',
+ 'looparrowright;': '\u21ac',
+ 'lopar;': '\u2985',
+ 'Lopf;': '\U0001d543',
+ 'lopf;': '\U0001d55d',
+ 'loplus;': '\u2a2d',
+ 'lotimes;': '\u2a34',
+ 'lowast;': '\u2217',
+ 'lowbar;': '_',
+ 'LowerLeftArrow;': '\u2199',
+ 'LowerRightArrow;': '\u2198',
+ 'loz;': '\u25ca',
+ 'lozenge;': '\u25ca',
+ 'lozf;': '\u29eb',
+ 'lpar;': '(',
+ 'lparlt;': '\u2993',
+ 'lrarr;': '\u21c6',
+ 'lrcorner;': '\u231f',
+ 'lrhar;': '\u21cb',
+ 'lrhard;': '\u296d',
+ 'lrm;': '\u200e',
+ 'lrtri;': '\u22bf',
+ 'lsaquo;': '\u2039',
+ 'Lscr;': '\u2112',
+ 'lscr;': '\U0001d4c1',
+ 'Lsh;': '\u21b0',
+ 'lsh;': '\u21b0',
+ 'lsim;': '\u2272',
+ 'lsime;': '\u2a8d',
+ 'lsimg;': '\u2a8f',
+ 'lsqb;': '[',
+ 'lsquo;': '\u2018',
+ 'lsquor;': '\u201a',
+ 'Lstrok;': '\u0141',
+ 'lstrok;': '\u0142',
+ 'LT': '<',
+ 'lt': '<',
+ 'LT;': '<',
+ 'Lt;': '\u226a',
+ 'lt;': '<',
+ 'ltcc;': '\u2aa6',
+ 'ltcir;': '\u2a79',
+ 'ltdot;': '\u22d6',
+ 'lthree;': '\u22cb',
+ 'ltimes;': '\u22c9',
+ 'ltlarr;': '\u2976',
+ 'ltquest;': '\u2a7b',
+ 'ltri;': '\u25c3',
+ 'ltrie;': '\u22b4',
+ 'ltrif;': '\u25c2',
+ 'ltrPar;': '\u2996',
+ 'lurdshar;': '\u294a',
+ 'luruhar;': '\u2966',
+ 'lvertneqq;': '\u2268\ufe00',
+ 'lvnE;': '\u2268\ufe00',
+ 'macr': '\xaf',
+ 'macr;': '\xaf',
+ 'male;': '\u2642',
+ 'malt;': '\u2720',
+ 'maltese;': '\u2720',
+ 'Map;': '\u2905',
+ 'map;': '\u21a6',
+ 'mapsto;': '\u21a6',
+ 'mapstodown;': '\u21a7',
+ 'mapstoleft;': '\u21a4',
+ 'mapstoup;': '\u21a5',
+ 'marker;': '\u25ae',
+ 'mcomma;': '\u2a29',
+ 'Mcy;': '\u041c',
+ 'mcy;': '\u043c',
+ 'mdash;': '\u2014',
+ 'mDDot;': '\u223a',
+ 'measuredangle;': '\u2221',
+ 'MediumSpace;': '\u205f',
+ 'Mellintrf;': '\u2133',
+ 'Mfr;': '\U0001d510',
+ 'mfr;': '\U0001d52a',
+ 'mho;': '\u2127',
+ 'micro': '\xb5',
+ 'micro;': '\xb5',
+ 'mid;': '\u2223',
+ 'midast;': '*',
+ 'midcir;': '\u2af0',
+ 'middot': '\xb7',
+ 'middot;': '\xb7',
+ 'minus;': '\u2212',
+ 'minusb;': '\u229f',
+ 'minusd;': '\u2238',
+ 'minusdu;': '\u2a2a',
+ 'MinusPlus;': '\u2213',
+ 'mlcp;': '\u2adb',
+ 'mldr;': '\u2026',
+ 'mnplus;': '\u2213',
+ 'models;': '\u22a7',
+ 'Mopf;': '\U0001d544',
+ 'mopf;': '\U0001d55e',
+ 'mp;': '\u2213',
+ 'Mscr;': '\u2133',
+ 'mscr;': '\U0001d4c2',
+ 'mstpos;': '\u223e',
+ 'Mu;': '\u039c',
+ 'mu;': '\u03bc',
+ 'multimap;': '\u22b8',
+ 'mumap;': '\u22b8',
+ 'nabla;': '\u2207',
+ 'Nacute;': '\u0143',
+ 'nacute;': '\u0144',
+ 'nang;': '\u2220\u20d2',
+ 'nap;': '\u2249',
+ 'napE;': '\u2a70\u0338',
+ 'napid;': '\u224b\u0338',
+ 'napos;': '\u0149',
+ 'napprox;': '\u2249',
+ 'natur;': '\u266e',
+ 'natural;': '\u266e',
+ 'naturals;': '\u2115',
+ 'nbsp': '\xa0',
+ 'nbsp;': '\xa0',
+ 'nbump;': '\u224e\u0338',
+ 'nbumpe;': '\u224f\u0338',
+ 'ncap;': '\u2a43',
+ 'Ncaron;': '\u0147',
+ 'ncaron;': '\u0148',
+ 'Ncedil;': '\u0145',
+ 'ncedil;': '\u0146',
+ 'ncong;': '\u2247',
+ 'ncongdot;': '\u2a6d\u0338',
+ 'ncup;': '\u2a42',
+ 'Ncy;': '\u041d',
+ 'ncy;': '\u043d',
+ 'ndash;': '\u2013',
+ 'ne;': '\u2260',
+ 'nearhk;': '\u2924',
+ 'neArr;': '\u21d7',
+ 'nearr;': '\u2197',
+ 'nearrow;': '\u2197',
+ 'nedot;': '\u2250\u0338',
+ 'NegativeMediumSpace;': '\u200b',
+ 'NegativeThickSpace;': '\u200b',
+ 'NegativeThinSpace;': '\u200b',
+ 'NegativeVeryThinSpace;': '\u200b',
+ 'nequiv;': '\u2262',
+ 'nesear;': '\u2928',
+ 'nesim;': '\u2242\u0338',
+ 'NestedGreaterGreater;': '\u226b',
+ 'NestedLessLess;': '\u226a',
+ 'NewLine;': '\n',
+ 'nexist;': '\u2204',
+ 'nexists;': '\u2204',
+ 'Nfr;': '\U0001d511',
+ 'nfr;': '\U0001d52b',
+ 'ngE;': '\u2267\u0338',
+ 'nge;': '\u2271',
+ 'ngeq;': '\u2271',
+ 'ngeqq;': '\u2267\u0338',
+ 'ngeqslant;': '\u2a7e\u0338',
+ 'nges;': '\u2a7e\u0338',
+ 'nGg;': '\u22d9\u0338',
+ 'ngsim;': '\u2275',
+ 'nGt;': '\u226b\u20d2',
+ 'ngt;': '\u226f',
+ 'ngtr;': '\u226f',
+ 'nGtv;': '\u226b\u0338',
+ 'nhArr;': '\u21ce',
+ 'nharr;': '\u21ae',
+ 'nhpar;': '\u2af2',
+ 'ni;': '\u220b',
+ 'nis;': '\u22fc',
+ 'nisd;': '\u22fa',
+ 'niv;': '\u220b',
+ 'NJcy;': '\u040a',
+ 'njcy;': '\u045a',
+ 'nlArr;': '\u21cd',
+ 'nlarr;': '\u219a',
+ 'nldr;': '\u2025',
+ 'nlE;': '\u2266\u0338',
+ 'nle;': '\u2270',
+ 'nLeftarrow;': '\u21cd',
+ 'nleftarrow;': '\u219a',
+ 'nLeftrightarrow;': '\u21ce',
+ 'nleftrightarrow;': '\u21ae',
+ 'nleq;': '\u2270',
+ 'nleqq;': '\u2266\u0338',
+ 'nleqslant;': '\u2a7d\u0338',
+ 'nles;': '\u2a7d\u0338',
+ 'nless;': '\u226e',
+ 'nLl;': '\u22d8\u0338',
+ 'nlsim;': '\u2274',
+ 'nLt;': '\u226a\u20d2',
+ 'nlt;': '\u226e',
+ 'nltri;': '\u22ea',
+ 'nltrie;': '\u22ec',
+ 'nLtv;': '\u226a\u0338',
+ 'nmid;': '\u2224',
+ 'NoBreak;': '\u2060',
+ 'NonBreakingSpace;': '\xa0',
+ 'Nopf;': '\u2115',
+ 'nopf;': '\U0001d55f',
+ 'not': '\xac',
+ 'Not;': '\u2aec',
+ 'not;': '\xac',
+ 'NotCongruent;': '\u2262',
+ 'NotCupCap;': '\u226d',
+ 'NotDoubleVerticalBar;': '\u2226',
+ 'NotElement;': '\u2209',
+ 'NotEqual;': '\u2260',
+ 'NotEqualTilde;': '\u2242\u0338',
+ 'NotExists;': '\u2204',
+ 'NotGreater;': '\u226f',
+ 'NotGreaterEqual;': '\u2271',
+ 'NotGreaterFullEqual;': '\u2267\u0338',
+ 'NotGreaterGreater;': '\u226b\u0338',
+ 'NotGreaterLess;': '\u2279',
+ 'NotGreaterSlantEqual;': '\u2a7e\u0338',
+ 'NotGreaterTilde;': '\u2275',
+ 'NotHumpDownHump;': '\u224e\u0338',
+ 'NotHumpEqual;': '\u224f\u0338',
+ 'notin;': '\u2209',
+ 'notindot;': '\u22f5\u0338',
+ 'notinE;': '\u22f9\u0338',
+ 'notinva;': '\u2209',
+ 'notinvb;': '\u22f7',
+ 'notinvc;': '\u22f6',
+ 'NotLeftTriangle;': '\u22ea',
+ 'NotLeftTriangleBar;': '\u29cf\u0338',
+ 'NotLeftTriangleEqual;': '\u22ec',
+ 'NotLess;': '\u226e',
+ 'NotLessEqual;': '\u2270',
+ 'NotLessGreater;': '\u2278',
+ 'NotLessLess;': '\u226a\u0338',
+ 'NotLessSlantEqual;': '\u2a7d\u0338',
+ 'NotLessTilde;': '\u2274',
+ 'NotNestedGreaterGreater;': '\u2aa2\u0338',
+ 'NotNestedLessLess;': '\u2aa1\u0338',
+ 'notni;': '\u220c',
+ 'notniva;': '\u220c',
+ 'notnivb;': '\u22fe',
+ 'notnivc;': '\u22fd',
+ 'NotPrecedes;': '\u2280',
+ 'NotPrecedesEqual;': '\u2aaf\u0338',
+ 'NotPrecedesSlantEqual;': '\u22e0',
+ 'NotReverseElement;': '\u220c',
+ 'NotRightTriangle;': '\u22eb',
+ 'NotRightTriangleBar;': '\u29d0\u0338',
+ 'NotRightTriangleEqual;': '\u22ed',
+ 'NotSquareSubset;': '\u228f\u0338',
+ 'NotSquareSubsetEqual;': '\u22e2',
+ 'NotSquareSuperset;': '\u2290\u0338',
+ 'NotSquareSupersetEqual;': '\u22e3',
+ 'NotSubset;': '\u2282\u20d2',
+ 'NotSubsetEqual;': '\u2288',
+ 'NotSucceeds;': '\u2281',
+ 'NotSucceedsEqual;': '\u2ab0\u0338',
+ 'NotSucceedsSlantEqual;': '\u22e1',
+ 'NotSucceedsTilde;': '\u227f\u0338',
+ 'NotSuperset;': '\u2283\u20d2',
+ 'NotSupersetEqual;': '\u2289',
+ 'NotTilde;': '\u2241',
+ 'NotTildeEqual;': '\u2244',
+ 'NotTildeFullEqual;': '\u2247',
+ 'NotTildeTilde;': '\u2249',
+ 'NotVerticalBar;': '\u2224',
+ 'npar;': '\u2226',
+ 'nparallel;': '\u2226',
+ 'nparsl;': '\u2afd\u20e5',
+ 'npart;': '\u2202\u0338',
+ 'npolint;': '\u2a14',
+ 'npr;': '\u2280',
+ 'nprcue;': '\u22e0',
+ 'npre;': '\u2aaf\u0338',
+ 'nprec;': '\u2280',
+ 'npreceq;': '\u2aaf\u0338',
+ 'nrArr;': '\u21cf',
+ 'nrarr;': '\u219b',
+ 'nrarrc;': '\u2933\u0338',
+ 'nrarrw;': '\u219d\u0338',
+ 'nRightarrow;': '\u21cf',
+ 'nrightarrow;': '\u219b',
+ 'nrtri;': '\u22eb',
+ 'nrtrie;': '\u22ed',
+ 'nsc;': '\u2281',
+ 'nsccue;': '\u22e1',
+ 'nsce;': '\u2ab0\u0338',
+ 'Nscr;': '\U0001d4a9',
+ 'nscr;': '\U0001d4c3',
+ 'nshortmid;': '\u2224',
+ 'nshortparallel;': '\u2226',
+ 'nsim;': '\u2241',
+ 'nsime;': '\u2244',
+ 'nsimeq;': '\u2244',
+ 'nsmid;': '\u2224',
+ 'nspar;': '\u2226',
+ 'nsqsube;': '\u22e2',
+ 'nsqsupe;': '\u22e3',
+ 'nsub;': '\u2284',
+ 'nsubE;': '\u2ac5\u0338',
+ 'nsube;': '\u2288',
+ 'nsubset;': '\u2282\u20d2',
+ 'nsubseteq;': '\u2288',
+ 'nsubseteqq;': '\u2ac5\u0338',
+ 'nsucc;': '\u2281',
+ 'nsucceq;': '\u2ab0\u0338',
+ 'nsup;': '\u2285',
+ 'nsupE;': '\u2ac6\u0338',
+ 'nsupe;': '\u2289',
+ 'nsupset;': '\u2283\u20d2',
+ 'nsupseteq;': '\u2289',
+ 'nsupseteqq;': '\u2ac6\u0338',
+ 'ntgl;': '\u2279',
+ 'Ntilde': '\xd1',
+ 'ntilde': '\xf1',
+ 'Ntilde;': '\xd1',
+ 'ntilde;': '\xf1',
+ 'ntlg;': '\u2278',
+ 'ntriangleleft;': '\u22ea',
+ 'ntrianglelefteq;': '\u22ec',
+ 'ntriangleright;': '\u22eb',
+ 'ntrianglerighteq;': '\u22ed',
+ 'Nu;': '\u039d',
+ 'nu;': '\u03bd',
+ 'num;': '#',
+ 'numero;': '\u2116',
+ 'numsp;': '\u2007',
+ 'nvap;': '\u224d\u20d2',
+ 'nVDash;': '\u22af',
+ 'nVdash;': '\u22ae',
+ 'nvDash;': '\u22ad',
+ 'nvdash;': '\u22ac',
+ 'nvge;': '\u2265\u20d2',
+ 'nvgt;': '>\u20d2',
+ 'nvHarr;': '\u2904',
+ 'nvinfin;': '\u29de',
+ 'nvlArr;': '\u2902',
+ 'nvle;': '\u2264\u20d2',
+ 'nvlt;': '<\u20d2',
+ 'nvltrie;': '\u22b4\u20d2',
+ 'nvrArr;': '\u2903',
+ 'nvrtrie;': '\u22b5\u20d2',
+ 'nvsim;': '\u223c\u20d2',
+ 'nwarhk;': '\u2923',
+ 'nwArr;': '\u21d6',
+ 'nwarr;': '\u2196',
+ 'nwarrow;': '\u2196',
+ 'nwnear;': '\u2927',
+ 'Oacute': '\xd3',
+ 'oacute': '\xf3',
+ 'Oacute;': '\xd3',
+ 'oacute;': '\xf3',
+ 'oast;': '\u229b',
+ 'ocir;': '\u229a',
+ 'Ocirc': '\xd4',
+ 'ocirc': '\xf4',
+ 'Ocirc;': '\xd4',
+ 'ocirc;': '\xf4',
+ 'Ocy;': '\u041e',
+ 'ocy;': '\u043e',
+ 'odash;': '\u229d',
+ 'Odblac;': '\u0150',
+ 'odblac;': '\u0151',
+ 'odiv;': '\u2a38',
+ 'odot;': '\u2299',
+ 'odsold;': '\u29bc',
+ 'OElig;': '\u0152',
+ 'oelig;': '\u0153',
+ 'ofcir;': '\u29bf',
+ 'Ofr;': '\U0001d512',
+ 'ofr;': '\U0001d52c',
+ 'ogon;': '\u02db',
+ 'Ograve': '\xd2',
+ 'ograve': '\xf2',
+ 'Ograve;': '\xd2',
+ 'ograve;': '\xf2',
+ 'ogt;': '\u29c1',
+ 'ohbar;': '\u29b5',
+ 'ohm;': '\u03a9',
+ 'oint;': '\u222e',
+ 'olarr;': '\u21ba',
+ 'olcir;': '\u29be',
+ 'olcross;': '\u29bb',
+ 'oline;': '\u203e',
+ 'olt;': '\u29c0',
+ 'Omacr;': '\u014c',
+ 'omacr;': '\u014d',
+ 'Omega;': '\u03a9',
+ 'omega;': '\u03c9',
+ 'Omicron;': '\u039f',
+ 'omicron;': '\u03bf',
+ 'omid;': '\u29b6',
+ 'ominus;': '\u2296',
+ 'Oopf;': '\U0001d546',
+ 'oopf;': '\U0001d560',
+ 'opar;': '\u29b7',
+ 'OpenCurlyDoubleQuote;': '\u201c',
+ 'OpenCurlyQuote;': '\u2018',
+ 'operp;': '\u29b9',
+ 'oplus;': '\u2295',
+ 'Or;': '\u2a54',
+ 'or;': '\u2228',
+ 'orarr;': '\u21bb',
+ 'ord;': '\u2a5d',
+ 'order;': '\u2134',
+ 'orderof;': '\u2134',
+ 'ordf': '\xaa',
+ 'ordf;': '\xaa',
+ 'ordm': '\xba',
+ 'ordm;': '\xba',
+ 'origof;': '\u22b6',
+ 'oror;': '\u2a56',
+ 'orslope;': '\u2a57',
+ 'orv;': '\u2a5b',
+ 'oS;': '\u24c8',
+ 'Oscr;': '\U0001d4aa',
+ 'oscr;': '\u2134',
+ 'Oslash': '\xd8',
+ 'oslash': '\xf8',
+ 'Oslash;': '\xd8',
+ 'oslash;': '\xf8',
+ 'osol;': '\u2298',
+ 'Otilde': '\xd5',
+ 'otilde': '\xf5',
+ 'Otilde;': '\xd5',
+ 'otilde;': '\xf5',
+ 'Otimes;': '\u2a37',
+ 'otimes;': '\u2297',
+ 'otimesas;': '\u2a36',
+ 'Ouml': '\xd6',
+ 'ouml': '\xf6',
+ 'Ouml;': '\xd6',
+ 'ouml;': '\xf6',
+ 'ovbar;': '\u233d',
+ 'OverBar;': '\u203e',
+ 'OverBrace;': '\u23de',
+ 'OverBracket;': '\u23b4',
+ 'OverParenthesis;': '\u23dc',
+ 'par;': '\u2225',
+ 'para': '\xb6',
+ 'para;': '\xb6',
+ 'parallel;': '\u2225',
+ 'parsim;': '\u2af3',
+ 'parsl;': '\u2afd',
+ 'part;': '\u2202',
+ 'PartialD;': '\u2202',
+ 'Pcy;': '\u041f',
+ 'pcy;': '\u043f',
+ 'percnt;': '%',
+ 'period;': '.',
+ 'permil;': '\u2030',
+ 'perp;': '\u22a5',
+ 'pertenk;': '\u2031',
+ 'Pfr;': '\U0001d513',
+ 'pfr;': '\U0001d52d',
+ 'Phi;': '\u03a6',
+ 'phi;': '\u03c6',
+ 'phiv;': '\u03d5',
+ 'phmmat;': '\u2133',
+ 'phone;': '\u260e',
+ 'Pi;': '\u03a0',
+ 'pi;': '\u03c0',
+ 'pitchfork;': '\u22d4',
+ 'piv;': '\u03d6',
+ 'planck;': '\u210f',
+ 'planckh;': '\u210e',
+ 'plankv;': '\u210f',
+ 'plus;': '+',
+ 'plusacir;': '\u2a23',
+ 'plusb;': '\u229e',
+ 'pluscir;': '\u2a22',
+ 'plusdo;': '\u2214',
+ 'plusdu;': '\u2a25',
+ 'pluse;': '\u2a72',
+ 'PlusMinus;': '\xb1',
+ 'plusmn': '\xb1',
+ 'plusmn;': '\xb1',
+ 'plussim;': '\u2a26',
+ 'plustwo;': '\u2a27',
+ 'pm;': '\xb1',
+ 'Poincareplane;': '\u210c',
+ 'pointint;': '\u2a15',
+ 'Popf;': '\u2119',
+ 'popf;': '\U0001d561',
+ 'pound': '\xa3',
+ 'pound;': '\xa3',
+ 'Pr;': '\u2abb',
+ 'pr;': '\u227a',
+ 'prap;': '\u2ab7',
+ 'prcue;': '\u227c',
+ 'prE;': '\u2ab3',
+ 'pre;': '\u2aaf',
+ 'prec;': '\u227a',
+ 'precapprox;': '\u2ab7',
+ 'preccurlyeq;': '\u227c',
+ 'Precedes;': '\u227a',
+ 'PrecedesEqual;': '\u2aaf',
+ 'PrecedesSlantEqual;': '\u227c',
+ 'PrecedesTilde;': '\u227e',
+ 'preceq;': '\u2aaf',
+ 'precnapprox;': '\u2ab9',
+ 'precneqq;': '\u2ab5',
+ 'precnsim;': '\u22e8',
+ 'precsim;': '\u227e',
+ 'Prime;': '\u2033',
+ 'prime;': '\u2032',
+ 'primes;': '\u2119',
+ 'prnap;': '\u2ab9',
+ 'prnE;': '\u2ab5',
+ 'prnsim;': '\u22e8',
+ 'prod;': '\u220f',
+ 'Product;': '\u220f',
+ 'profalar;': '\u232e',
+ 'profline;': '\u2312',
+ 'profsurf;': '\u2313',
+ 'prop;': '\u221d',
+ 'Proportion;': '\u2237',
+ 'Proportional;': '\u221d',
+ 'propto;': '\u221d',
+ 'prsim;': '\u227e',
+ 'prurel;': '\u22b0',
+ 'Pscr;': '\U0001d4ab',
+ 'pscr;': '\U0001d4c5',
+ 'Psi;': '\u03a8',
+ 'psi;': '\u03c8',
+ 'puncsp;': '\u2008',
+ 'Qfr;': '\U0001d514',
+ 'qfr;': '\U0001d52e',
+ 'qint;': '\u2a0c',
+ 'Qopf;': '\u211a',
+ 'qopf;': '\U0001d562',
+ 'qprime;': '\u2057',
+ 'Qscr;': '\U0001d4ac',
+ 'qscr;': '\U0001d4c6',
+ 'quaternions;': '\u210d',
+ 'quatint;': '\u2a16',
+ 'quest;': '?',
+ 'questeq;': '\u225f',
+ 'QUOT': '"',
+ 'quot': '"',
+ 'QUOT;': '"',
+ 'quot;': '"',
+ 'rAarr;': '\u21db',
+ 'race;': '\u223d\u0331',
+ 'Racute;': '\u0154',
+ 'racute;': '\u0155',
+ 'radic;': '\u221a',
+ 'raemptyv;': '\u29b3',
+ 'Rang;': '\u27eb',
+ 'rang;': '\u27e9',
+ 'rangd;': '\u2992',
+ 'range;': '\u29a5',
+ 'rangle;': '\u27e9',
+ 'raquo': '\xbb',
+ 'raquo;': '\xbb',
+ 'Rarr;': '\u21a0',
+ 'rArr;': '\u21d2',
+ 'rarr;': '\u2192',
+ 'rarrap;': '\u2975',
+ 'rarrb;': '\u21e5',
+ 'rarrbfs;': '\u2920',
+ 'rarrc;': '\u2933',
+ 'rarrfs;': '\u291e',
+ 'rarrhk;': '\u21aa',
+ 'rarrlp;': '\u21ac',
+ 'rarrpl;': '\u2945',
+ 'rarrsim;': '\u2974',
+ 'Rarrtl;': '\u2916',
+ 'rarrtl;': '\u21a3',
+ 'rarrw;': '\u219d',
+ 'rAtail;': '\u291c',
+ 'ratail;': '\u291a',
+ 'ratio;': '\u2236',
+ 'rationals;': '\u211a',
+ 'RBarr;': '\u2910',
+ 'rBarr;': '\u290f',
+ 'rbarr;': '\u290d',
+ 'rbbrk;': '\u2773',
+ 'rbrace;': '}',
+ 'rbrack;': ']',
+ 'rbrke;': '\u298c',
+ 'rbrksld;': '\u298e',
+ 'rbrkslu;': '\u2990',
+ 'Rcaron;': '\u0158',
+ 'rcaron;': '\u0159',
+ 'Rcedil;': '\u0156',
+ 'rcedil;': '\u0157',
+ 'rceil;': '\u2309',
+ 'rcub;': '}',
+ 'Rcy;': '\u0420',
+ 'rcy;': '\u0440',
+ 'rdca;': '\u2937',
+ 'rdldhar;': '\u2969',
+ 'rdquo;': '\u201d',
+ 'rdquor;': '\u201d',
+ 'rdsh;': '\u21b3',
+ 'Re;': '\u211c',
+ 'real;': '\u211c',
+ 'realine;': '\u211b',
+ 'realpart;': '\u211c',
+ 'reals;': '\u211d',
+ 'rect;': '\u25ad',
+ 'REG': '\xae',
+ 'reg': '\xae',
+ 'REG;': '\xae',
+ 'reg;': '\xae',
+ 'ReverseElement;': '\u220b',
+ 'ReverseEquilibrium;': '\u21cb',
+ 'ReverseUpEquilibrium;': '\u296f',
+ 'rfisht;': '\u297d',
+ 'rfloor;': '\u230b',
+ 'Rfr;': '\u211c',
+ 'rfr;': '\U0001d52f',
+ 'rHar;': '\u2964',
+ 'rhard;': '\u21c1',
+ 'rharu;': '\u21c0',
+ 'rharul;': '\u296c',
+ 'Rho;': '\u03a1',
+ 'rho;': '\u03c1',
+ 'rhov;': '\u03f1',
+ 'RightAngleBracket;': '\u27e9',
+ 'RightArrow;': '\u2192',
+ 'Rightarrow;': '\u21d2',
+ 'rightarrow;': '\u2192',
+ 'RightArrowBar;': '\u21e5',
+ 'RightArrowLeftArrow;': '\u21c4',
+ 'rightarrowtail;': '\u21a3',
+ 'RightCeiling;': '\u2309',
+ 'RightDoubleBracket;': '\u27e7',
+ 'RightDownTeeVector;': '\u295d',
+ 'RightDownVector;': '\u21c2',
+ 'RightDownVectorBar;': '\u2955',
+ 'RightFloor;': '\u230b',
+ 'rightharpoondown;': '\u21c1',
+ 'rightharpoonup;': '\u21c0',
+ 'rightleftarrows;': '\u21c4',
+ 'rightleftharpoons;': '\u21cc',
+ 'rightrightarrows;': '\u21c9',
+ 'rightsquigarrow;': '\u219d',
+ 'RightTee;': '\u22a2',
+ 'RightTeeArrow;': '\u21a6',
+ 'RightTeeVector;': '\u295b',
+ 'rightthreetimes;': '\u22cc',
+ 'RightTriangle;': '\u22b3',
+ 'RightTriangleBar;': '\u29d0',
+ 'RightTriangleEqual;': '\u22b5',
+ 'RightUpDownVector;': '\u294f',
+ 'RightUpTeeVector;': '\u295c',
+ 'RightUpVector;': '\u21be',
+ 'RightUpVectorBar;': '\u2954',
+ 'RightVector;': '\u21c0',
+ 'RightVectorBar;': '\u2953',
+ 'ring;': '\u02da',
+ 'risingdotseq;': '\u2253',
+ 'rlarr;': '\u21c4',
+ 'rlhar;': '\u21cc',
+ 'rlm;': '\u200f',
+ 'rmoust;': '\u23b1',
+ 'rmoustache;': '\u23b1',
+ 'rnmid;': '\u2aee',
+ 'roang;': '\u27ed',
+ 'roarr;': '\u21fe',
+ 'robrk;': '\u27e7',
+ 'ropar;': '\u2986',
+ 'Ropf;': '\u211d',
+ 'ropf;': '\U0001d563',
+ 'roplus;': '\u2a2e',
+ 'rotimes;': '\u2a35',
+ 'RoundImplies;': '\u2970',
+ 'rpar;': ')',
+ 'rpargt;': '\u2994',
+ 'rppolint;': '\u2a12',
+ 'rrarr;': '\u21c9',
+ 'Rrightarrow;': '\u21db',
+ 'rsaquo;': '\u203a',
+ 'Rscr;': '\u211b',
+ 'rscr;': '\U0001d4c7',
+ 'Rsh;': '\u21b1',
+ 'rsh;': '\u21b1',
+ 'rsqb;': ']',
+ 'rsquo;': '\u2019',
+ 'rsquor;': '\u2019',
+ 'rthree;': '\u22cc',
+ 'rtimes;': '\u22ca',
+ 'rtri;': '\u25b9',
+ 'rtrie;': '\u22b5',
+ 'rtrif;': '\u25b8',
+ 'rtriltri;': '\u29ce',
+ 'RuleDelayed;': '\u29f4',
+ 'ruluhar;': '\u2968',
+ 'rx;': '\u211e',
+ 'Sacute;': '\u015a',
+ 'sacute;': '\u015b',
+ 'sbquo;': '\u201a',
+ 'Sc;': '\u2abc',
+ 'sc;': '\u227b',
+ 'scap;': '\u2ab8',
+ 'Scaron;': '\u0160',
+ 'scaron;': '\u0161',
+ 'sccue;': '\u227d',
+ 'scE;': '\u2ab4',
+ 'sce;': '\u2ab0',
+ 'Scedil;': '\u015e',
+ 'scedil;': '\u015f',
+ 'Scirc;': '\u015c',
+ 'scirc;': '\u015d',
+ 'scnap;': '\u2aba',
+ 'scnE;': '\u2ab6',
+ 'scnsim;': '\u22e9',
+ 'scpolint;': '\u2a13',
+ 'scsim;': '\u227f',
+ 'Scy;': '\u0421',
+ 'scy;': '\u0441',
+ 'sdot;': '\u22c5',
+ 'sdotb;': '\u22a1',
+ 'sdote;': '\u2a66',
+ 'searhk;': '\u2925',
+ 'seArr;': '\u21d8',
+ 'searr;': '\u2198',
+ 'searrow;': '\u2198',
+ 'sect': '\xa7',
+ 'sect;': '\xa7',
+ 'semi;': ';',
+ 'seswar;': '\u2929',
+ 'setminus;': '\u2216',
+ 'setmn;': '\u2216',
+ 'sext;': '\u2736',
+ 'Sfr;': '\U0001d516',
+ 'sfr;': '\U0001d530',
+ 'sfrown;': '\u2322',
+ 'sharp;': '\u266f',
+ 'SHCHcy;': '\u0429',
+ 'shchcy;': '\u0449',
+ 'SHcy;': '\u0428',
+ 'shcy;': '\u0448',
+ 'ShortDownArrow;': '\u2193',
+ 'ShortLeftArrow;': '\u2190',
+ 'shortmid;': '\u2223',
+ 'shortparallel;': '\u2225',
+ 'ShortRightArrow;': '\u2192',
+ 'ShortUpArrow;': '\u2191',
+ 'shy': '\xad',
+ 'shy;': '\xad',
+ 'Sigma;': '\u03a3',
+ 'sigma;': '\u03c3',
+ 'sigmaf;': '\u03c2',
+ 'sigmav;': '\u03c2',
+ 'sim;': '\u223c',
+ 'simdot;': '\u2a6a',
+ 'sime;': '\u2243',
+ 'simeq;': '\u2243',
+ 'simg;': '\u2a9e',
+ 'simgE;': '\u2aa0',
+ 'siml;': '\u2a9d',
+ 'simlE;': '\u2a9f',
+ 'simne;': '\u2246',
+ 'simplus;': '\u2a24',
+ 'simrarr;': '\u2972',
+ 'slarr;': '\u2190',
+ 'SmallCircle;': '\u2218',
+ 'smallsetminus;': '\u2216',
+ 'smashp;': '\u2a33',
+ 'smeparsl;': '\u29e4',
+ 'smid;': '\u2223',
+ 'smile;': '\u2323',
+ 'smt;': '\u2aaa',
+ 'smte;': '\u2aac',
+ 'smtes;': '\u2aac\ufe00',
+ 'SOFTcy;': '\u042c',
+ 'softcy;': '\u044c',
+ 'sol;': '/',
+ 'solb;': '\u29c4',
+ 'solbar;': '\u233f',
+ 'Sopf;': '\U0001d54a',
+ 'sopf;': '\U0001d564',
+ 'spades;': '\u2660',
+ 'spadesuit;': '\u2660',
+ 'spar;': '\u2225',
+ 'sqcap;': '\u2293',
+ 'sqcaps;': '\u2293\ufe00',
+ 'sqcup;': '\u2294',
+ 'sqcups;': '\u2294\ufe00',
+ 'Sqrt;': '\u221a',
+ 'sqsub;': '\u228f',
+ 'sqsube;': '\u2291',
+ 'sqsubset;': '\u228f',
+ 'sqsubseteq;': '\u2291',
+ 'sqsup;': '\u2290',
+ 'sqsupe;': '\u2292',
+ 'sqsupset;': '\u2290',
+ 'sqsupseteq;': '\u2292',
+ 'squ;': '\u25a1',
+ 'Square;': '\u25a1',
+ 'square;': '\u25a1',
+ 'SquareIntersection;': '\u2293',
+ 'SquareSubset;': '\u228f',
+ 'SquareSubsetEqual;': '\u2291',
+ 'SquareSuperset;': '\u2290',
+ 'SquareSupersetEqual;': '\u2292',
+ 'SquareUnion;': '\u2294',
+ 'squarf;': '\u25aa',
+ 'squf;': '\u25aa',
+ 'srarr;': '\u2192',
+ 'Sscr;': '\U0001d4ae',
+ 'sscr;': '\U0001d4c8',
+ 'ssetmn;': '\u2216',
+ 'ssmile;': '\u2323',
+ 'sstarf;': '\u22c6',
+ 'Star;': '\u22c6',
+ 'star;': '\u2606',
+ 'starf;': '\u2605',
+ 'straightepsilon;': '\u03f5',
+ 'straightphi;': '\u03d5',
+ 'strns;': '\xaf',
+ 'Sub;': '\u22d0',
+ 'sub;': '\u2282',
+ 'subdot;': '\u2abd',
+ 'subE;': '\u2ac5',
+ 'sube;': '\u2286',
+ 'subedot;': '\u2ac3',
+ 'submult;': '\u2ac1',
+ 'subnE;': '\u2acb',
+ 'subne;': '\u228a',
+ 'subplus;': '\u2abf',
+ 'subrarr;': '\u2979',
+ 'Subset;': '\u22d0',
+ 'subset;': '\u2282',
+ 'subseteq;': '\u2286',
+ 'subseteqq;': '\u2ac5',
+ 'SubsetEqual;': '\u2286',
+ 'subsetneq;': '\u228a',
+ 'subsetneqq;': '\u2acb',
+ 'subsim;': '\u2ac7',
+ 'subsub;': '\u2ad5',
+ 'subsup;': '\u2ad3',
+ 'succ;': '\u227b',
+ 'succapprox;': '\u2ab8',
+ 'succcurlyeq;': '\u227d',
+ 'Succeeds;': '\u227b',
+ 'SucceedsEqual;': '\u2ab0',
+ 'SucceedsSlantEqual;': '\u227d',
+ 'SucceedsTilde;': '\u227f',
+ 'succeq;': '\u2ab0',
+ 'succnapprox;': '\u2aba',
+ 'succneqq;': '\u2ab6',
+ 'succnsim;': '\u22e9',
+ 'succsim;': '\u227f',
+ 'SuchThat;': '\u220b',
+ 'Sum;': '\u2211',
+ 'sum;': '\u2211',
+ 'sung;': '\u266a',
+ 'sup1': '\xb9',
+ 'sup1;': '\xb9',
+ 'sup2': '\xb2',
+ 'sup2;': '\xb2',
+ 'sup3': '\xb3',
+ 'sup3;': '\xb3',
+ 'Sup;': '\u22d1',
+ 'sup;': '\u2283',
+ 'supdot;': '\u2abe',
+ 'supdsub;': '\u2ad8',
+ 'supE;': '\u2ac6',
+ 'supe;': '\u2287',
+ 'supedot;': '\u2ac4',
+ 'Superset;': '\u2283',
+ 'SupersetEqual;': '\u2287',
+ 'suphsol;': '\u27c9',
+ 'suphsub;': '\u2ad7',
+ 'suplarr;': '\u297b',
+ 'supmult;': '\u2ac2',
+ 'supnE;': '\u2acc',
+ 'supne;': '\u228b',
+ 'supplus;': '\u2ac0',
+ 'Supset;': '\u22d1',
+ 'supset;': '\u2283',
+ 'supseteq;': '\u2287',
+ 'supseteqq;': '\u2ac6',
+ 'supsetneq;': '\u228b',
+ 'supsetneqq;': '\u2acc',
+ 'supsim;': '\u2ac8',
+ 'supsub;': '\u2ad4',
+ 'supsup;': '\u2ad6',
+ 'swarhk;': '\u2926',
+ 'swArr;': '\u21d9',
+ 'swarr;': '\u2199',
+ 'swarrow;': '\u2199',
+ 'swnwar;': '\u292a',
+ 'szlig': '\xdf',
+ 'szlig;': '\xdf',
+ 'Tab;': '\t',
+ 'target;': '\u2316',
+ 'Tau;': '\u03a4',
+ 'tau;': '\u03c4',
+ 'tbrk;': '\u23b4',
+ 'Tcaron;': '\u0164',
+ 'tcaron;': '\u0165',
+ 'Tcedil;': '\u0162',
+ 'tcedil;': '\u0163',
+ 'Tcy;': '\u0422',
+ 'tcy;': '\u0442',
+ 'tdot;': '\u20db',
+ 'telrec;': '\u2315',
+ 'Tfr;': '\U0001d517',
+ 'tfr;': '\U0001d531',
+ 'there4;': '\u2234',
+ 'Therefore;': '\u2234',
+ 'therefore;': '\u2234',
+ 'Theta;': '\u0398',
+ 'theta;': '\u03b8',
+ 'thetasym;': '\u03d1',
+ 'thetav;': '\u03d1',
+ 'thickapprox;': '\u2248',
+ 'thicksim;': '\u223c',
+ 'ThickSpace;': '\u205f\u200a',
+ 'thinsp;': '\u2009',
+ 'ThinSpace;': '\u2009',
+ 'thkap;': '\u2248',
+ 'thksim;': '\u223c',
+ 'THORN': '\xde',
+ 'thorn': '\xfe',
+ 'THORN;': '\xde',
+ 'thorn;': '\xfe',
+ 'Tilde;': '\u223c',
+ 'tilde;': '\u02dc',
+ 'TildeEqual;': '\u2243',
+ 'TildeFullEqual;': '\u2245',
+ 'TildeTilde;': '\u2248',
+ 'times': '\xd7',
+ 'times;': '\xd7',
+ 'timesb;': '\u22a0',
+ 'timesbar;': '\u2a31',
+ 'timesd;': '\u2a30',
+ 'tint;': '\u222d',
+ 'toea;': '\u2928',
+ 'top;': '\u22a4',
+ 'topbot;': '\u2336',
+ 'topcir;': '\u2af1',
+ 'Topf;': '\U0001d54b',
+ 'topf;': '\U0001d565',
+ 'topfork;': '\u2ada',
+ 'tosa;': '\u2929',
+ 'tprime;': '\u2034',
+ 'TRADE;': '\u2122',
+ 'trade;': '\u2122',
+ 'triangle;': '\u25b5',
+ 'triangledown;': '\u25bf',
+ 'triangleleft;': '\u25c3',
+ 'trianglelefteq;': '\u22b4',
+ 'triangleq;': '\u225c',
+ 'triangleright;': '\u25b9',
+ 'trianglerighteq;': '\u22b5',
+ 'tridot;': '\u25ec',
+ 'trie;': '\u225c',
+ 'triminus;': '\u2a3a',
+ 'TripleDot;': '\u20db',
+ 'triplus;': '\u2a39',
+ 'trisb;': '\u29cd',
+ 'tritime;': '\u2a3b',
+ 'trpezium;': '\u23e2',
+ 'Tscr;': '\U0001d4af',
+ 'tscr;': '\U0001d4c9',
+ 'TScy;': '\u0426',
+ 'tscy;': '\u0446',
+ 'TSHcy;': '\u040b',
+ 'tshcy;': '\u045b',
+ 'Tstrok;': '\u0166',
+ 'tstrok;': '\u0167',
+ 'twixt;': '\u226c',
+ 'twoheadleftarrow;': '\u219e',
+ 'twoheadrightarrow;': '\u21a0',
+ 'Uacute': '\xda',
+ 'uacute': '\xfa',
+ 'Uacute;': '\xda',
+ 'uacute;': '\xfa',
+ 'Uarr;': '\u219f',
+ 'uArr;': '\u21d1',
+ 'uarr;': '\u2191',
+ 'Uarrocir;': '\u2949',
+ 'Ubrcy;': '\u040e',
+ 'ubrcy;': '\u045e',
+ 'Ubreve;': '\u016c',
+ 'ubreve;': '\u016d',
+ 'Ucirc': '\xdb',
+ 'ucirc': '\xfb',
+ 'Ucirc;': '\xdb',
+ 'ucirc;': '\xfb',
+ 'Ucy;': '\u0423',
+ 'ucy;': '\u0443',
+ 'udarr;': '\u21c5',
+ 'Udblac;': '\u0170',
+ 'udblac;': '\u0171',
+ 'udhar;': '\u296e',
+ 'ufisht;': '\u297e',
+ 'Ufr;': '\U0001d518',
+ 'ufr;': '\U0001d532',
+ 'Ugrave': '\xd9',
+ 'ugrave': '\xf9',
+ 'Ugrave;': '\xd9',
+ 'ugrave;': '\xf9',
+ 'uHar;': '\u2963',
+ 'uharl;': '\u21bf',
+ 'uharr;': '\u21be',
+ 'uhblk;': '\u2580',
+ 'ulcorn;': '\u231c',
+ 'ulcorner;': '\u231c',
+ 'ulcrop;': '\u230f',
+ 'ultri;': '\u25f8',
+ 'Umacr;': '\u016a',
+ 'umacr;': '\u016b',
+ 'uml': '\xa8',
+ 'uml;': '\xa8',
+ 'UnderBar;': '_',
+ 'UnderBrace;': '\u23df',
+ 'UnderBracket;': '\u23b5',
+ 'UnderParenthesis;': '\u23dd',
+ 'Union;': '\u22c3',
+ 'UnionPlus;': '\u228e',
+ 'Uogon;': '\u0172',
+ 'uogon;': '\u0173',
+ 'Uopf;': '\U0001d54c',
+ 'uopf;': '\U0001d566',
+ 'UpArrow;': '\u2191',
+ 'Uparrow;': '\u21d1',
+ 'uparrow;': '\u2191',
+ 'UpArrowBar;': '\u2912',
+ 'UpArrowDownArrow;': '\u21c5',
+ 'UpDownArrow;': '\u2195',
+ 'Updownarrow;': '\u21d5',
+ 'updownarrow;': '\u2195',
+ 'UpEquilibrium;': '\u296e',
+ 'upharpoonleft;': '\u21bf',
+ 'upharpoonright;': '\u21be',
+ 'uplus;': '\u228e',
+ 'UpperLeftArrow;': '\u2196',
+ 'UpperRightArrow;': '\u2197',
+ 'Upsi;': '\u03d2',
+ 'upsi;': '\u03c5',
+ 'upsih;': '\u03d2',
+ 'Upsilon;': '\u03a5',
+ 'upsilon;': '\u03c5',
+ 'UpTee;': '\u22a5',
+ 'UpTeeArrow;': '\u21a5',
+ 'upuparrows;': '\u21c8',
+ 'urcorn;': '\u231d',
+ 'urcorner;': '\u231d',
+ 'urcrop;': '\u230e',
+ 'Uring;': '\u016e',
+ 'uring;': '\u016f',
+ 'urtri;': '\u25f9',
+ 'Uscr;': '\U0001d4b0',
+ 'uscr;': '\U0001d4ca',
+ 'utdot;': '\u22f0',
+ 'Utilde;': '\u0168',
+ 'utilde;': '\u0169',
+ 'utri;': '\u25b5',
+ 'utrif;': '\u25b4',
+ 'uuarr;': '\u21c8',
+ 'Uuml': '\xdc',
+ 'uuml': '\xfc',
+ 'Uuml;': '\xdc',
+ 'uuml;': '\xfc',
+ 'uwangle;': '\u29a7',
+ 'vangrt;': '\u299c',
+ 'varepsilon;': '\u03f5',
+ 'varkappa;': '\u03f0',
+ 'varnothing;': '\u2205',
+ 'varphi;': '\u03d5',
+ 'varpi;': '\u03d6',
+ 'varpropto;': '\u221d',
+ 'vArr;': '\u21d5',
+ 'varr;': '\u2195',
+ 'varrho;': '\u03f1',
+ 'varsigma;': '\u03c2',
+ 'varsubsetneq;': '\u228a\ufe00',
+ 'varsubsetneqq;': '\u2acb\ufe00',
+ 'varsupsetneq;': '\u228b\ufe00',
+ 'varsupsetneqq;': '\u2acc\ufe00',
+ 'vartheta;': '\u03d1',
+ 'vartriangleleft;': '\u22b2',
+ 'vartriangleright;': '\u22b3',
+ 'Vbar;': '\u2aeb',
+ 'vBar;': '\u2ae8',
+ 'vBarv;': '\u2ae9',
+ 'Vcy;': '\u0412',
+ 'vcy;': '\u0432',
+ 'VDash;': '\u22ab',
+ 'Vdash;': '\u22a9',
+ 'vDash;': '\u22a8',
+ 'vdash;': '\u22a2',
+ 'Vdashl;': '\u2ae6',
+ 'Vee;': '\u22c1',
+ 'vee;': '\u2228',
+ 'veebar;': '\u22bb',
+ 'veeeq;': '\u225a',
+ 'vellip;': '\u22ee',
+ 'Verbar;': '\u2016',
+ 'verbar;': '|',
+ 'Vert;': '\u2016',
+ 'vert;': '|',
+ 'VerticalBar;': '\u2223',
+ 'VerticalLine;': '|',
+ 'VerticalSeparator;': '\u2758',
+ 'VerticalTilde;': '\u2240',
+ 'VeryThinSpace;': '\u200a',
+ 'Vfr;': '\U0001d519',
+ 'vfr;': '\U0001d533',
+ 'vltri;': '\u22b2',
+ 'vnsub;': '\u2282\u20d2',
+ 'vnsup;': '\u2283\u20d2',
+ 'Vopf;': '\U0001d54d',
+ 'vopf;': '\U0001d567',
+ 'vprop;': '\u221d',
+ 'vrtri;': '\u22b3',
+ 'Vscr;': '\U0001d4b1',
+ 'vscr;': '\U0001d4cb',
+ 'vsubnE;': '\u2acb\ufe00',
+ 'vsubne;': '\u228a\ufe00',
+ 'vsupnE;': '\u2acc\ufe00',
+ 'vsupne;': '\u228b\ufe00',
+ 'Vvdash;': '\u22aa',
+ 'vzigzag;': '\u299a',
+ 'Wcirc;': '\u0174',
+ 'wcirc;': '\u0175',
+ 'wedbar;': '\u2a5f',
+ 'Wedge;': '\u22c0',
+ 'wedge;': '\u2227',
+ 'wedgeq;': '\u2259',
+ 'weierp;': '\u2118',
+ 'Wfr;': '\U0001d51a',
+ 'wfr;': '\U0001d534',
+ 'Wopf;': '\U0001d54e',
+ 'wopf;': '\U0001d568',
+ 'wp;': '\u2118',
+ 'wr;': '\u2240',
+ 'wreath;': '\u2240',
+ 'Wscr;': '\U0001d4b2',
+ 'wscr;': '\U0001d4cc',
+ 'xcap;': '\u22c2',
+ 'xcirc;': '\u25ef',
+ 'xcup;': '\u22c3',
+ 'xdtri;': '\u25bd',
+ 'Xfr;': '\U0001d51b',
+ 'xfr;': '\U0001d535',
+ 'xhArr;': '\u27fa',
+ 'xharr;': '\u27f7',
+ 'Xi;': '\u039e',
+ 'xi;': '\u03be',
+ 'xlArr;': '\u27f8',
+ 'xlarr;': '\u27f5',
+ 'xmap;': '\u27fc',
+ 'xnis;': '\u22fb',
+ 'xodot;': '\u2a00',
+ 'Xopf;': '\U0001d54f',
+ 'xopf;': '\U0001d569',
+ 'xoplus;': '\u2a01',
+ 'xotime;': '\u2a02',
+ 'xrArr;': '\u27f9',
+ 'xrarr;': '\u27f6',
+ 'Xscr;': '\U0001d4b3',
+ 'xscr;': '\U0001d4cd',
+ 'xsqcup;': '\u2a06',
+ 'xuplus;': '\u2a04',
+ 'xutri;': '\u25b3',
+ 'xvee;': '\u22c1',
+ 'xwedge;': '\u22c0',
+ 'Yacute': '\xdd',
+ 'yacute': '\xfd',
+ 'Yacute;': '\xdd',
+ 'yacute;': '\xfd',
+ 'YAcy;': '\u042f',
+ 'yacy;': '\u044f',
+ 'Ycirc;': '\u0176',
+ 'ycirc;': '\u0177',
+ 'Ycy;': '\u042b',
+ 'ycy;': '\u044b',
+ 'yen': '\xa5',
+ 'yen;': '\xa5',
+ 'Yfr;': '\U0001d51c',
+ 'yfr;': '\U0001d536',
+ 'YIcy;': '\u0407',
+ 'yicy;': '\u0457',
+ 'Yopf;': '\U0001d550',
+ 'yopf;': '\U0001d56a',
+ 'Yscr;': '\U0001d4b4',
+ 'yscr;': '\U0001d4ce',
+ 'YUcy;': '\u042e',
+ 'yucy;': '\u044e',
+ 'yuml': '\xff',
+ 'Yuml;': '\u0178',
+ 'yuml;': '\xff',
+ 'Zacute;': '\u0179',
+ 'zacute;': '\u017a',
+ 'Zcaron;': '\u017d',
+ 'zcaron;': '\u017e',
+ 'Zcy;': '\u0417',
+ 'zcy;': '\u0437',
+ 'Zdot;': '\u017b',
+ 'zdot;': '\u017c',
+ 'zeetrf;': '\u2128',
+ 'ZeroWidthSpace;': '\u200b',
+ 'Zeta;': '\u0396',
+ 'zeta;': '\u03b6',
+ 'Zfr;': '\u2128',
+ 'zfr;': '\U0001d537',
+ 'ZHcy;': '\u0416',
+ 'zhcy;': '\u0436',
+ 'zigrarr;': '\u21dd',
+ 'Zopf;': '\u2124',
+ 'zopf;': '\U0001d56b',
+ 'Zscr;': '\U0001d4b5',
+ 'zscr;': '\U0001d4cf',
+ 'zwj;': '\u200d',
+ 'zwnj;': '\u200c',
+}
+
+# maps each Unicode codepoint found in name2codepoint back to an HTML entity name
+codepoint2name = {}
+
+# maps each HTML entity name in name2codepoint to chr(codepoint)
+# NOTE(review): the loop below always stores chr(codepoint); no character-reference form for characters outside Latin-1 is produced here
+entitydefs = {}
+
+for (name, codepoint) in name2codepoint.items():
+ codepoint2name[codepoint] = name  # if several names share a codepoint, the last one iterated wins
+ entitydefs[name] = chr(codepoint)
+
+del name, codepoint  # remove the loop variables so they are not left behind as names in this scope
diff --git a/contrib/python/future/future/backports/html/parser.py b/contrib/python/future/future/backports/html/parser.py
index 8896867a34..fb652636d4 100644
--- a/contrib/python/future/future/backports/html/parser.py
+++ b/contrib/python/future/future/backports/html/parser.py
@@ -1,536 +1,536 @@
-"""A parser for HTML and XHTML.
-
-Backported for python-future from Python 3.3.
-"""
-
-# This file is based on sgmllib.py, but the API is slightly different.
-
-# XXX There should be a way to distinguish between PCDATA (parsed
-# character data -- the normal case), RCDATA (replaceable character
-# data -- only char and entity references and end tags are special)
-# and CDATA (character data -- only end tags are special).
-
-from __future__ import (absolute_import, division,
- print_function, unicode_literals)
-from future.builtins import *
-from future.backports import _markupbase
-import re
-import warnings
-
-# Regular expressions used for parsing
-
-interesting_normal = re.compile('[&<]')
-incomplete = re.compile('&[a-zA-Z#]')
-
-entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
-charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')
-
-starttagopen = re.compile('<[a-zA-Z]')
-piclose = re.compile('>')
-commentclose = re.compile(r'--\s*>')
-tagfind = re.compile('([a-zA-Z][-.a-zA-Z0-9:_]*)(?:\s|/(?!>))*')
-# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state
-# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state
-tagfind_tolerant = re.compile('[a-zA-Z][^\t\n\r\f />\x00]*')
-# Note:
-# 1) the strict attrfind isn't really strict, but we can't make it
-# correctly strict without breaking backward compatibility;
-# 2) if you change attrfind remember to update locatestarttagend too;
-# 3) if you change attrfind and/or locatestarttagend the parser will
-# explode, so don't do it.
-attrfind = re.compile(
- r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
- r'(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?')
-attrfind_tolerant = re.compile(
- r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*'
- r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*')
-locatestarttagend = re.compile(r"""
- <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
- (?:\s+ # whitespace before attribute name
- (?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name
- (?:\s*=\s* # value indicator
- (?:'[^']*' # LITA-enclosed value
- |\"[^\"]*\" # LIT-enclosed value
- |[^'\">\s]+ # bare value
- )
- )?
- )
- )*
- \s* # trailing whitespace
-""", re.VERBOSE)
-locatestarttagend_tolerant = re.compile(r"""
- <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
- (?:[\s/]* # optional whitespace before attribute name
- (?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name
- (?:\s*=+\s* # value indicator
- (?:'[^']*' # LITA-enclosed value
- |"[^"]*" # LIT-enclosed value
- |(?!['"])[^>\s]* # bare value
- )
- (?:\s*,)* # possibly followed by a comma
- )?(?:\s|/(?!>))*
- )*
- )?
- \s* # trailing whitespace
-""", re.VERBOSE)
-endendtag = re.compile('>')
-# the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between
-# </ and the tag name, so maybe this should be fixed
-endtagfind = re.compile('</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>')
-
-
-class HTMLParseError(Exception):
- """Exception raised for all parse errors."""
-
- def __init__(self, msg, position=(None, None)):
- assert msg
- self.msg = msg
- self.lineno = position[0]
- self.offset = position[1]
-
- def __str__(self):
- result = self.msg
- if self.lineno is not None:
- result = result + ", at line %d" % self.lineno
- if self.offset is not None:
- result = result + ", column %d" % (self.offset + 1)
- return result
-
-
-class HTMLParser(_markupbase.ParserBase):
- """Find tags and other markup and call handler functions.
-
- Usage:
- p = HTMLParser()
- p.feed(data)
- ...
- p.close()
-
- Start tags are handled by calling self.handle_starttag() or
- self.handle_startendtag(); end tags by self.handle_endtag(). The
- data between tags is passed from the parser to the derived class
- by calling self.handle_data() with the data as argument (the data
- may be split up in arbitrary chunks). Entity references are
- passed by calling self.handle_entityref() with the entity
- reference as the argument. Numeric character references are
- passed to self.handle_charref() with the string containing the
- reference as the argument.
- """
-
- CDATA_CONTENT_ELEMENTS = ("script", "style")
-
- def __init__(self, strict=False):
- """Initialize and reset this instance.
-
- If strict is set to False (the default) the parser will parse invalid
- markup, otherwise it will raise an error. Note that the strict mode
- is deprecated.
- """
- if strict:
- warnings.warn("The strict mode is deprecated.",
- DeprecationWarning, stacklevel=2)
- self.strict = strict
- self.reset()
-
- def reset(self):
- """Reset this instance. Loses all unprocessed data."""
- self.rawdata = ''
- self.lasttag = '???'
- self.interesting = interesting_normal
- self.cdata_elem = None
- _markupbase.ParserBase.reset(self)
-
- def feed(self, data):
- r"""Feed data to the parser.
-
- Call this as often as you want, with as little or as much text
- as you want (may include '\n').
- """
- self.rawdata = self.rawdata + data
- self.goahead(0)
-
- def close(self):
- """Handle any buffered data."""
- self.goahead(1)
-
- def error(self, message):
- raise HTMLParseError(message, self.getpos())
-
- __starttag_text = None
-
- def get_starttag_text(self):
- """Return full source of start tag: '<...>'."""
- return self.__starttag_text
-
- def set_cdata_mode(self, elem):
- self.cdata_elem = elem.lower()
- self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
-
- def clear_cdata_mode(self):
- self.interesting = interesting_normal
- self.cdata_elem = None
-
- # Internal -- handle data as far as reasonable. May leave state
- # and data to be processed by a subsequent call. If 'end' is
- # true, force handling all data as if followed by EOF marker.
- def goahead(self, end):
- rawdata = self.rawdata
- i = 0
- n = len(rawdata)
- while i < n:
- match = self.interesting.search(rawdata, i) # < or &
- if match:
- j = match.start()
- else:
- if self.cdata_elem:
- break
- j = n
- if i < j: self.handle_data(rawdata[i:j])
- i = self.updatepos(i, j)
- if i == n: break
- startswith = rawdata.startswith
- if startswith('<', i):
- if starttagopen.match(rawdata, i): # < + letter
- k = self.parse_starttag(i)
- elif startswith("</", i):
- k = self.parse_endtag(i)
- elif startswith("<!--", i):
- k = self.parse_comment(i)
- elif startswith("<?", i):
- k = self.parse_pi(i)
- elif startswith("<!", i):
- if self.strict:
- k = self.parse_declaration(i)
- else:
- k = self.parse_html_declaration(i)
- elif (i + 1) < n:
- self.handle_data("<")
- k = i + 1
- else:
- break
- if k < 0:
- if not end:
- break
- if self.strict:
- self.error("EOF in middle of construct")
- k = rawdata.find('>', i + 1)
- if k < 0:
- k = rawdata.find('<', i + 1)
- if k < 0:
- k = i + 1
- else:
- k += 1
- self.handle_data(rawdata[i:k])
- i = self.updatepos(i, k)
- elif startswith("&#", i):
- match = charref.match(rawdata, i)
- if match:
- name = match.group()[2:-1]
- self.handle_charref(name)
- k = match.end()
- if not startswith(';', k-1):
- k = k - 1
- i = self.updatepos(i, k)
- continue
- else:
- if ";" in rawdata[i:]: #bail by consuming &#
- self.handle_data(rawdata[0:2])
- i = self.updatepos(i, 2)
- break
- elif startswith('&', i):
- match = entityref.match(rawdata, i)
- if match:
- name = match.group(1)
- self.handle_entityref(name)
- k = match.end()
- if not startswith(';', k-1):
- k = k - 1
- i = self.updatepos(i, k)
- continue
- match = incomplete.match(rawdata, i)
- if match:
- # match.group() will contain at least 2 chars
- if end and match.group() == rawdata[i:]:
- if self.strict:
- self.error("EOF in middle of entity or char ref")
- else:
- if k <= i:
- k = n
- i = self.updatepos(i, i + 1)
- # incomplete
- break
- elif (i + 1) < n:
- # not the end of the buffer, and can't be confused
- # with some other construct
- self.handle_data("&")
- i = self.updatepos(i, i + 1)
- else:
- break
- else:
- assert 0, "interesting.search() lied"
- # end while
- if end and i < n and not self.cdata_elem:
- self.handle_data(rawdata[i:n])
- i = self.updatepos(i, n)
- self.rawdata = rawdata[i:]
-
- # Internal -- parse html declarations, return length or -1 if not terminated
- # See w3.org/TR/html5/tokenization.html#markup-declaration-open-state
- # See also parse_declaration in _markupbase
- def parse_html_declaration(self, i):
- rawdata = self.rawdata
- assert rawdata[i:i+2] == '<!', ('unexpected call to '
- 'parse_html_declaration()')
- if rawdata[i:i+4] == '<!--':
- # this case is actually already handled in goahead()
- return self.parse_comment(i)
- elif rawdata[i:i+3] == '<![':
- return self.parse_marked_section(i)
- elif rawdata[i:i+9].lower() == '<!doctype':
- # find the closing >
- gtpos = rawdata.find('>', i+9)
- if gtpos == -1:
- return -1
- self.handle_decl(rawdata[i+2:gtpos])
- return gtpos+1
- else:
- return self.parse_bogus_comment(i)
-
- # Internal -- parse bogus comment, return length or -1 if not terminated
- # see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state
- def parse_bogus_comment(self, i, report=1):
- rawdata = self.rawdata
- assert rawdata[i:i+2] in ('<!', '</'), ('unexpected call to '
- 'parse_comment()')
- pos = rawdata.find('>', i+2)
- if pos == -1:
- return -1
- if report:
- self.handle_comment(rawdata[i+2:pos])
- return pos + 1
-
- # Internal -- parse processing instr, return end or -1 if not terminated
- def parse_pi(self, i):
- rawdata = self.rawdata
- assert rawdata[i:i+2] == '<?', 'unexpected call to parse_pi()'
- match = piclose.search(rawdata, i+2) # >
- if not match:
- return -1
- j = match.start()
- self.handle_pi(rawdata[i+2: j])
- j = match.end()
- return j
-
- # Internal -- handle starttag, return end or -1 if not terminated
- def parse_starttag(self, i):
- self.__starttag_text = None
- endpos = self.check_for_whole_start_tag(i)
- if endpos < 0:
- return endpos
- rawdata = self.rawdata
- self.__starttag_text = rawdata[i:endpos]
-
- # Now parse the data between i+1 and j into a tag and attrs
- attrs = []
- match = tagfind.match(rawdata, i+1)
- assert match, 'unexpected call to parse_starttag()'
- k = match.end()
- self.lasttag = tag = match.group(1).lower()
- while k < endpos:
- if self.strict:
- m = attrfind.match(rawdata, k)
- else:
- m = attrfind_tolerant.match(rawdata, k)
- if not m:
- break
- attrname, rest, attrvalue = m.group(1, 2, 3)
- if not rest:
- attrvalue = None
- elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
- attrvalue[:1] == '"' == attrvalue[-1:]:
- attrvalue = attrvalue[1:-1]
- if attrvalue:
- attrvalue = self.unescape(attrvalue)
- attrs.append((attrname.lower(), attrvalue))
- k = m.end()
-
- end = rawdata[k:endpos].strip()
- if end not in (">", "/>"):
- lineno, offset = self.getpos()
- if "\n" in self.__starttag_text:
- lineno = lineno + self.__starttag_text.count("\n")
- offset = len(self.__starttag_text) \
- - self.__starttag_text.rfind("\n")
- else:
- offset = offset + len(self.__starttag_text)
- if self.strict:
- self.error("junk characters in start tag: %r"
- % (rawdata[k:endpos][:20],))
- self.handle_data(rawdata[i:endpos])
- return endpos
- if end.endswith('/>'):
- # XHTML-style empty tag: <span attr="value" />
- self.handle_startendtag(tag, attrs)
- else:
- self.handle_starttag(tag, attrs)
- if tag in self.CDATA_CONTENT_ELEMENTS:
- self.set_cdata_mode(tag)
- return endpos
-
- # Internal -- check to see if we have a complete starttag; return end
- # or -1 if incomplete.
- def check_for_whole_start_tag(self, i):
- rawdata = self.rawdata
- if self.strict:
- m = locatestarttagend.match(rawdata, i)
- else:
- m = locatestarttagend_tolerant.match(rawdata, i)
- if m:
- j = m.end()
- next = rawdata[j:j+1]
- if next == ">":
- return j + 1
- if next == "/":
- if rawdata.startswith("/>", j):
- return j + 2
- if rawdata.startswith("/", j):
- # buffer boundary
- return -1
- # else bogus input
- if self.strict:
- self.updatepos(i, j + 1)
- self.error("malformed empty start tag")
- if j > i:
- return j
- else:
- return i + 1
- if next == "":
- # end of input
- return -1
- if next in ("abcdefghijklmnopqrstuvwxyz=/"
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
- # end of input in or before attribute value, or we have the
- # '/' from a '/>' ending
- return -1
- if self.strict:
- self.updatepos(i, j)
- self.error("malformed start tag")
- if j > i:
- return j
- else:
- return i + 1
- raise AssertionError("we should not get here!")
-
- # Internal -- parse endtag, return end or -1 if incomplete
- def parse_endtag(self, i):
- rawdata = self.rawdata
- assert rawdata[i:i+2] == "</", "unexpected call to parse_endtag"
- match = endendtag.search(rawdata, i+1) # >
- if not match:
- return -1
- gtpos = match.end()
- match = endtagfind.match(rawdata, i) # </ + tag + >
- if not match:
- if self.cdata_elem is not None:
- self.handle_data(rawdata[i:gtpos])
- return gtpos
- if self.strict:
- self.error("bad end tag: %r" % (rawdata[i:gtpos],))
- # find the name: w3.org/TR/html5/tokenization.html#tag-name-state
- namematch = tagfind_tolerant.match(rawdata, i+2)
- if not namematch:
- # w3.org/TR/html5/tokenization.html#end-tag-open-state
- if rawdata[i:i+3] == '</>':
- return i+3
- else:
- return self.parse_bogus_comment(i)
- tagname = namematch.group().lower()
- # consume and ignore other stuff between the name and the >
- # Note: this is not 100% correct, since we might have things like
- # </tag attr=">">, but looking for > after tha name should cover
- # most of the cases and is much simpler
- gtpos = rawdata.find('>', namematch.end())
- self.handle_endtag(tagname)
- return gtpos+1
-
- elem = match.group(1).lower() # script or style
- if self.cdata_elem is not None:
- if elem != self.cdata_elem:
- self.handle_data(rawdata[i:gtpos])
- return gtpos
-
- self.handle_endtag(elem.lower())
- self.clear_cdata_mode()
- return gtpos
-
- # Overridable -- finish processing of start+end tag: <tag.../>
- def handle_startendtag(self, tag, attrs):
- self.handle_starttag(tag, attrs)
- self.handle_endtag(tag)
-
- # Overridable -- handle start tag
- def handle_starttag(self, tag, attrs):
- pass
-
- # Overridable -- handle end tag
- def handle_endtag(self, tag):
- pass
-
- # Overridable -- handle character reference
- def handle_charref(self, name):
- pass
-
- # Overridable -- handle entity reference
- def handle_entityref(self, name):
- pass
-
- # Overridable -- handle data
- def handle_data(self, data):
- pass
-
- # Overridable -- handle comment
- def handle_comment(self, data):
- pass
-
- # Overridable -- handle declaration
- def handle_decl(self, decl):
- pass
-
- # Overridable -- handle processing instruction
- def handle_pi(self, data):
- pass
-
- def unknown_decl(self, data):
- if self.strict:
- self.error("unknown declaration: %r" % (data,))
-
- # Internal -- helper to remove special character quoting
- def unescape(self, s):
- if '&' not in s:
- return s
- def replaceEntities(s):
- s = s.groups()[0]
- try:
- if s[0] == "#":
- s = s[1:]
- if s[0] in ['x','X']:
- c = int(s[1:].rstrip(';'), 16)
- else:
- c = int(s.rstrip(';'))
- return chr(c)
- except ValueError:
- return '&#' + s
- else:
- from future.backports.html.entities import html5
- if s in html5:
- return html5[s]
- elif s.endswith(';'):
- return '&' + s
- for x in range(2, len(s)):
- if s[:x] in html5:
- return html5[s[:x]] + s[x:]
- else:
- return '&' + s
-
- return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+;|\w{1,32};?))",
- replaceEntities, s)
+"""A parser for HTML and XHTML.
+
+Backported for python-future from Python 3.3.
+"""
+
+# This file is based on sgmllib.py, but the API is slightly different.
+
+# XXX There should be a way to distinguish between PCDATA (parsed
+# character data -- the normal case), RCDATA (replaceable character
+# data -- only char and entity references and end tags are special)
+# and CDATA (character data -- only end tags are special).
+
+from __future__ import (absolute_import, division,
+ print_function, unicode_literals)
+from future.builtins import *
+from future.backports import _markupbase
+import re
+import warnings
+
+# Regular expressions used for parsing
+
+interesting_normal = re.compile('[&<]')
+incomplete = re.compile('&[a-zA-Z#]')
+
+entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
+charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')
+
+starttagopen = re.compile('<[a-zA-Z]')
+piclose = re.compile('>')
+commentclose = re.compile(r'--\s*>')
+tagfind = re.compile('([a-zA-Z][-.a-zA-Z0-9:_]*)(?:\s|/(?!>))*')
+# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state
+# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state
+tagfind_tolerant = re.compile('[a-zA-Z][^\t\n\r\f />\x00]*')
+# Note:
+# 1) the strict attrfind isn't really strict, but we can't make it
+# correctly strict without breaking backward compatibility;
+# 2) if you change attrfind remember to update locatestarttagend too;
+# 3) if you change attrfind and/or locatestarttagend the parser will
+# explode, so don't do it.
+attrfind = re.compile(
+ r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
+ r'(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?')
+attrfind_tolerant = re.compile(
+ r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*'
+ r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*')
+locatestarttagend = re.compile(r"""
+ <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
+ (?:\s+ # whitespace before attribute name
+ (?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name
+ (?:\s*=\s* # value indicator
+ (?:'[^']*' # LITA-enclosed value
+ |\"[^\"]*\" # LIT-enclosed value
+ |[^'\">\s]+ # bare value
+ )
+ )?
+ )
+ )*
+ \s* # trailing whitespace
+""", re.VERBOSE)
+locatestarttagend_tolerant = re.compile(r"""
+ <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
+ (?:[\s/]* # optional whitespace before attribute name
+ (?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name
+ (?:\s*=+\s* # value indicator
+ (?:'[^']*' # LITA-enclosed value
+ |"[^"]*" # LIT-enclosed value
+ |(?!['"])[^>\s]* # bare value
+ )
+ (?:\s*,)* # possibly followed by a comma
+ )?(?:\s|/(?!>))*
+ )*
+ )?
+ \s* # trailing whitespace
+""", re.VERBOSE)
+endendtag = re.compile('>')
+# the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between
+# </ and the tag name, so maybe this should be fixed
+endtagfind = re.compile('</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>')
+
+
+class HTMLParseError(Exception):
+ """Exception raised for all parse errors."""
+
+ def __init__(self, msg, position=(None, None)):
+ assert msg
+ self.msg = msg
+ self.lineno = position[0]
+ self.offset = position[1]
+
+ def __str__(self):
+ result = self.msg
+ if self.lineno is not None:
+ result = result + ", at line %d" % self.lineno
+ if self.offset is not None:
+ result = result + ", column %d" % (self.offset + 1)
+ return result
+
+
+class HTMLParser(_markupbase.ParserBase):
+ """Find tags and other markup and call handler functions.
+
+ Usage:
+ p = HTMLParser()
+ p.feed(data)
+ ...
+ p.close()
+
+ Start tags are handled by calling self.handle_starttag() or
+ self.handle_startendtag(); end tags by self.handle_endtag(). The
+ data between tags is passed from the parser to the derived class
+ by calling self.handle_data() with the data as argument (the data
+ may be split up in arbitrary chunks). Entity references are
+ passed by calling self.handle_entityref() with the entity
+ reference as the argument. Numeric character references are
+ passed to self.handle_charref() with the string containing the
+ reference as the argument.
+ """
+
+ CDATA_CONTENT_ELEMENTS = ("script", "style")
+
+ def __init__(self, strict=False):
+ """Initialize and reset this instance.
+
+ If strict is set to False (the default) the parser will parse invalid
+ markup, otherwise it will raise an error. Note that the strict mode
+ is deprecated.
+ """
+ if strict:
+ warnings.warn("The strict mode is deprecated.",
+ DeprecationWarning, stacklevel=2)
+ self.strict = strict
+ self.reset()
+
+ def reset(self):
+ """Reset this instance. Loses all unprocessed data."""
+ self.rawdata = ''
+ self.lasttag = '???'
+ self.interesting = interesting_normal
+ self.cdata_elem = None
+ _markupbase.ParserBase.reset(self)
+
+ def feed(self, data):
+ r"""Feed data to the parser.
+
+ Call this as often as you want, with as little or as much text
+ as you want (may include '\n').
+ """
+ self.rawdata = self.rawdata + data
+ self.goahead(0)
+
+ def close(self):
+ """Handle any buffered data."""
+ self.goahead(1)
+
+ def error(self, message):
+ raise HTMLParseError(message, self.getpos())
+
+ __starttag_text = None
+
+ def get_starttag_text(self):
+ """Return full source of start tag: '<...>'."""
+ return self.__starttag_text
+
+ def set_cdata_mode(self, elem):
+ self.cdata_elem = elem.lower()
+ self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
+
+ def clear_cdata_mode(self):
+ self.interesting = interesting_normal
+ self.cdata_elem = None
+
+ # Internal -- handle data as far as reasonable. May leave state
+ # and data to be processed by a subsequent call. If 'end' is
+ # true, force handling all data as if followed by EOF marker.
+ def goahead(self, end):
+ rawdata = self.rawdata
+ i = 0
+ n = len(rawdata)
+ while i < n:
+ match = self.interesting.search(rawdata, i) # < or &
+ if match:
+ j = match.start()
+ else:
+ if self.cdata_elem:
+ break
+ j = n
+ if i < j: self.handle_data(rawdata[i:j])
+ i = self.updatepos(i, j)
+ if i == n: break
+ startswith = rawdata.startswith
+ if startswith('<', i):
+ if starttagopen.match(rawdata, i): # < + letter
+ k = self.parse_starttag(i)
+ elif startswith("</", i):
+ k = self.parse_endtag(i)
+ elif startswith("<!--", i):
+ k = self.parse_comment(i)
+ elif startswith("<?", i):
+ k = self.parse_pi(i)
+ elif startswith("<!", i):
+ if self.strict:
+ k = self.parse_declaration(i)
+ else:
+ k = self.parse_html_declaration(i)
+ elif (i + 1) < n:
+ self.handle_data("<")
+ k = i + 1
+ else:
+ break
+ if k < 0:
+ if not end:
+ break
+ if self.strict:
+ self.error("EOF in middle of construct")
+ k = rawdata.find('>', i + 1)
+ if k < 0:
+ k = rawdata.find('<', i + 1)
+ if k < 0:
+ k = i + 1
+ else:
+ k += 1
+ self.handle_data(rawdata[i:k])
+ i = self.updatepos(i, k)
+ elif startswith("&#", i):
+ match = charref.match(rawdata, i)
+ if match:
+ name = match.group()[2:-1]
+ self.handle_charref(name)
+ k = match.end()
+ if not startswith(';', k-1):
+ k = k - 1
+ i = self.updatepos(i, k)
+ continue
+ else:
+ if ";" in rawdata[i:]: #bail by consuming &#
+ self.handle_data(rawdata[0:2])
+ i = self.updatepos(i, 2)
+ break
+ elif startswith('&', i):
+ match = entityref.match(rawdata, i)
+ if match:
+ name = match.group(1)
+ self.handle_entityref(name)
+ k = match.end()
+ if not startswith(';', k-1):
+ k = k - 1
+ i = self.updatepos(i, k)
+ continue
+ match = incomplete.match(rawdata, i)
+ if match:
+ # match.group() will contain at least 2 chars
+ if end and match.group() == rawdata[i:]:
+ if self.strict:
+ self.error("EOF in middle of entity or char ref")
+ else:
+ if k <= i:
+ k = n
+ i = self.updatepos(i, i + 1)
+ # incomplete
+ break
+ elif (i + 1) < n:
+ # not the end of the buffer, and can't be confused
+ # with some other construct
+ self.handle_data("&")
+ i = self.updatepos(i, i + 1)
+ else:
+ break
+ else:
+ assert 0, "interesting.search() lied"
+ # end while
+ if end and i < n and not self.cdata_elem:
+ self.handle_data(rawdata[i:n])
+ i = self.updatepos(i, n)
+ self.rawdata = rawdata[i:]
+
+ # Internal -- parse html declarations, return length or -1 if not terminated
+ # See w3.org/TR/html5/tokenization.html#markup-declaration-open-state
+ # See also parse_declaration in _markupbase
+ def parse_html_declaration(self, i):
+ rawdata = self.rawdata
+ assert rawdata[i:i+2] == '<!', ('unexpected call to '
+ 'parse_html_declaration()')
+ if rawdata[i:i+4] == '<!--':
+ # this case is actually already handled in goahead()
+ return self.parse_comment(i)
+ elif rawdata[i:i+3] == '<![':
+ return self.parse_marked_section(i)
+ elif rawdata[i:i+9].lower() == '<!doctype':
+ # find the closing >
+ gtpos = rawdata.find('>', i+9)
+ if gtpos == -1:
+ return -1
+ self.handle_decl(rawdata[i+2:gtpos])
+ return gtpos+1
+ else:
+ return self.parse_bogus_comment(i)
+
+ # Internal -- parse bogus comment, return length or -1 if not terminated
+ # see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state
+ def parse_bogus_comment(self, i, report=1):
+ rawdata = self.rawdata
+ assert rawdata[i:i+2] in ('<!', '</'), ('unexpected call to '
+ 'parse_comment()')
+ pos = rawdata.find('>', i+2)
+ if pos == -1:
+ return -1
+ if report:
+ self.handle_comment(rawdata[i+2:pos])
+ return pos + 1
+
+ # Internal -- parse processing instr, return end or -1 if not terminated
+ def parse_pi(self, i):
+ rawdata = self.rawdata
+ assert rawdata[i:i+2] == '<?', 'unexpected call to parse_pi()'
+ match = piclose.search(rawdata, i+2) # >
+ if not match:
+ return -1
+ j = match.start()
+ self.handle_pi(rawdata[i+2: j])
+ j = match.end()
+ return j
+
+ # Internal -- handle starttag, return end or -1 if not terminated
+ def parse_starttag(self, i):
+ self.__starttag_text = None
+ endpos = self.check_for_whole_start_tag(i)
+ if endpos < 0:
+ return endpos
+ rawdata = self.rawdata
+ self.__starttag_text = rawdata[i:endpos]
+
+ # Now parse the data between i+1 and j into a tag and attrs
+ attrs = []
+ match = tagfind.match(rawdata, i+1)
+ assert match, 'unexpected call to parse_starttag()'
+ k = match.end()
+ self.lasttag = tag = match.group(1).lower()
+ while k < endpos:
+ if self.strict:
+ m = attrfind.match(rawdata, k)
+ else:
+ m = attrfind_tolerant.match(rawdata, k)
+ if not m:
+ break
+ attrname, rest, attrvalue = m.group(1, 2, 3)
+ if not rest:
+ attrvalue = None
+ elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
+ attrvalue[:1] == '"' == attrvalue[-1:]:
+ attrvalue = attrvalue[1:-1]
+ if attrvalue:
+ attrvalue = self.unescape(attrvalue)
+ attrs.append((attrname.lower(), attrvalue))
+ k = m.end()
+
+ end = rawdata[k:endpos].strip()
+ if end not in (">", "/>"):
+ lineno, offset = self.getpos()
+ if "\n" in self.__starttag_text:
+ lineno = lineno + self.__starttag_text.count("\n")
+ offset = len(self.__starttag_text) \
+ - self.__starttag_text.rfind("\n")
+ else:
+ offset = offset + len(self.__starttag_text)
+ if self.strict:
+ self.error("junk characters in start tag: %r"
+ % (rawdata[k:endpos][:20],))
+ self.handle_data(rawdata[i:endpos])
+ return endpos
+ if end.endswith('/>'):
+ # XHTML-style empty tag: <span attr="value" />
+ self.handle_startendtag(tag, attrs)
+ else:
+ self.handle_starttag(tag, attrs)
+ if tag in self.CDATA_CONTENT_ELEMENTS:
+ self.set_cdata_mode(tag)
+ return endpos
+
+ # Internal -- check to see if we have a complete starttag; return end
+ # or -1 if incomplete.
+ def check_for_whole_start_tag(self, i):
+ rawdata = self.rawdata
+ if self.strict:
+ m = locatestarttagend.match(rawdata, i)
+ else:
+ m = locatestarttagend_tolerant.match(rawdata, i)
+ if m:
+ j = m.end()
+ next = rawdata[j:j+1]
+ if next == ">":
+ return j + 1
+ if next == "/":
+ if rawdata.startswith("/>", j):
+ return j + 2
+ if rawdata.startswith("/", j):
+ # buffer boundary
+ return -1
+ # else bogus input
+ if self.strict:
+ self.updatepos(i, j + 1)
+ self.error("malformed empty start tag")
+ if j > i:
+ return j
+ else:
+ return i + 1
+ if next == "":
+ # end of input
+ return -1
+ if next in ("abcdefghijklmnopqrstuvwxyz=/"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
+ # end of input in or before attribute value, or we have the
+ # '/' from a '/>' ending
+ return -1
+ if self.strict:
+ self.updatepos(i, j)
+ self.error("malformed start tag")
+ if j > i:
+ return j
+ else:
+ return i + 1
+ raise AssertionError("we should not get here!")
+
+ # Internal -- parse endtag, return end or -1 if incomplete
+ def parse_endtag(self, i):
+ rawdata = self.rawdata
+ assert rawdata[i:i+2] == "</", "unexpected call to parse_endtag"
+ match = endendtag.search(rawdata, i+1) # >
+ if not match:
+ return -1
+ gtpos = match.end()
+ match = endtagfind.match(rawdata, i) # </ + tag + >
+ if not match:
+ if self.cdata_elem is not None:
+ self.handle_data(rawdata[i:gtpos])
+ return gtpos
+ if self.strict:
+ self.error("bad end tag: %r" % (rawdata[i:gtpos],))
+ # find the name: w3.org/TR/html5/tokenization.html#tag-name-state
+ namematch = tagfind_tolerant.match(rawdata, i+2)
+ if not namematch:
+ # w3.org/TR/html5/tokenization.html#end-tag-open-state
+ if rawdata[i:i+3] == '</>':
+ return i+3
+ else:
+ return self.parse_bogus_comment(i)
+ tagname = namematch.group().lower()
+ # consume and ignore other stuff between the name and the >
+ # Note: this is not 100% correct, since we might have things like
+ # </tag attr=">">, but looking for > after tha name should cover
+ # most of the cases and is much simpler
+ gtpos = rawdata.find('>', namematch.end())
+ self.handle_endtag(tagname)
+ return gtpos+1
+
+ elem = match.group(1).lower() # script or style
+ if self.cdata_elem is not None:
+ if elem != self.cdata_elem:
+ self.handle_data(rawdata[i:gtpos])
+ return gtpos
+
+ self.handle_endtag(elem.lower())
+ self.clear_cdata_mode()
+ return gtpos
+
+ # Overridable -- finish processing of start+end tag: <tag.../>
+ def handle_startendtag(self, tag, attrs):
+ self.handle_starttag(tag, attrs)
+ self.handle_endtag(tag)
+
+ # Overridable -- handle start tag
+ def handle_starttag(self, tag, attrs):
+ pass
+
+ # Overridable -- handle end tag
+ def handle_endtag(self, tag):
+ pass
+
+ # Overridable -- handle character reference
+ def handle_charref(self, name):
+ pass
+
+ # Overridable -- handle entity reference
+ def handle_entityref(self, name):
+ pass
+
+ # Overridable -- handle data
+ def handle_data(self, data):
+ pass
+
+ # Overridable -- handle comment
+ def handle_comment(self, data):
+ pass
+
+ # Overridable -- handle declaration
+ def handle_decl(self, decl):
+ pass
+
+ # Overridable -- handle processing instruction
+ def handle_pi(self, data):
+ pass
+
+ def unknown_decl(self, data):
+ if self.strict:
+ self.error("unknown declaration: %r" % (data,))
+
+ # Internal -- helper to remove special character quoting
+ def unescape(self, s):
+ if '&' not in s:
+ return s
+ def replaceEntities(s):
+ s = s.groups()[0]
+ try:
+ if s[0] == "#":
+ s = s[1:]
+ if s[0] in ['x','X']:
+ c = int(s[1:].rstrip(';'), 16)
+ else:
+ c = int(s.rstrip(';'))
+ return chr(c)
+ except ValueError:
+ return '&#' + s
+ else:
+ from future.backports.html.entities import html5
+ if s in html5:
+ return html5[s]
+ elif s.endswith(';'):
+ return '&' + s
+ for x in range(2, len(s)):
+ if s[:x] in html5:
+ return html5[s[:x]] + s[x:]
+ else:
+ return '&' + s
+
+ return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+;|\w{1,32};?))",
+ replaceEntities, s)
diff --git a/contrib/python/future/future/backports/http/client.py b/contrib/python/future/future/backports/http/client.py
index 0185a20ef1..e663d125c4 100644
--- a/contrib/python/future/future/backports/http/client.py
+++ b/contrib/python/future/future/backports/http/client.py
@@ -1,1346 +1,1346 @@
-"""HTTP/1.1 client library
-
-A backport of the Python 3.3 http/client.py module for python-future.
-
-<intro stuff goes here>
-<other stuff, too>
-
-HTTPConnection goes through a number of "states", which define when a client
-may legally make another request or fetch the response for a particular
-request. This diagram details these state transitions:
-
- (null)
- |
- | HTTPConnection()
- v
- Idle
- |
- | putrequest()
- v
- Request-started
- |
- | ( putheader() )* endheaders()
- v
- Request-sent
- |
- | response = getresponse()
- v
- Unread-response [Response-headers-read]
- |\____________________
- | |
- | response.read() | putrequest()
- v v
- Idle Req-started-unread-response
- ______/|
- / |
- response.read() | | ( putheader() )* endheaders()
- v v
- Request-started Req-sent-unread-response
- |
- | response.read()
- v
- Request-sent
-
-This diagram presents the following rules:
- -- a second request may not be started until {response-headers-read}
- -- a response [object] cannot be retrieved until {request-sent}
- -- there is no differentiation between an unread response body and a
- partially read response body
-
-Note: this enforcement is applied by the HTTPConnection class. The
- HTTPResponse class does not enforce this state machine, which
- implies sophisticated clients may accelerate the request/response
- pipeline. Caution should be taken, though: accelerating the states
- beyond the above pattern may imply knowledge of the server's
- connection-close behavior for certain requests. For example, it
- is impossible to tell whether the server will close the connection
- UNTIL the response headers have been read; this means that further
- requests cannot be placed into the pipeline until it is known that
- the server will NOT be closing the connection.
-
-Logical State __state __response
-------------- ------- ----------
-Idle _CS_IDLE None
-Request-started _CS_REQ_STARTED None
-Request-sent _CS_REQ_SENT None
-Unread-response _CS_IDLE <response_class>
-Req-started-unread-response _CS_REQ_STARTED <response_class>
-Req-sent-unread-response _CS_REQ_SENT <response_class>
-"""
-
-from __future__ import (absolute_import, division,
- print_function, unicode_literals)
-from future.builtins import bytes, int, str, super
-from future.utils import PY2
-
-from future.backports.email import parser as email_parser
-from future.backports.email import message as email_message
-from future.backports.misc import create_connection as socket_create_connection
-import io
-import os
-import socket
-from future.backports.urllib.parse import urlsplit
-import warnings
-from array import array
-
+"""HTTP/1.1 client library
+
+A backport of the Python 3.3 http/client.py module for python-future.
+
+<intro stuff goes here>
+<other stuff, too>
+
+HTTPConnection goes through a number of "states", which define when a client
+may legally make another request or fetch the response for a particular
+request. This diagram details these state transitions:
+
+ (null)
+ |
+ | HTTPConnection()
+ v
+ Idle
+ |
+ | putrequest()
+ v
+ Request-started
+ |
+ | ( putheader() )* endheaders()
+ v
+ Request-sent
+ |
+ | response = getresponse()
+ v
+ Unread-response [Response-headers-read]
+ |\____________________
+ | |
+ | response.read() | putrequest()
+ v v
+ Idle Req-started-unread-response
+ ______/|
+ / |
+ response.read() | | ( putheader() )* endheaders()
+ v v
+ Request-started Req-sent-unread-response
+ |
+ | response.read()
+ v
+ Request-sent
+
+This diagram presents the following rules:
+ -- a second request may not be started until {response-headers-read}
+ -- a response [object] cannot be retrieved until {request-sent}
+ -- there is no differentiation between an unread response body and a
+ partially read response body
+
+Note: this enforcement is applied by the HTTPConnection class. The
+ HTTPResponse class does not enforce this state machine, which
+ implies sophisticated clients may accelerate the request/response
+ pipeline. Caution should be taken, though: accelerating the states
+ beyond the above pattern may imply knowledge of the server's
+ connection-close behavior for certain requests. For example, it
+ is impossible to tell whether the server will close the connection
+ UNTIL the response headers have been read; this means that further
+ requests cannot be placed into the pipeline until it is known that
+ the server will NOT be closing the connection.
+
+Logical State __state __response
+------------- ------- ----------
+Idle _CS_IDLE None
+Request-started _CS_REQ_STARTED None
+Request-sent _CS_REQ_SENT None
+Unread-response _CS_IDLE <response_class>
+Req-started-unread-response _CS_REQ_STARTED <response_class>
+Req-sent-unread-response _CS_REQ_SENT <response_class>
+"""
+
+from __future__ import (absolute_import, division,
+ print_function, unicode_literals)
+from future.builtins import bytes, int, str, super
+from future.utils import PY2
+
+from future.backports.email import parser as email_parser
+from future.backports.email import message as email_message
+from future.backports.misc import create_connection as socket_create_connection
+import io
+import os
+import socket
+from future.backports.urllib.parse import urlsplit
+import warnings
+from array import array
+
if PY2:
from collections import Iterable
else:
from collections.abc import Iterable
-__all__ = ["HTTPResponse", "HTTPConnection",
- "HTTPException", "NotConnected", "UnknownProtocol",
- "UnknownTransferEncoding", "UnimplementedFileMode",
- "IncompleteRead", "InvalidURL", "ImproperConnectionState",
- "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
- "BadStatusLine", "error", "responses"]
-
-HTTP_PORT = 80
-HTTPS_PORT = 443
-
-_UNKNOWN = 'UNKNOWN'
-
-# connection states
-_CS_IDLE = 'Idle'
-_CS_REQ_STARTED = 'Request-started'
-_CS_REQ_SENT = 'Request-sent'
-
-# status codes
-# informational
-CONTINUE = 100
-SWITCHING_PROTOCOLS = 101
-PROCESSING = 102
-
-# successful
-OK = 200
-CREATED = 201
-ACCEPTED = 202
-NON_AUTHORITATIVE_INFORMATION = 203
-NO_CONTENT = 204
-RESET_CONTENT = 205
-PARTIAL_CONTENT = 206
-MULTI_STATUS = 207
-IM_USED = 226
-
-# redirection
-MULTIPLE_CHOICES = 300
-MOVED_PERMANENTLY = 301
-FOUND = 302
-SEE_OTHER = 303
-NOT_MODIFIED = 304
-USE_PROXY = 305
-TEMPORARY_REDIRECT = 307
-
-# client error
-BAD_REQUEST = 400
-UNAUTHORIZED = 401
-PAYMENT_REQUIRED = 402
-FORBIDDEN = 403
-NOT_FOUND = 404
-METHOD_NOT_ALLOWED = 405
-NOT_ACCEPTABLE = 406
-PROXY_AUTHENTICATION_REQUIRED = 407
-REQUEST_TIMEOUT = 408
-CONFLICT = 409
-GONE = 410
-LENGTH_REQUIRED = 411
-PRECONDITION_FAILED = 412
-REQUEST_ENTITY_TOO_LARGE = 413
-REQUEST_URI_TOO_LONG = 414
-UNSUPPORTED_MEDIA_TYPE = 415
-REQUESTED_RANGE_NOT_SATISFIABLE = 416
-EXPECTATION_FAILED = 417
-UNPROCESSABLE_ENTITY = 422
-LOCKED = 423
-FAILED_DEPENDENCY = 424
-UPGRADE_REQUIRED = 426
-PRECONDITION_REQUIRED = 428
-TOO_MANY_REQUESTS = 429
-REQUEST_HEADER_FIELDS_TOO_LARGE = 431
-
-# server error
-INTERNAL_SERVER_ERROR = 500
-NOT_IMPLEMENTED = 501
-BAD_GATEWAY = 502
-SERVICE_UNAVAILABLE = 503
-GATEWAY_TIMEOUT = 504
-HTTP_VERSION_NOT_SUPPORTED = 505
-INSUFFICIENT_STORAGE = 507
-NOT_EXTENDED = 510
-NETWORK_AUTHENTICATION_REQUIRED = 511
-
-# Mapping status codes to official W3C names
-responses = {
- 100: 'Continue',
- 101: 'Switching Protocols',
-
- 200: 'OK',
- 201: 'Created',
- 202: 'Accepted',
- 203: 'Non-Authoritative Information',
- 204: 'No Content',
- 205: 'Reset Content',
- 206: 'Partial Content',
-
- 300: 'Multiple Choices',
- 301: 'Moved Permanently',
- 302: 'Found',
- 303: 'See Other',
- 304: 'Not Modified',
- 305: 'Use Proxy',
- 306: '(Unused)',
- 307: 'Temporary Redirect',
-
- 400: 'Bad Request',
- 401: 'Unauthorized',
- 402: 'Payment Required',
- 403: 'Forbidden',
- 404: 'Not Found',
- 405: 'Method Not Allowed',
- 406: 'Not Acceptable',
- 407: 'Proxy Authentication Required',
- 408: 'Request Timeout',
- 409: 'Conflict',
- 410: 'Gone',
- 411: 'Length Required',
- 412: 'Precondition Failed',
- 413: 'Request Entity Too Large',
- 414: 'Request-URI Too Long',
- 415: 'Unsupported Media Type',
- 416: 'Requested Range Not Satisfiable',
- 417: 'Expectation Failed',
- 428: 'Precondition Required',
- 429: 'Too Many Requests',
- 431: 'Request Header Fields Too Large',
-
- 500: 'Internal Server Error',
- 501: 'Not Implemented',
- 502: 'Bad Gateway',
- 503: 'Service Unavailable',
- 504: 'Gateway Timeout',
- 505: 'HTTP Version Not Supported',
- 511: 'Network Authentication Required',
-}
-
-# maximal amount of data to read at one time in _safe_read
-MAXAMOUNT = 1048576
-
-# maximal line length when calling readline().
-_MAXLINE = 65536
-_MAXHEADERS = 100
-
-
-class HTTPMessage(email_message.Message):
- # XXX The only usage of this method is in
- # http.server.CGIHTTPRequestHandler. Maybe move the code there so
- # that it doesn't need to be part of the public API. The API has
- # never been defined so this could cause backwards compatibility
- # issues.
-
- def getallmatchingheaders(self, name):
- """Find all header lines matching a given header name.
-
- Look through the list of headers and find all lines matching a given
- header name (and their continuation lines). A list of the lines is
- returned, without interpretation. If the header does not occur, an
- empty list is returned. If the header occurs multiple times, all
- occurrences are returned. Case is not important in the header name.
-
- """
- name = name.lower() + ':'
- n = len(name)
- lst = []
- hit = 0
- for line in self.keys():
- if line[:n].lower() == name:
- hit = 1
- elif not line[:1].isspace():
- hit = 0
- if hit:
- lst.append(line)
- return lst
-
-def parse_headers(fp, _class=HTTPMessage):
- """Parses only RFC2822 headers from a file pointer.
-
- email Parser wants to see strings rather than bytes.
- But a TextIOWrapper around self.rfile would buffer too many bytes
- from the stream, bytes which we later need to read as bytes.
- So we read the correct bytes here, as bytes, for email Parser
- to parse.
-
- """
- headers = []
- while True:
- line = fp.readline(_MAXLINE + 1)
- if len(line) > _MAXLINE:
- raise LineTooLong("header line")
- headers.append(line)
- if len(headers) > _MAXHEADERS:
- raise HTTPException("got more than %d headers" % _MAXHEADERS)
- if line in (b'\r\n', b'\n', b''):
- break
- hstring = bytes(b'').join(headers).decode('iso-8859-1')
- return email_parser.Parser(_class=_class).parsestr(hstring)
-
-
-_strict_sentinel = object()
-
-class HTTPResponse(io.RawIOBase):
-
- # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
-
- # The bytes from the socket object are iso-8859-1 strings.
- # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded
- # text following RFC 2047. The basic status line parsing only
- # accepts iso-8859-1.
-
- def __init__(self, sock, debuglevel=0, strict=_strict_sentinel, method=None, url=None):
- # If the response includes a content-length header, we need to
- # make sure that the client doesn't read more than the
- # specified number of bytes. If it does, it will block until
- # the server times out and closes the connection. This will
- # happen if a self.fp.read() is done (without a size) whether
- # self.fp is buffered or not. So, no self.fp.read() by
- # clients unless they know what they are doing.
- self.fp = sock.makefile("rb")
- self.debuglevel = debuglevel
- if strict is not _strict_sentinel:
- warnings.warn("the 'strict' argument isn't supported anymore; "
- "http.client now always assumes HTTP/1.x compliant servers.",
- DeprecationWarning, 2)
- self._method = method
-
- # The HTTPResponse object is returned via urllib. The clients
- # of http and urllib expect different attributes for the
- # headers. headers is used here and supports urllib. msg is
- # provided as a backwards compatibility layer for http
- # clients.
-
- self.headers = self.msg = None
-
- # from the Status-Line of the response
- self.version = _UNKNOWN # HTTP-Version
- self.status = _UNKNOWN # Status-Code
- self.reason = _UNKNOWN # Reason-Phrase
-
- self.chunked = _UNKNOWN # is "chunked" being used?
- self.chunk_left = _UNKNOWN # bytes left to read in current chunk
- self.length = _UNKNOWN # number of bytes left in response
- self.will_close = _UNKNOWN # conn will close at end of response
-
- def _read_status(self):
- line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
- if len(line) > _MAXLINE:
- raise LineTooLong("status line")
- if self.debuglevel > 0:
- print("reply:", repr(line))
- if not line:
- # Presumably, the server closed the connection before
- # sending a valid response.
- raise BadStatusLine(line)
- try:
- version, status, reason = line.split(None, 2)
- except ValueError:
- try:
- version, status = line.split(None, 1)
- reason = ""
- except ValueError:
- # empty version will cause next test to fail.
- version = ""
- if not version.startswith("HTTP/"):
- self._close_conn()
- raise BadStatusLine(line)
-
- # The status code is a three-digit number
- try:
- status = int(status)
- if status < 100 or status > 999:
- raise BadStatusLine(line)
- except ValueError:
- raise BadStatusLine(line)
- return version, status, reason
-
- def begin(self):
- if self.headers is not None:
- # we've already started reading the response
- return
-
- # read until we get a non-100 response
- while True:
- version, status, reason = self._read_status()
- if status != CONTINUE:
- break
- # skip the header from the 100 response
- while True:
- skip = self.fp.readline(_MAXLINE + 1)
- if len(skip) > _MAXLINE:
- raise LineTooLong("header line")
- skip = skip.strip()
- if not skip:
- break
- if self.debuglevel > 0:
- print("header:", skip)
-
- self.code = self.status = status
- self.reason = reason.strip()
- if version in ("HTTP/1.0", "HTTP/0.9"):
- # Some servers might still return "0.9", treat it as 1.0 anyway
- self.version = 10
- elif version.startswith("HTTP/1."):
- self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
- else:
- raise UnknownProtocol(version)
-
- self.headers = self.msg = parse_headers(self.fp)
-
- if self.debuglevel > 0:
- for hdr in self.headers:
- print("header:", hdr, end=" ")
-
- # are we using the chunked-style of transfer encoding?
- tr_enc = self.headers.get("transfer-encoding")
- if tr_enc and tr_enc.lower() == "chunked":
- self.chunked = True
- self.chunk_left = None
- else:
- self.chunked = False
-
- # will the connection close at the end of the response?
- self.will_close = self._check_close()
-
- # do we have a Content-Length?
- # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
- self.length = None
- length = self.headers.get("content-length")
-
- # are we using the chunked-style of transfer encoding?
- tr_enc = self.headers.get("transfer-encoding")
- if length and not self.chunked:
- try:
- self.length = int(length)
- except ValueError:
- self.length = None
- else:
- if self.length < 0: # ignore nonsensical negative lengths
- self.length = None
- else:
- self.length = None
-
- # does the body have a fixed length? (of zero)
- if (status == NO_CONTENT or status == NOT_MODIFIED or
- 100 <= status < 200 or # 1xx codes
- self._method == "HEAD"):
- self.length = 0
-
- # if the connection remains open, and we aren't using chunked, and
- # a content-length was not provided, then assume that the connection
- # WILL close.
- if (not self.will_close and
- not self.chunked and
- self.length is None):
- self.will_close = True
-
- def _check_close(self):
- conn = self.headers.get("connection")
- if self.version == 11:
- # An HTTP/1.1 proxy is assumed to stay open unless
- # explicitly closed.
- conn = self.headers.get("connection")
- if conn and "close" in conn.lower():
- return True
- return False
-
- # Some HTTP/1.0 implementations have support for persistent
- # connections, using rules different than HTTP/1.1.
-
- # For older HTTP, Keep-Alive indicates persistent connection.
- if self.headers.get("keep-alive"):
- return False
-
- # At least Akamai returns a "Connection: Keep-Alive" header,
- # which was supposed to be sent by the client.
- if conn and "keep-alive" in conn.lower():
- return False
-
- # Proxy-Connection is a netscape hack.
- pconn = self.headers.get("proxy-connection")
- if pconn and "keep-alive" in pconn.lower():
- return False
-
- # otherwise, assume it will close
- return True
-
- def _close_conn(self):
- fp = self.fp
- self.fp = None
- fp.close()
-
- def close(self):
- super().close() # set "closed" flag
- if self.fp:
- self._close_conn()
-
- # These implementations are for the benefit of io.BufferedReader.
-
- # XXX This class should probably be revised to act more like
- # the "raw stream" that BufferedReader expects.
-
- def flush(self):
- super().flush()
- if self.fp:
- self.fp.flush()
-
- def readable(self):
- return True
-
- # End of "raw stream" methods
-
- def isclosed(self):
- """True if the connection is closed."""
- # NOTE: it is possible that we will not ever call self.close(). This
- # case occurs when will_close is TRUE, length is None, and we
- # read up to the last byte, but NOT past it.
- #
- # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
- # called, meaning self.isclosed() is meaningful.
- return self.fp is None
-
- def read(self, amt=None):
- if self.fp is None:
- return bytes(b"")
-
- if self._method == "HEAD":
- self._close_conn()
- return bytes(b"")
-
- if amt is not None:
- # Amount is given, so call base class version
- # (which is implemented in terms of self.readinto)
- return bytes(super(HTTPResponse, self).read(amt))
- else:
- # Amount is not given (unbounded read) so we must check self.length
- # and self.chunked
-
- if self.chunked:
- return self._readall_chunked()
-
- if self.length is None:
- s = self.fp.read()
- else:
- try:
- s = self._safe_read(self.length)
- except IncompleteRead:
- self._close_conn()
- raise
- self.length = 0
- self._close_conn() # we read everything
- return bytes(s)
-
- def readinto(self, b):
- if self.fp is None:
- return 0
-
- if self._method == "HEAD":
- self._close_conn()
- return 0
-
- if self.chunked:
- return self._readinto_chunked(b)
-
- if self.length is not None:
- if len(b) > self.length:
- # clip the read to the "end of response"
- b = memoryview(b)[0:self.length]
-
- # we do not use _safe_read() here because this may be a .will_close
- # connection, and the user is reading more bytes than will be provided
- # (for example, reading in 1k chunks)
-
- if PY2:
- data = self.fp.read(len(b))
- n = len(data)
- b[:n] = data
- else:
- n = self.fp.readinto(b)
-
- if not n and b:
- # Ideally, we would raise IncompleteRead if the content-length
- # wasn't satisfied, but it might break compatibility.
- self._close_conn()
- elif self.length is not None:
- self.length -= n
- if not self.length:
- self._close_conn()
- return n
-
- def _read_next_chunk_size(self):
- # Read the next chunk size from the file
- line = self.fp.readline(_MAXLINE + 1)
- if len(line) > _MAXLINE:
- raise LineTooLong("chunk size")
- i = line.find(b";")
- if i >= 0:
- line = line[:i] # strip chunk-extensions
- try:
- return int(line, 16)
- except ValueError:
- # close the connection as protocol synchronisation is
- # probably lost
- self._close_conn()
- raise
-
- def _read_and_discard_trailer(self):
- # read and discard trailer up to the CRLF terminator
- ### note: we shouldn't have any trailers!
- while True:
- line = self.fp.readline(_MAXLINE + 1)
- if len(line) > _MAXLINE:
- raise LineTooLong("trailer line")
- if not line:
- # a vanishingly small number of sites EOF without
- # sending the trailer
- break
- if line in (b'\r\n', b'\n', b''):
- break
-
- def _readall_chunked(self):
- assert self.chunked != _UNKNOWN
- chunk_left = self.chunk_left
- value = []
- while True:
- if chunk_left is None:
- try:
- chunk_left = self._read_next_chunk_size()
- if chunk_left == 0:
- break
- except ValueError:
- raise IncompleteRead(bytes(b'').join(value))
- value.append(self._safe_read(chunk_left))
-
- # we read the whole chunk, get another
- self._safe_read(2) # toss the CRLF at the end of the chunk
- chunk_left = None
-
- self._read_and_discard_trailer()
-
- # we read everything; close the "file"
- self._close_conn()
-
- return bytes(b'').join(value)
-
- def _readinto_chunked(self, b):
- assert self.chunked != _UNKNOWN
- chunk_left = self.chunk_left
-
- total_bytes = 0
- mvb = memoryview(b)
- while True:
- if chunk_left is None:
- try:
- chunk_left = self._read_next_chunk_size()
- if chunk_left == 0:
- break
- except ValueError:
- raise IncompleteRead(bytes(b[0:total_bytes]))
-
- if len(mvb) < chunk_left:
- n = self._safe_readinto(mvb)
- self.chunk_left = chunk_left - n
- return total_bytes + n
- elif len(mvb) == chunk_left:
- n = self._safe_readinto(mvb)
- self._safe_read(2) # toss the CRLF at the end of the chunk
- self.chunk_left = None
- return total_bytes + n
- else:
- temp_mvb = mvb[0:chunk_left]
- n = self._safe_readinto(temp_mvb)
- mvb = mvb[n:]
- total_bytes += n
-
- # we read the whole chunk, get another
- self._safe_read(2) # toss the CRLF at the end of the chunk
- chunk_left = None
-
- self._read_and_discard_trailer()
-
- # we read everything; close the "file"
- self._close_conn()
-
- return total_bytes
-
- def _safe_read(self, amt):
- """Read the number of bytes requested, compensating for partial reads.
-
- Normally, we have a blocking socket, but a read() can be interrupted
- by a signal (resulting in a partial read).
-
- Note that we cannot distinguish between EOF and an interrupt when zero
- bytes have been read. IncompleteRead() will be raised in this
- situation.
-
- This function should be used when <amt> bytes "should" be present for
- reading. If the bytes are truly not available (due to EOF), then the
- IncompleteRead exception can be used to detect the problem.
- """
- s = []
- while amt > 0:
- chunk = self.fp.read(min(amt, MAXAMOUNT))
- if not chunk:
- raise IncompleteRead(bytes(b'').join(s), amt)
- s.append(chunk)
- amt -= len(chunk)
- return bytes(b"").join(s)
-
- def _safe_readinto(self, b):
- """Same as _safe_read, but for reading into a buffer."""
- total_bytes = 0
- mvb = memoryview(b)
- while total_bytes < len(b):
- if MAXAMOUNT < len(mvb):
- temp_mvb = mvb[0:MAXAMOUNT]
+__all__ = ["HTTPResponse", "HTTPConnection",
+ "HTTPException", "NotConnected", "UnknownProtocol",
+ "UnknownTransferEncoding", "UnimplementedFileMode",
+ "IncompleteRead", "InvalidURL", "ImproperConnectionState",
+ "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
+ "BadStatusLine", "error", "responses"]
+
+HTTP_PORT = 80
+HTTPS_PORT = 443
+
+_UNKNOWN = 'UNKNOWN'
+
+# connection states
+_CS_IDLE = 'Idle'
+_CS_REQ_STARTED = 'Request-started'
+_CS_REQ_SENT = 'Request-sent'
+
+# status codes
+# informational
+CONTINUE = 100
+SWITCHING_PROTOCOLS = 101
+PROCESSING = 102
+
+# successful
+OK = 200
+CREATED = 201
+ACCEPTED = 202
+NON_AUTHORITATIVE_INFORMATION = 203
+NO_CONTENT = 204
+RESET_CONTENT = 205
+PARTIAL_CONTENT = 206
+MULTI_STATUS = 207
+IM_USED = 226
+
+# redirection
+MULTIPLE_CHOICES = 300
+MOVED_PERMANENTLY = 301
+FOUND = 302
+SEE_OTHER = 303
+NOT_MODIFIED = 304
+USE_PROXY = 305
+TEMPORARY_REDIRECT = 307
+
+# client error
+BAD_REQUEST = 400
+UNAUTHORIZED = 401
+PAYMENT_REQUIRED = 402
+FORBIDDEN = 403
+NOT_FOUND = 404
+METHOD_NOT_ALLOWED = 405
+NOT_ACCEPTABLE = 406
+PROXY_AUTHENTICATION_REQUIRED = 407
+REQUEST_TIMEOUT = 408
+CONFLICT = 409
+GONE = 410
+LENGTH_REQUIRED = 411
+PRECONDITION_FAILED = 412
+REQUEST_ENTITY_TOO_LARGE = 413
+REQUEST_URI_TOO_LONG = 414
+UNSUPPORTED_MEDIA_TYPE = 415
+REQUESTED_RANGE_NOT_SATISFIABLE = 416
+EXPECTATION_FAILED = 417
+UNPROCESSABLE_ENTITY = 422
+LOCKED = 423
+FAILED_DEPENDENCY = 424
+UPGRADE_REQUIRED = 426
+PRECONDITION_REQUIRED = 428
+TOO_MANY_REQUESTS = 429
+REQUEST_HEADER_FIELDS_TOO_LARGE = 431
+
+# server error
+INTERNAL_SERVER_ERROR = 500
+NOT_IMPLEMENTED = 501
+BAD_GATEWAY = 502
+SERVICE_UNAVAILABLE = 503
+GATEWAY_TIMEOUT = 504
+HTTP_VERSION_NOT_SUPPORTED = 505
+INSUFFICIENT_STORAGE = 507
+NOT_EXTENDED = 510
+NETWORK_AUTHENTICATION_REQUIRED = 511
+
+# Mapping status codes to official W3C names
+responses = {
+ 100: 'Continue',
+ 101: 'Switching Protocols',
+
+ 200: 'OK',
+ 201: 'Created',
+ 202: 'Accepted',
+ 203: 'Non-Authoritative Information',
+ 204: 'No Content',
+ 205: 'Reset Content',
+ 206: 'Partial Content',
+
+ 300: 'Multiple Choices',
+ 301: 'Moved Permanently',
+ 302: 'Found',
+ 303: 'See Other',
+ 304: 'Not Modified',
+ 305: 'Use Proxy',
+ 306: '(Unused)',
+ 307: 'Temporary Redirect',
+
+ 400: 'Bad Request',
+ 401: 'Unauthorized',
+ 402: 'Payment Required',
+ 403: 'Forbidden',
+ 404: 'Not Found',
+ 405: 'Method Not Allowed',
+ 406: 'Not Acceptable',
+ 407: 'Proxy Authentication Required',
+ 408: 'Request Timeout',
+ 409: 'Conflict',
+ 410: 'Gone',
+ 411: 'Length Required',
+ 412: 'Precondition Failed',
+ 413: 'Request Entity Too Large',
+ 414: 'Request-URI Too Long',
+ 415: 'Unsupported Media Type',
+ 416: 'Requested Range Not Satisfiable',
+ 417: 'Expectation Failed',
+ 428: 'Precondition Required',
+ 429: 'Too Many Requests',
+ 431: 'Request Header Fields Too Large',
+
+ 500: 'Internal Server Error',
+ 501: 'Not Implemented',
+ 502: 'Bad Gateway',
+ 503: 'Service Unavailable',
+ 504: 'Gateway Timeout',
+ 505: 'HTTP Version Not Supported',
+ 511: 'Network Authentication Required',
+}
+
+# maximal amount of data to read at one time in _safe_read
+MAXAMOUNT = 1048576
+
+# maximal line length when calling readline().
+_MAXLINE = 65536
+_MAXHEADERS = 100
+
+
+class HTTPMessage(email_message.Message):
+ # XXX The only usage of this method is in
+ # http.server.CGIHTTPRequestHandler. Maybe move the code there so
+ # that it doesn't need to be part of the public API. The API has
+ # never been defined so this could cause backwards compatibility
+ # issues.
+
+ def getallmatchingheaders(self, name):
+ """Find all header lines matching a given header name.
+
+ Look through the list of headers and find all lines matching a given
+ header name (and their continuation lines). A list of the lines is
+ returned, without interpretation. If the header does not occur, an
+ empty list is returned. If the header occurs multiple times, all
+ occurrences are returned. Case is not important in the header name.
+
+ """
+ name = name.lower() + ':'
+ n = len(name)
+ lst = []
+ hit = 0
+ for line in self.keys():
+ if line[:n].lower() == name:
+ hit = 1
+ elif not line[:1].isspace():
+ hit = 0
+ if hit:
+ lst.append(line)
+ return lst
+
+def parse_headers(fp, _class=HTTPMessage):
+ """Parses only RFC2822 headers from a file pointer.
+
+ email Parser wants to see strings rather than bytes.
+ But a TextIOWrapper around self.rfile would buffer too many bytes
+ from the stream, bytes which we later need to read as bytes.
+ So we read the correct bytes here, as bytes, for email Parser
+ to parse.
+
+ """
+ headers = []
+ while True:
+ line = fp.readline(_MAXLINE + 1)
+ if len(line) > _MAXLINE:
+ raise LineTooLong("header line")
+ headers.append(line)
+ if len(headers) > _MAXHEADERS:
+ raise HTTPException("got more than %d headers" % _MAXHEADERS)
+ if line in (b'\r\n', b'\n', b''):
+ break
+ hstring = bytes(b'').join(headers).decode('iso-8859-1')
+ return email_parser.Parser(_class=_class).parsestr(hstring)
+
+
+_strict_sentinel = object()
+
+class HTTPResponse(io.RawIOBase):
+
+ # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
+
+ # The bytes from the socket object are iso-8859-1 strings.
+ # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded
+ # text following RFC 2047. The basic status line parsing only
+ # accepts iso-8859-1.
+
+ def __init__(self, sock, debuglevel=0, strict=_strict_sentinel, method=None, url=None):
+ # If the response includes a content-length header, we need to
+ # make sure that the client doesn't read more than the
+ # specified number of bytes. If it does, it will block until
+ # the server times out and closes the connection. This will
+ # happen if a self.fp.read() is done (without a size) whether
+ # self.fp is buffered or not. So, no self.fp.read() by
+ # clients unless they know what they are doing.
+ self.fp = sock.makefile("rb")
+ self.debuglevel = debuglevel
+ if strict is not _strict_sentinel:
+ warnings.warn("the 'strict' argument isn't supported anymore; "
+ "http.client now always assumes HTTP/1.x compliant servers.",
+ DeprecationWarning, 2)
+ self._method = method
+
+ # The HTTPResponse object is returned via urllib. The clients
+ # of http and urllib expect different attributes for the
+ # headers. headers is used here and supports urllib. msg is
+ # provided as a backwards compatibility layer for http
+ # clients.
+
+ self.headers = self.msg = None
+
+ # from the Status-Line of the response
+ self.version = _UNKNOWN # HTTP-Version
+ self.status = _UNKNOWN # Status-Code
+ self.reason = _UNKNOWN # Reason-Phrase
+
+ self.chunked = _UNKNOWN # is "chunked" being used?
+ self.chunk_left = _UNKNOWN # bytes left to read in current chunk
+ self.length = _UNKNOWN # number of bytes left in response
+ self.will_close = _UNKNOWN # conn will close at end of response
+
+ def _read_status(self):
+ line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
+ if len(line) > _MAXLINE:
+ raise LineTooLong("status line")
+ if self.debuglevel > 0:
+ print("reply:", repr(line))
+ if not line:
+ # Presumably, the server closed the connection before
+ # sending a valid response.
+ raise BadStatusLine(line)
+ try:
+ version, status, reason = line.split(None, 2)
+ except ValueError:
+ try:
+ version, status = line.split(None, 1)
+ reason = ""
+ except ValueError:
+ # empty version will cause next test to fail.
+ version = ""
+ if not version.startswith("HTTP/"):
+ self._close_conn()
+ raise BadStatusLine(line)
+
+ # The status code is a three-digit number
+ try:
+ status = int(status)
+ if status < 100 or status > 999:
+ raise BadStatusLine(line)
+ except ValueError:
+ raise BadStatusLine(line)
+ return version, status, reason
+
+ def begin(self):
+ if self.headers is not None:
+ # we've already started reading the response
+ return
+
+ # read until we get a non-100 response
+ while True:
+ version, status, reason = self._read_status()
+ if status != CONTINUE:
+ break
+ # skip the header from the 100 response
+ while True:
+ skip = self.fp.readline(_MAXLINE + 1)
+ if len(skip) > _MAXLINE:
+ raise LineTooLong("header line")
+ skip = skip.strip()
+ if not skip:
+ break
+ if self.debuglevel > 0:
+ print("header:", skip)
+
+ self.code = self.status = status
+ self.reason = reason.strip()
+ if version in ("HTTP/1.0", "HTTP/0.9"):
+ # Some servers might still return "0.9", treat it as 1.0 anyway
+ self.version = 10
+ elif version.startswith("HTTP/1."):
+ self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
+ else:
+ raise UnknownProtocol(version)
+
+ self.headers = self.msg = parse_headers(self.fp)
+
+ if self.debuglevel > 0:
+ for hdr in self.headers:
+ print("header:", hdr, end=" ")
+
+ # are we using the chunked-style of transfer encoding?
+ tr_enc = self.headers.get("transfer-encoding")
+ if tr_enc and tr_enc.lower() == "chunked":
+ self.chunked = True
+ self.chunk_left = None
+ else:
+ self.chunked = False
+
+ # will the connection close at the end of the response?
+ self.will_close = self._check_close()
+
+ # do we have a Content-Length?
+ # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
+ self.length = None
+ length = self.headers.get("content-length")
+
+ # are we using the chunked-style of transfer encoding?
+ tr_enc = self.headers.get("transfer-encoding")
+ if length and not self.chunked:
+ try:
+ self.length = int(length)
+ except ValueError:
+ self.length = None
+ else:
+ if self.length < 0: # ignore nonsensical negative lengths
+ self.length = None
+ else:
+ self.length = None
+
+ # does the body have a fixed length? (of zero)
+ if (status == NO_CONTENT or status == NOT_MODIFIED or
+ 100 <= status < 200 or # 1xx codes
+ self._method == "HEAD"):
+ self.length = 0
+
+ # if the connection remains open, and we aren't using chunked, and
+ # a content-length was not provided, then assume that the connection
+ # WILL close.
+ if (not self.will_close and
+ not self.chunked and
+ self.length is None):
+ self.will_close = True
+
def _check_close(self):
    """Report whether the connection will be closed after this response.

    Reads ``self.version`` and the ``connection``, ``keep-alive`` and
    ``proxy-connection`` entries of ``self.headers``.

    Returns True when the connection is expected to close, else False.
    """
    def mentions(value, token):
        # True only for a non-empty header value containing the token,
        # compared case-insensitively.
        return bool(value) and token in value.lower()

    headers = self.headers
    connection = headers.get("connection")

    if self.version == 11:
        # An HTTP/1.1 connection is assumed to stay open unless it is
        # explicitly closed.
        return mentions(connection, "close")

    # Some HTTP/1.0 implementations support persistent connections,
    # using rules different from HTTP/1.1.

    # For older HTTP, a Keep-Alive header indicates persistence.
    if headers.get("keep-alive"):
        return False

    # At least Akamai returns a "Connection: Keep-Alive" header,
    # which was supposed to be sent by the client.
    if mentions(connection, "keep-alive"):
        return False

    # Proxy-Connection is a Netscape hack.
    if mentions(headers.get("proxy-connection"), "keep-alive"):
        return False

    # Otherwise, assume it will close.
    return True
+
def _close_conn(self):
    """Detach the underlying file object from the response and close it.

    ``self.fp`` is set to None before the file's ``close()`` is called.
    """
    fp, self.fp = self.fp, None
    fp.close()
+
def close(self):
    """Close the response, and its underlying file if one is still attached."""
    super().close()  # set "closed" flag
    if not self.fp:
        return
    self._close_conn()
+
+ # These implementations are for the benefit of io.BufferedReader.
+
+ # XXX This class should probably be revised to act more like
+ # the "raw stream" that BufferedReader expects.
+
def flush(self):
    """Flush the base stream, then the underlying file if one is attached."""
    super().flush()
    fp = self.fp
    if fp:
        fp.flush()
+
def readable(self):
    """Always report the stream as readable."""
    return True
+
+ # End of "raw stream" methods
+
def isclosed(self):
    """True if the connection is closed."""
    # NOTE: self.close() may never be called. That happens when
    #       will_close is TRUE, length is None, and we read up to the
    #       last byte, but NOT past it.
    #
    # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
    #          called, meaning self.isclosed() is meaningful.
    no_file_attached = self.fp is None
    return no_file_attached
+
def read(self, amt=None):
    """Read response body data and return it as bytes.

    amt -- maximum number of bytes to read; None reads the whole body.

    Returns empty bytes when no file is attached or the request method was
    HEAD (the file is closed in the HEAD case). Re-raises IncompleteRead,
    after closing the file, when a body of known length is cut short.
    """
    if self.fp is None:
        return bytes(b"")

    if self._method == "HEAD":
        self._close_conn()
        return bytes(b"")

    if amt is not None:
        # Bounded read: defer to the base class version, which is
        # implemented in terms of self.readinto.
        return bytes(super(HTTPResponse, self).read(amt))

    # Unbounded read: behaviour depends on self.chunked and self.length.
    if self.chunked:
        return self._readall_chunked()

    if self.length is None:
        body = self.fp.read()
    else:
        try:
            body = self._safe_read(self.length)
        except IncompleteRead:
            self._close_conn()
            raise
        self.length = 0
    self._close_conn()  # we read everything
    return bytes(body)
+
def readinto(self, b):
    """Read body bytes into the writable buffer ``b``.

    Returns the number of bytes stored. Returns 0 when no file is attached
    or the request method was HEAD; chunked responses are delegated to
    ``_readinto_chunked``.
    """
    if self.fp is None:
        return 0

    if self._method == "HEAD":
        self._close_conn()
        return 0

    if self.chunked:
        return self._readinto_chunked(b)

    remaining = self.length
    if remaining is not None and len(b) > remaining:
        # clip the read to the "end of response"
        b = memoryview(b)[0:remaining]

    # _safe_read() is not used here because this may be a .will_close
    # connection, and the user may be reading more bytes than will be
    # provided (for example, reading in 1k chunks).
    if PY2:
        data = self.fp.read(len(b))
        count = len(data)
        b[:count] = data
    else:
        count = self.fp.readinto(b)

    if not count and b:
        # Ideally, we would raise IncompleteRead if the content-length
        # wasn't satisfied, but it might break compatibility.
        self._close_conn()
    elif self.length is not None:
        self.length -= count
        if not self.length:
            self._close_conn()
    return count
+
def _read_next_chunk_size(self):
    """Read the next chunk-size line from the file and return it as an int.

    Raises LineTooLong when the line exceeds _MAXLINE bytes. Raises
    ValueError, after closing the file, when the size is not valid hex.
    """
    raw = self.fp.readline(_MAXLINE + 1)
    if len(raw) > _MAXLINE:
        raise LineTooLong("chunk size")
    # Keep only the text before the first ";" (strip chunk-extensions).
    size_field = raw.partition(b";")[0]
    try:
        return int(size_field, 16)
    except ValueError:
        # close the connection as protocol synchronisation is
        # probably lost
        self._close_conn()
        raise
+
def _read_and_discard_trailer(self):
    """Read and discard trailer lines up to the terminating blank line.

    Stops at a bare CRLF/LF line or at EOF. Raises LineTooLong when a
    line exceeds _MAXLINE bytes.
    """
    ### note: we shouldn't have any trailers!
    terminators = (b'\r\n', b'\n', b'')
    while True:
        trailer = self.fp.readline(_MAXLINE + 1)
        if len(trailer) > _MAXLINE:
            raise LineTooLong("trailer line")
        # An empty read means EOF: a vanishingly small number of sites
        # EOF without sending the trailer.
        if not trailer or trailer in terminators:
            break
+
def _readall_chunked(self):
    """Read the rest of a chunked body and return it as bytes.

    Starts from ``self.chunk_left`` and reads chunks until a zero-size
    chunk is seen, then discards the trailer and closes the file. Raises
    IncompleteRead, carrying the data gathered so far, when a chunk size
    cannot be parsed.
    """
    assert self.chunked != _UNKNOWN
    remaining = self.chunk_left
    pieces = []
    while True:
        if remaining is None:
            try:
                remaining = self._read_next_chunk_size()
            except ValueError:
                raise IncompleteRead(bytes(b'').join(pieces))
            if remaining == 0:
                break
        pieces.append(self._safe_read(remaining))

        # The whole chunk was read; move on to the next one.
        self._safe_read(2)  # toss the CRLF at the end of the chunk
        remaining = None

    self._read_and_discard_trailer()

    # Everything was read; close the "file".
    self._close_conn()

    return bytes(b'').join(pieces)
+
def _readinto_chunked(self, b):
    """Read chunked body data into the buffer ``b``.

    Returns the number of bytes stored. When the buffer fills before the
    body ends, the unread part of the current chunk is recorded in
    ``self.chunk_left``. When the final zero-size chunk is reached, the
    trailer is discarded and the file is closed. Raises IncompleteRead,
    carrying the bytes stored so far, when a chunk size cannot be parsed.
    """
    assert self.chunked != _UNKNOWN
    remaining = self.chunk_left

    filled = 0
    window = memoryview(b)
    while True:
        if remaining is None:
            try:
                remaining = self._read_next_chunk_size()
            except ValueError:
                raise IncompleteRead(bytes(b[0:filled]))
            if remaining == 0:
                break

        space = len(window)
        if space < remaining:
            # Buffer is smaller than the chunk: fill it and remember how
            # much of the chunk is still unread.
            got = self._safe_readinto(window)
            self.chunk_left = remaining - got
            return filled + got
        if space == remaining:
            # Buffer and chunk end together.
            got = self._safe_readinto(window)
            self._safe_read(2)  # toss the CRLF at the end of the chunk
            self.chunk_left = None
            return filled + got

        # The chunk fits with room to spare: take it all and continue.
        got = self._safe_readinto(window[0:remaining])
        window = window[got:]
        filled += got

        self._safe_read(2)  # toss the CRLF at the end of the chunk
        remaining = None

    self._read_and_discard_trailer()

    # Everything was read; close the "file".
    self._close_conn()

    return filled
+
def _safe_read(self, amt):
    """Read exactly ``amt`` bytes, compensating for partial reads.

    Normally the socket is blocking, but a read() can be interrupted by a
    signal, producing a partial read, so reads are repeated until the
    requested amount has arrived.

    EOF and an interrupt cannot be told apart when zero bytes come back;
    IncompleteRead is raised in that situation, carrying the data read so
    far and the number of bytes still missing.

    Use this when <amt> bytes "should" be present. If they are truly not
    available (due to EOF), IncompleteRead signals the problem.
    """
    pieces = []
    while amt > 0:
        # Each individual read is capped at MAXAMOUNT bytes.
        piece = self.fp.read(min(amt, MAXAMOUNT))
        if not piece:
            raise IncompleteRead(bytes(b'').join(pieces), amt)
        pieces.append(piece)
        amt -= len(piece)
    return bytes(b"").join(pieces)
+
def _safe_readinto(self, b):
    """Same as _safe_read, but for reading into a buffer.

    Fills the writable buffer ``b`` completely, repeating partial reads,
    and returns the number of bytes stored (``len(b)`` on success).

    Raises IncompleteRead when a read returns no data before the buffer
    is full. The exception carries the bytes stored so far and ``len(b)``.
    """
    total_bytes = 0
    mvb = memoryview(b)
    while total_bytes < len(b):
        # Each individual read is capped at MAXAMOUNT bytes.
        target = mvb[0:MAXAMOUNT] if MAXAMOUNT < len(mvb) else mvb
        if PY2:
            data = self.fp.read(len(target))
            n = len(data)
            target[:n] = data
        else:
            n = self.fp.readinto(target)
        if not n:
            # The received bytes are at the start of the caller's buffer.
            # `mvb` has already been advanced past them, so slicing `mvb`
            # here would report still-unfilled bytes as the partial data.
            raise IncompleteRead(bytes(b[0:total_bytes]), len(b))
        mvb = mvb[n:]
        total_bytes += n
    return total_bytes
-
- def fileno(self):
- return self.fp.fileno()
-
- def getheader(self, name, default=None):
- if self.headers is None:
- raise ResponseNotReady()
- headers = self.headers.get_all(name) or default
- if isinstance(headers, str) or not hasattr(headers, '__iter__'):
- return headers
- else:
- return ', '.join(headers)
-
- def getheaders(self):
- """Return list of (header, value) tuples."""
- if self.headers is None:
- raise ResponseNotReady()
- return list(self.headers.items())
-
- # We override IOBase.__iter__ so that it doesn't check for closed-ness
-
- def __iter__(self):
- return self
-
- # For compatibility with old-style urllib responses.
-
- def info(self):
- return self.headers
-
- def geturl(self):
- return self.url
-
- def getcode(self):
- return self.status
-
-class HTTPConnection(object):
-
- _http_vsn = 11
- _http_vsn_str = 'HTTP/1.1'
-
- response_class = HTTPResponse
- default_port = HTTP_PORT
- auto_open = 1
- debuglevel = 0
-
- def __init__(self, host, port=None, strict=_strict_sentinel,
- timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
- if strict is not _strict_sentinel:
- warnings.warn("the 'strict' argument isn't supported anymore; "
- "http.client now always assumes HTTP/1.x compliant servers.",
- DeprecationWarning, 2)
- self.timeout = timeout
- self.source_address = source_address
- self.sock = None
- self._buffer = []
- self.__response = None
- self.__state = _CS_IDLE
- self._method = None
- self._tunnel_host = None
- self._tunnel_port = None
- self._tunnel_headers = {}
-
- self._set_hostport(host, port)
-
- def set_tunnel(self, host, port=None, headers=None):
- """ Sets up the host and the port for the HTTP CONNECT Tunnelling.
-
- The headers argument should be a mapping of extra HTTP headers
- to send with the CONNECT request.
- """
- self._tunnel_host = host
- self._tunnel_port = port
- if headers:
- self._tunnel_headers = headers
- else:
- self._tunnel_headers.clear()
-
- def _set_hostport(self, host, port):
- if port is None:
- i = host.rfind(':')
- j = host.rfind(']') # ipv6 addresses have [...]
- if i > j:
- try:
- port = int(host[i+1:])
- except ValueError:
- if host[i+1:] == "": # http://foo.com:/ == http://foo.com/
- port = self.default_port
- else:
- raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
- host = host[:i]
- else:
- port = self.default_port
- if host and host[0] == '[' and host[-1] == ']':
- host = host[1:-1]
- self.host = host
- self.port = port
-
- def set_debuglevel(self, level):
- self.debuglevel = level
-
- def _tunnel(self):
- self._set_hostport(self._tunnel_host, self._tunnel_port)
- connect_str = "CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port)
- connect_bytes = connect_str.encode("ascii")
- self.send(connect_bytes)
- for header, value in self._tunnel_headers.items():
- header_str = "%s: %s\r\n" % (header, value)
- header_bytes = header_str.encode("latin-1")
- self.send(header_bytes)
- self.send(bytes(b'\r\n'))
-
- response = self.response_class(self.sock, method=self._method)
- (version, code, message) = response._read_status()
-
- if code != 200:
- self.close()
- raise socket.error("Tunnel connection failed: %d %s" % (code,
- message.strip()))
- while True:
- line = response.fp.readline(_MAXLINE + 1)
- if len(line) > _MAXLINE:
- raise LineTooLong("header line")
- if not line:
- # for sites which EOF without sending a trailer
- break
- if line in (b'\r\n', b'\n', b''):
- break
-
- def connect(self):
- """Connect to the host and port specified in __init__."""
- self.sock = socket_create_connection((self.host,self.port),
- self.timeout, self.source_address)
- if self._tunnel_host:
- self._tunnel()
-
- def close(self):
- """Close the connection to the HTTP server."""
- if self.sock:
- self.sock.close() # close it manually... there may be other refs
- self.sock = None
- if self.__response:
- self.__response.close()
- self.__response = None
- self.__state = _CS_IDLE
-
- def send(self, data):
- """Send `data' to the server.
- ``data`` can be a string object, a bytes object, an array object, a
- file-like object that supports a .read() method, or an iterable object.
- """
-
- if self.sock is None:
- if self.auto_open:
- self.connect()
- else:
- raise NotConnected()
-
- if self.debuglevel > 0:
- print("send:", repr(data))
- blocksize = 8192
- # Python 2.7 array objects have a read method which is incompatible
- # with the 2-arg calling syntax below.
- if hasattr(data, "read") and not isinstance(data, array):
- if self.debuglevel > 0:
- print("sendIng a read()able")
- encode = False
- try:
- mode = data.mode
- except AttributeError:
- # io.BytesIO and other file-like objects don't have a `mode`
- # attribute.
- pass
- else:
- if "b" not in mode:
- encode = True
- if self.debuglevel > 0:
- print("encoding file using iso-8859-1")
- while 1:
- datablock = data.read(blocksize)
- if not datablock:
- break
- if encode:
- datablock = datablock.encode("iso-8859-1")
- self.sock.sendall(datablock)
- return
- try:
- self.sock.sendall(data)
- except TypeError:
+ if not n:
+ raise IncompleteRead(bytes(mvb[0:total_bytes]), len(b))
+ mvb = mvb[n:]
+ total_bytes += n
+ return total_bytes
+
def fileno(self):
    """Return the file descriptor number reported by the underlying file."""
    fp = self.fp
    return fp.fileno()
+
def getheader(self, name, default=None):
    """Return the value of header ``name``, or ``default`` when absent.

    When several values are present (or ``default`` is a non-string
    iterable) they are joined with ', '. Raises ResponseNotReady when
    the headers have not been set yet.
    """
    if self.headers is None:
        raise ResponseNotReady()
    found = self.headers.get_all(name) or default
    # Strings and non-iterables are returned untouched; anything else is
    # treated as a sequence of values to join.
    joinable = hasattr(found, '__iter__') and not isinstance(found, str)
    return ', '.join(found) if joinable else found
+
def getheaders(self):
    """Return list of (header, value) tuples.

    Raises ResponseNotReady when the headers have not been set yet.
    """
    message = self.headers
    if message is None:
        raise ResponseNotReady()
    return list(message.items())
+
+ # We override IOBase.__iter__ so that it doesn't check for closed-ness
+
def __iter__(self):
    """Return the response itself as its own iterator."""
    return self
+
+ # For compatibility with old-style urllib responses.
+
def info(self):
    """Return the ``headers`` attribute of this response."""
    return self.headers
+
def geturl(self):
    """Return the ``url`` attribute of this response."""
    return self.url
+
def getcode(self):
    """Return the ``status`` attribute of this response."""
    return self.status
+
+class HTTPConnection(object):
+
+ _http_vsn = 11
+ _http_vsn_str = 'HTTP/1.1'
+
+ response_class = HTTPResponse
+ default_port = HTTP_PORT
+ auto_open = 1
+ debuglevel = 0
+
def __init__(self, host, port=None, strict=_strict_sentinel,
             timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
    """Set up an unconnected HTTP connection object.

    host -- server host, optionally with a ":port" suffix
    port -- server port; None lets _set_hostport work it out
    strict -- no longer supported; passing it emits a DeprecationWarning
    timeout, source_address -- stored and passed on when connecting
    """
    if strict is not _strict_sentinel:
        warnings.warn("the 'strict' argument isn't supported anymore; "
                      "http.client now always assumes HTTP/1.x compliant servers.",
                      DeprecationWarning, 2)

    # Parameters used when the socket is opened.
    self.timeout = timeout
    self.source_address = source_address

    # Socket and request/response state.
    self.sock = None
    self._buffer = []
    self.__response = None
    self.__state = _CS_IDLE
    self._method = None

    # CONNECT tunnel settings (see set_tunnel).
    self._tunnel_host = None
    self._tunnel_port = None
    self._tunnel_headers = {}

    self._set_hostport(host, port)
+
def set_tunnel(self, host, port=None, headers=None):
    """Set up the host and the port for HTTP CONNECT tunnelling.

    host -- tunnel destination host
    port -- tunnel destination port (None is stored as given)
    headers -- optional mapping of extra HTTP headers to send with the
        CONNECT request; when empty or omitted, any previously
        configured tunnel headers are dropped
    """
    self._tunnel_host = host
    self._tunnel_port = port
    if headers:
        self._tunnel_headers = headers
    else:
        # Rebind instead of calling .clear(): the current mapping may be
        # a dict supplied by an earlier caller, and clearing it in place
        # would silently empty the caller's own object.
        self._tunnel_headers = {}
+
def _set_hostport(self, host, port):
    """Store the target host and port on the connection.

    When ``port`` is None it is taken from a ":port" suffix of ``host``
    if present, otherwise from ``self.default_port``, and surrounding
    "[...]" brackets are stripped from the host. Raises InvalidURL for a
    non-empty, non-numeric port suffix.
    """
    if port is None:
        colon = host.rfind(':')
        bracket = host.rfind(']')  # ipv6 addresses have [...]
        if colon <= bracket:
            # No port suffix after the (optional) closing bracket.
            port = self.default_port
        else:
            port_text = host[colon + 1:]
            try:
                port = int(port_text)
            except ValueError:
                if port_text != "":
                    raise InvalidURL("nonnumeric port: '%s'" % port_text)
                # http://foo.com:/ == http://foo.com/
                port = self.default_port
            host = host[:colon]
        if host and host[0] == '[' and host[-1] == ']':
            host = host[1:-1]
    self.host = host
    self.port = port
+
def set_debuglevel(self, level):
    """Store ``level`` as this connection's ``debuglevel``."""
    self.debuglevel = level
+
def _tunnel(self):
    """Issue a CONNECT request for the configured tunnel target.

    Re-targets host/port to the tunnel destination, sends the CONNECT
    line and the tunnel headers, then reads the status line. Closes the
    connection and raises socket.error unless the status code is 200.
    On success the remaining response header lines are read and dropped.
    """
    self._set_hostport(self._tunnel_host, self._tunnel_port)

    request_line = "CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port)
    self.send(request_line.encode("ascii"))
    for name, value in self._tunnel_headers.items():
        header_line = "%s: %s\r\n" % (name, value)
        self.send(header_line.encode("latin-1"))
    self.send(bytes(b'\r\n'))

    response = self.response_class(self.sock, method=self._method)
    _version, code, message = response._read_status()

    if code != 200:
        self.close()
        raise socket.error("Tunnel connection failed: %d %s" % (code,
                                                                message.strip()))

    end_of_headers = (b'\r\n', b'\n', b'')
    while True:
        line = response.fp.readline(_MAXLINE + 1)
        if len(line) > _MAXLINE:
            raise LineTooLong("header line")
        # An empty read covers sites which EOF without sending a trailer.
        if not line or line in end_of_headers:
            break
+
def connect(self):
    """Connect to the host and port specified in __init__."""
    address = (self.host, self.port)
    self.sock = socket_create_connection(address, self.timeout,
                                         self.source_address)
    # A configured tunnel host means a CONNECT tunnel must be set up.
    if self._tunnel_host:
        self._tunnel()
+
def close(self):
    """Close the connection to the HTTP server."""
    sock = self.sock
    if sock:
        sock.close()  # close it manually... there may be other refs
        self.sock = None
    pending = self.__response
    if pending:
        pending.close()
        self.__response = None
    # Whatever happened before, the connection is idle again.
    self.__state = _CS_IDLE
+
+ def send(self, data):
+ """Send `data' to the server.
+ ``data`` can be a string object, a bytes object, an array object, a
+ file-like object that supports a .read() method, or an iterable object.
+ """
+
+ if self.sock is None:
+ if self.auto_open:
+ self.connect()
+ else:
+ raise NotConnected()
+
+ if self.debuglevel > 0:
+ print("send:", repr(data))
+ blocksize = 8192
+ # Python 2.7 array objects have a read method which is incompatible
+ # with the 2-arg calling syntax below.
+ if hasattr(data, "read") and not isinstance(data, array):
+ if self.debuglevel > 0:
+ print("sendIng a read()able")
+ encode = False
+ try:
+ mode = data.mode
+ except AttributeError:
+ # io.BytesIO and other file-like objects don't have a `mode`
+ # attribute.
+ pass
+ else:
+ if "b" not in mode:
+ encode = True
+ if self.debuglevel > 0:
+ print("encoding file using iso-8859-1")
+ while 1:
+ datablock = data.read(blocksize)
+ if not datablock:
+ break
+ if encode:
+ datablock = datablock.encode("iso-8859-1")
+ self.sock.sendall(datablock)
+ return
+ try:
+ self.sock.sendall(data)
+ except TypeError:
if isinstance(data, Iterable):
- for d in data:
- self.sock.sendall(d)
- else:
- raise TypeError("data should be a bytes-like object "
- "or an iterable, got %r" % type(data))
-
- def _output(self, s):
- """Add a line of output to the current request buffer.
-
- Assumes that the line does *not* end with \\r\\n.
- """
- self._buffer.append(s)
-
- def _send_output(self, message_body=None):
- """Send the currently buffered request and clear the buffer.
-
- Appends an extra \\r\\n to the buffer.
- A message_body may be specified, to be appended to the request.
- """
- self._buffer.extend((bytes(b""), bytes(b"")))
- msg = bytes(b"\r\n").join(self._buffer)
- del self._buffer[:]
- # If msg and message_body are sent in a single send() call,
- # it will avoid performance problems caused by the interaction
- # between delayed ack and the Nagle algorithm.
- if isinstance(message_body, bytes):
- msg += message_body
- message_body = None
- self.send(msg)
- if message_body is not None:
- # message_body was not a string (i.e. it is a file), and
- # we must run the risk of Nagle.
- self.send(message_body)
-
- def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
- """Send a request to the server.
-
- `method' specifies an HTTP request method, e.g. 'GET'.
- `url' specifies the object being requested, e.g. '/index.html'.
- `skip_host' if True does not add automatically a 'Host:' header
- `skip_accept_encoding' if True does not add automatically an
- 'Accept-Encoding:' header
- """
-
- # if a prior response has been completed, then forget about it.
- if self.__response and self.__response.isclosed():
- self.__response = None
-
-
- # in certain cases, we cannot issue another request on this connection.
- # this occurs when:
- # 1) we are in the process of sending a request. (_CS_REQ_STARTED)
- # 2) a response to a previous request has signalled that it is going
- # to close the connection upon completion.
- # 3) the headers for the previous response have not been read, thus
- # we cannot determine whether point (2) is true. (_CS_REQ_SENT)
- #
- # if there is no prior response, then we can request at will.
- #
- # if point (2) is true, then we will have passed the socket to the
- # response (effectively meaning, "there is no prior response"), and
- # will open a new one when a new request is made.
- #
- # Note: if a prior response exists, then we *can* start a new request.
- # We are not allowed to begin fetching the response to this new
- # request, however, until that prior response is complete.
- #
- if self.__state == _CS_IDLE:
- self.__state = _CS_REQ_STARTED
- else:
- raise CannotSendRequest(self.__state)
-
- # Save the method we use, we need it later in the response phase
- self._method = method
- if not url:
- url = '/'
- request = '%s %s %s' % (method, url, self._http_vsn_str)
-
- # Non-ASCII characters should have been eliminated earlier
- self._output(request.encode('ascii'))
-
- if self._http_vsn == 11:
- # Issue some standard headers for better HTTP/1.1 compliance
-
- if not skip_host:
- # this header is issued *only* for HTTP/1.1
- # connections. more specifically, this means it is
- # only issued when the client uses the new
- # HTTPConnection() class. backwards-compat clients
- # will be using HTTP/1.0 and those clients may be
- # issuing this header themselves. we should NOT issue
- # it twice; some web servers (such as Apache) barf
- # when they see two Host: headers
-
- # If we need a non-standard port,include it in the
- # header. If the request is going through a proxy,
- # but the host of the actual URL, not the host of the
- # proxy.
-
- netloc = ''
- if url.startswith('http'):
- nil, netloc, nil, nil, nil = urlsplit(url)
-
- if netloc:
- try:
- netloc_enc = netloc.encode("ascii")
- except UnicodeEncodeError:
- netloc_enc = netloc.encode("idna")
- self.putheader('Host', netloc_enc)
- else:
- try:
- host_enc = self.host.encode("ascii")
- except UnicodeEncodeError:
- host_enc = self.host.encode("idna")
-
- # As per RFC 273, IPv6 address should be wrapped with []
- # when used as Host header
-
- if self.host.find(':') >= 0:
- host_enc = bytes(b'[' + host_enc + b']')
-
- if self.port == self.default_port:
- self.putheader('Host', host_enc)
- else:
- host_enc = host_enc.decode("ascii")
- self.putheader('Host', "%s:%s" % (host_enc, self.port))
-
- # note: we are assuming that clients will not attempt to set these
- # headers since *this* library must deal with the
- # consequences. this also means that when the supporting
- # libraries are updated to recognize other forms, then this
- # code should be changed (removed or updated).
-
- # we only want a Content-Encoding of "identity" since we don't
- # support encodings such as x-gzip or x-deflate.
- if not skip_accept_encoding:
- self.putheader('Accept-Encoding', 'identity')
-
- # we can accept "chunked" Transfer-Encodings, but no others
- # NOTE: no TE header implies *only* "chunked"
- #self.putheader('TE', 'chunked')
-
- # if TE is supplied in the header, then it must appear in a
- # Connection header.
- #self.putheader('Connection', 'TE')
-
- else:
- # For HTTP/1.0, the server will assume "not chunked"
- pass
-
- def putheader(self, header, *values):
- """Send a request header line to the server.
-
- For example: h.putheader('Accept', 'text/html')
- """
- if self.__state != _CS_REQ_STARTED:
- raise CannotSendHeader()
-
- if hasattr(header, 'encode'):
- header = header.encode('ascii')
- values = list(values)
- for i, one_value in enumerate(values):
- if hasattr(one_value, 'encode'):
- values[i] = one_value.encode('latin-1')
- elif isinstance(one_value, int):
- values[i] = str(one_value).encode('ascii')
- value = bytes(b'\r\n\t').join(values)
- header = header + bytes(b': ') + value
- self._output(header)
-
- def endheaders(self, message_body=None):
- """Indicate that the last header line has been sent to the server.
-
- This method sends the request to the server. The optional message_body
- argument can be used to pass a message body associated with the
- request. The message body will be sent in the same packet as the
- message headers if it is a string, otherwise it is sent as a separate
- packet.
- """
- if self.__state == _CS_REQ_STARTED:
- self.__state = _CS_REQ_SENT
- else:
- raise CannotSendHeader()
- self._send_output(message_body)
-
- def request(self, method, url, body=None, headers={}):
- """Send a complete request to the server."""
- self._send_request(method, url, body, headers)
-
- def _set_content_length(self, body):
- # Set the content-length based on the body.
- thelen = None
- try:
- thelen = str(len(body))
- except TypeError as te:
- # If this is a file-like object, try to
- # fstat its file descriptor
- try:
- thelen = str(os.fstat(body.fileno()).st_size)
- except (AttributeError, OSError):
- # Don't send a length if this failed
- if self.debuglevel > 0: print("Cannot stat!!")
-
- if thelen is not None:
- self.putheader('Content-Length', thelen)
-
- def _send_request(self, method, url, body, headers):
- # Honor explicitly requested Host: and Accept-Encoding: headers.
- header_names = dict.fromkeys([k.lower() for k in headers])
- skips = {}
- if 'host' in header_names:
- skips['skip_host'] = 1
- if 'accept-encoding' in header_names:
- skips['skip_accept_encoding'] = 1
-
- self.putrequest(method, url, **skips)
-
- if body is not None and ('content-length' not in header_names):
- self._set_content_length(body)
- for hdr, value in headers.items():
- self.putheader(hdr, value)
- if isinstance(body, str):
- # RFC 2616 Section 3.7.1 says that text default has a
- # default charset of iso-8859-1.
- body = body.encode('iso-8859-1')
- self.endheaders(body)
-
- def getresponse(self):
- """Get the response from the server.
-
- If the HTTPConnection is in the correct state, returns an
- instance of HTTPResponse or of whatever object is returned by
- class the response_class variable.
-
- If a request has not been sent or if a previous response has
- not be handled, ResponseNotReady is raised. If the HTTP
- response indicates that the connection should be closed, then
- it will be closed before the response is returned. When the
- connection is closed, the underlying socket is closed.
- """
-
- # if a prior response has been completed, then forget about it.
- if self.__response and self.__response.isclosed():
- self.__response = None
-
- # if a prior response exists, then it must be completed (otherwise, we
- # cannot read this response's header to determine the connection-close
- # behavior)
- #
- # note: if a prior response existed, but was connection-close, then the
- # socket and response were made independent of this HTTPConnection
- # object since a new request requires that we open a whole new
- # connection
- #
- # this means the prior response had one of two states:
- # 1) will_close: this connection was reset and the prior socket and
- # response operate independently
- # 2) persistent: the response was retained and we await its
- # isclosed() status to become true.
- #
- if self.__state != _CS_REQ_SENT or self.__response:
- raise ResponseNotReady(self.__state)
-
- if self.debuglevel > 0:
- response = self.response_class(self.sock, self.debuglevel,
- method=self._method)
- else:
- response = self.response_class(self.sock, method=self._method)
-
- response.begin()
- assert response.will_close != _UNKNOWN
- self.__state = _CS_IDLE
-
- if response.will_close:
- # this effectively passes the connection to the response
- self.close()
- else:
- # remember this, so we can tell when it is complete
- self.__response = response
-
- return response
-
-try:
- import ssl
- from ssl import SSLContext
-except ImportError:
- pass
-else:
- class HTTPSConnection(HTTPConnection):
- "This class allows communication via SSL."
-
- default_port = HTTPS_PORT
-
- # XXX Should key_file and cert_file be deprecated in favour of context?
-
- def __init__(self, host, port=None, key_file=None, cert_file=None,
- strict=_strict_sentinel, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
- source_address=None, **_3to2kwargs):
- if 'check_hostname' in _3to2kwargs: check_hostname = _3to2kwargs['check_hostname']; del _3to2kwargs['check_hostname']
- else: check_hostname = None
- if 'context' in _3to2kwargs: context = _3to2kwargs['context']; del _3to2kwargs['context']
- else: context = None
- super(HTTPSConnection, self).__init__(host, port, strict, timeout,
- source_address)
- self.key_file = key_file
- self.cert_file = cert_file
- if context is None:
- # Some reasonable defaults
- context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
- context.options |= ssl.OP_NO_SSLv2
- will_verify = context.verify_mode != ssl.CERT_NONE
- if check_hostname is None:
- check_hostname = will_verify
- elif check_hostname and not will_verify:
- raise ValueError("check_hostname needs a SSL context with "
- "either CERT_OPTIONAL or CERT_REQUIRED")
- if key_file or cert_file:
- context.load_cert_chain(cert_file, key_file)
- self._context = context
- self._check_hostname = check_hostname
-
- def connect(self):
- "Connect to a host on a given (SSL) port."
-
- sock = socket_create_connection((self.host, self.port),
- self.timeout, self.source_address)
-
- if self._tunnel_host:
- self.sock = sock
- self._tunnel()
-
- server_hostname = self.host if ssl.HAS_SNI else None
- self.sock = self._context.wrap_socket(sock,
- server_hostname=server_hostname)
- try:
- if self._check_hostname:
- ssl.match_hostname(self.sock.getpeercert(), self.host)
- except Exception:
- self.sock.shutdown(socket.SHUT_RDWR)
- self.sock.close()
- raise
-
- __all__.append("HTTPSConnection")
-
-
- # ######################################
- # # We use the old HTTPSConnection class from Py2.7, because ssl.SSLContext
- # # doesn't exist in the Py2.7 stdlib
- # class HTTPSConnection(HTTPConnection):
- # "This class allows communication via SSL."
-
- # default_port = HTTPS_PORT
-
- # def __init__(self, host, port=None, key_file=None, cert_file=None,
- # strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
- # source_address=None):
- # HTTPConnection.__init__(self, host, port, strict, timeout,
- # source_address)
- # self.key_file = key_file
- # self.cert_file = cert_file
-
- # def connect(self):
- # "Connect to a host on a given (SSL) port."
-
- # sock = socket_create_connection((self.host, self.port),
- # self.timeout, self.source_address)
- # if self._tunnel_host:
- # self.sock = sock
- # self._tunnel()
- # self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file)
-
- # __all__.append("HTTPSConnection")
- # ######################################
-
-
-class HTTPException(Exception):
- # Subclasses that define an __init__ must call Exception.__init__
- # or define self.args. Otherwise, str() will fail.
- pass
-
-class NotConnected(HTTPException):
- pass
-
-class InvalidURL(HTTPException):
- pass
-
-class UnknownProtocol(HTTPException):
- def __init__(self, version):
- self.args = version,
- self.version = version
-
-class UnknownTransferEncoding(HTTPException):
- pass
-
-class UnimplementedFileMode(HTTPException):
- pass
-
-class IncompleteRead(HTTPException):
- def __init__(self, partial, expected=None):
- self.args = partial,
- self.partial = partial
- self.expected = expected
- def __repr__(self):
- if self.expected is not None:
- e = ', %i more expected' % self.expected
- else:
- e = ''
- return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), e)
- def __str__(self):
- return repr(self)
-
-class ImproperConnectionState(HTTPException):
- pass
-
-class CannotSendRequest(ImproperConnectionState):
- pass
-
-class CannotSendHeader(ImproperConnectionState):
- pass
-
-class ResponseNotReady(ImproperConnectionState):
- pass
-
-class BadStatusLine(HTTPException):
- def __init__(self, line):
- if not line:
- line = repr(line)
- self.args = line,
- self.line = line
-
-class LineTooLong(HTTPException):
- def __init__(self, line_type):
- HTTPException.__init__(self, "got more than %d bytes when reading %s"
- % (_MAXLINE, line_type))
-
-# for backwards compatibility
-error = HTTPException
+ for d in data:
+ self.sock.sendall(d)
+ else:
+ raise TypeError("data should be a bytes-like object "
+ "or an iterable, got %r" % type(data))
+
def _output(self, s):
    """Add a line of output to the current request buffer.

    Assumes that the line does *not* end with \\r\\n.
    """
    pending_lines = self._buffer
    pending_lines.append(s)
+
def _send_output(self, message_body=None):
    """Send the currently buffered request and clear the buffer.

    Appends an extra \\r\\n to the buffer.
    A message_body may be specified, to be appended to the request.
    """
    pending_lines = self._buffer
    # Two empty entries make the join end with a blank line.
    pending_lines.extend((bytes(b""), bytes(b"")))
    payload = bytes(b"\r\n").join(pending_lines)
    del pending_lines[:]

    if isinstance(message_body, bytes):
        # Sending headers and body in a single send() call avoids
        # performance problems caused by the interaction between
        # delayed ack and the Nagle algorithm.
        self.send(payload + message_body)
        return

    self.send(payload)
    if message_body is not None:
        # message_body was not a string (i.e. it is a file), and
        # we must run the risk of Nagle.
        self.send(message_body)
+
+ def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
+ """Send a request to the server.
+
+ `method' specifies an HTTP request method, e.g. 'GET'.
+ `url' specifies the object being requested, e.g. '/index.html'.
+ `skip_host' if True does not add automatically a 'Host:' header
+ `skip_accept_encoding' if True does not add automatically an
+ 'Accept-Encoding:' header
+ """
+
+ # if a prior response has been completed, then forget about it.
+ if self.__response and self.__response.isclosed():
+ self.__response = None
+
+
+ # in certain cases, we cannot issue another request on this connection.
+ # this occurs when:
+ # 1) we are in the process of sending a request. (_CS_REQ_STARTED)
+ # 2) a response to a previous request has signalled that it is going
+ # to close the connection upon completion.
+ # 3) the headers for the previous response have not been read, thus
+ # we cannot determine whether point (2) is true. (_CS_REQ_SENT)
+ #
+ # if there is no prior response, then we can request at will.
+ #
+ # if point (2) is true, then we will have passed the socket to the
+ # response (effectively meaning, "there is no prior response"), and
+ # will open a new one when a new request is made.
+ #
+ # Note: if a prior response exists, then we *can* start a new request.
+ # We are not allowed to begin fetching the response to this new
+ # request, however, until that prior response is complete.
+ #
+ if self.__state == _CS_IDLE:
+ self.__state = _CS_REQ_STARTED
+ else:
+ raise CannotSendRequest(self.__state)
+
+ # Save the method we use, we need it later in the response phase
+ self._method = method
+ if not url:
+ url = '/'
+ request = '%s %s %s' % (method, url, self._http_vsn_str)
+
+ # Non-ASCII characters should have been eliminated earlier
+ self._output(request.encode('ascii'))
+
+ if self._http_vsn == 11:
+ # Issue some standard headers for better HTTP/1.1 compliance
+
+ if not skip_host:
+ # this header is issued *only* for HTTP/1.1
+ # connections. more specifically, this means it is
+ # only issued when the client uses the new
+ # HTTPConnection() class. backwards-compat clients
+ # will be using HTTP/1.0 and those clients may be
+ # issuing this header themselves. we should NOT issue
+ # it twice; some web servers (such as Apache) barf
+ # when they see two Host: headers
+
+ # If we need a non-standard port,include it in the
+ # header. If the request is going through a proxy,
+ # but the host of the actual URL, not the host of the
+ # proxy.
+
+ netloc = ''
+ if url.startswith('http'):
+ nil, netloc, nil, nil, nil = urlsplit(url)
+
+ if netloc:
+ try:
+ netloc_enc = netloc.encode("ascii")
+ except UnicodeEncodeError:
+ netloc_enc = netloc.encode("idna")
+ self.putheader('Host', netloc_enc)
+ else:
+ try:
+ host_enc = self.host.encode("ascii")
+ except UnicodeEncodeError:
+ host_enc = self.host.encode("idna")
+
+ # As per RFC 273, IPv6 address should be wrapped with []
+ # when used as Host header
+
+ if self.host.find(':') >= 0:
+ host_enc = bytes(b'[' + host_enc + b']')
+
+ if self.port == self.default_port:
+ self.putheader('Host', host_enc)
+ else:
+ host_enc = host_enc.decode("ascii")
+ self.putheader('Host', "%s:%s" % (host_enc, self.port))
+
+ # note: we are assuming that clients will not attempt to set these
+ # headers since *this* library must deal with the
+ # consequences. this also means that when the supporting
+ # libraries are updated to recognize other forms, then this
+ # code should be changed (removed or updated).
+
+ # we only want a Content-Encoding of "identity" since we don't
+ # support encodings such as x-gzip or x-deflate.
+ if not skip_accept_encoding:
+ self.putheader('Accept-Encoding', 'identity')
+
+ # we can accept "chunked" Transfer-Encodings, but no others
+ # NOTE: no TE header implies *only* "chunked"
+ #self.putheader('TE', 'chunked')
+
+ # if TE is supplied in the header, then it must appear in a
+ # Connection header.
+ #self.putheader('Connection', 'TE')
+
+ else:
+ # For HTTP/1.0, the server will assume "not chunked"
+ pass
+
+ def putheader(self, header, *values):
+ """Send a request header line to the server.
+
+ For example: h.putheader('Accept', 'text/html')
+ """
+ if self.__state != _CS_REQ_STARTED:
+ raise CannotSendHeader()
+
+ if hasattr(header, 'encode'):
+ header = header.encode('ascii')
+ values = list(values)
+ for i, one_value in enumerate(values):
+ if hasattr(one_value, 'encode'):
+ values[i] = one_value.encode('latin-1')
+ elif isinstance(one_value, int):
+ values[i] = str(one_value).encode('ascii')
+ value = bytes(b'\r\n\t').join(values)
+ header = header + bytes(b': ') + value
+ self._output(header)
+
+ def endheaders(self, message_body=None):
+ """Indicate that the last header line has been sent to the server.
+
+ This method sends the request to the server. The optional message_body
+ argument can be used to pass a message body associated with the
+ request. The message body will be sent in the same packet as the
+ message headers if it is a string, otherwise it is sent as a separate
+ packet.
+ """
+ if self.__state == _CS_REQ_STARTED:
+ self.__state = _CS_REQ_SENT
+ else:
+ raise CannotSendHeader()
+ self._send_output(message_body)
+
+ def request(self, method, url, body=None, headers={}):
+ """Send a complete request to the server."""
+ self._send_request(method, url, body, headers)
+
+ def _set_content_length(self, body):
+ # Set the content-length based on the body.
+ thelen = None
+ try:
+ thelen = str(len(body))
+ except TypeError as te:
+ # If this is a file-like object, try to
+ # fstat its file descriptor
+ try:
+ thelen = str(os.fstat(body.fileno()).st_size)
+ except (AttributeError, OSError):
+ # Don't send a length if this failed
+ if self.debuglevel > 0: print("Cannot stat!!")
+
+ if thelen is not None:
+ self.putheader('Content-Length', thelen)
+
+ def _send_request(self, method, url, body, headers):
+ # Honor explicitly requested Host: and Accept-Encoding: headers.
+ header_names = dict.fromkeys([k.lower() for k in headers])
+ skips = {}
+ if 'host' in header_names:
+ skips['skip_host'] = 1
+ if 'accept-encoding' in header_names:
+ skips['skip_accept_encoding'] = 1
+
+ self.putrequest(method, url, **skips)
+
+ if body is not None and ('content-length' not in header_names):
+ self._set_content_length(body)
+ for hdr, value in headers.items():
+ self.putheader(hdr, value)
+ if isinstance(body, str):
+ # RFC 2616 Section 3.7.1 says that text default has a
+ # default charset of iso-8859-1.
+ body = body.encode('iso-8859-1')
+ self.endheaders(body)
+
+ def getresponse(self):
+ """Get the response from the server.
+
+ If the HTTPConnection is in the correct state, returns an
+ instance of HTTPResponse or of whatever object is returned by
+ class the response_class variable.
+
+ If a request has not been sent or if a previous response has
+ not be handled, ResponseNotReady is raised. If the HTTP
+ response indicates that the connection should be closed, then
+ it will be closed before the response is returned. When the
+ connection is closed, the underlying socket is closed.
+ """
+
+ # if a prior response has been completed, then forget about it.
+ if self.__response and self.__response.isclosed():
+ self.__response = None
+
+ # if a prior response exists, then it must be completed (otherwise, we
+ # cannot read this response's header to determine the connection-close
+ # behavior)
+ #
+ # note: if a prior response existed, but was connection-close, then the
+ # socket and response were made independent of this HTTPConnection
+ # object since a new request requires that we open a whole new
+ # connection
+ #
+ # this means the prior response had one of two states:
+ # 1) will_close: this connection was reset and the prior socket and
+ # response operate independently
+ # 2) persistent: the response was retained and we await its
+ # isclosed() status to become true.
+ #
+ if self.__state != _CS_REQ_SENT or self.__response:
+ raise ResponseNotReady(self.__state)
+
+ if self.debuglevel > 0:
+ response = self.response_class(self.sock, self.debuglevel,
+ method=self._method)
+ else:
+ response = self.response_class(self.sock, method=self._method)
+
+ response.begin()
+ assert response.will_close != _UNKNOWN
+ self.__state = _CS_IDLE
+
+ if response.will_close:
+ # this effectively passes the connection to the response
+ self.close()
+ else:
+ # remember this, so we can tell when it is complete
+ self.__response = response
+
+ return response
+
+try:
+ import ssl
+ from ssl import SSLContext
+except ImportError:
+ pass
+else:
+ class HTTPSConnection(HTTPConnection):
+ "This class allows communication via SSL."
+
+ default_port = HTTPS_PORT
+
+ # XXX Should key_file and cert_file be deprecated in favour of context?
+
+ def __init__(self, host, port=None, key_file=None, cert_file=None,
+ strict=_strict_sentinel, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
+ source_address=None, **_3to2kwargs):
+ if 'check_hostname' in _3to2kwargs: check_hostname = _3to2kwargs['check_hostname']; del _3to2kwargs['check_hostname']
+ else: check_hostname = None
+ if 'context' in _3to2kwargs: context = _3to2kwargs['context']; del _3to2kwargs['context']
+ else: context = None
+ super(HTTPSConnection, self).__init__(host, port, strict, timeout,
+ source_address)
+ self.key_file = key_file
+ self.cert_file = cert_file
+ if context is None:
+ # Some reasonable defaults
+ context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
+ context.options |= ssl.OP_NO_SSLv2
+ will_verify = context.verify_mode != ssl.CERT_NONE
+ if check_hostname is None:
+ check_hostname = will_verify
+ elif check_hostname and not will_verify:
+ raise ValueError("check_hostname needs a SSL context with "
+ "either CERT_OPTIONAL or CERT_REQUIRED")
+ if key_file or cert_file:
+ context.load_cert_chain(cert_file, key_file)
+ self._context = context
+ self._check_hostname = check_hostname
+
+ def connect(self):
+ "Connect to a host on a given (SSL) port."
+
+ sock = socket_create_connection((self.host, self.port),
+ self.timeout, self.source_address)
+
+ if self._tunnel_host:
+ self.sock = sock
+ self._tunnel()
+
+ server_hostname = self.host if ssl.HAS_SNI else None
+ self.sock = self._context.wrap_socket(sock,
+ server_hostname=server_hostname)
+ try:
+ if self._check_hostname:
+ ssl.match_hostname(self.sock.getpeercert(), self.host)
+ except Exception:
+ self.sock.shutdown(socket.SHUT_RDWR)
+ self.sock.close()
+ raise
+
+ __all__.append("HTTPSConnection")
+
+
+ # ######################################
+ # # We use the old HTTPSConnection class from Py2.7, because ssl.SSLContext
+ # # doesn't exist in the Py2.7 stdlib
+ # class HTTPSConnection(HTTPConnection):
+ # "This class allows communication via SSL."
+
+ # default_port = HTTPS_PORT
+
+ # def __init__(self, host, port=None, key_file=None, cert_file=None,
+ # strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
+ # source_address=None):
+ # HTTPConnection.__init__(self, host, port, strict, timeout,
+ # source_address)
+ # self.key_file = key_file
+ # self.cert_file = cert_file
+
+ # def connect(self):
+ # "Connect to a host on a given (SSL) port."
+
+ # sock = socket_create_connection((self.host, self.port),
+ # self.timeout, self.source_address)
+ # if self._tunnel_host:
+ # self.sock = sock
+ # self._tunnel()
+ # self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file)
+
+ # __all__.append("HTTPSConnection")
+ # ######################################
+
+
+class HTTPException(Exception):
+ # Subclasses that define an __init__ must call Exception.__init__
+ # or define self.args. Otherwise, str() will fail.
+ pass
+
+class NotConnected(HTTPException):
+ pass
+
+class InvalidURL(HTTPException):
+ pass
+
+class UnknownProtocol(HTTPException):
+ def __init__(self, version):
+ self.args = version,
+ self.version = version
+
+class UnknownTransferEncoding(HTTPException):
+ pass
+
+class UnimplementedFileMode(HTTPException):
+ pass
+
+class IncompleteRead(HTTPException):
+ def __init__(self, partial, expected=None):
+ self.args = partial,
+ self.partial = partial
+ self.expected = expected
+ def __repr__(self):
+ if self.expected is not None:
+ e = ', %i more expected' % self.expected
+ else:
+ e = ''
+ return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), e)
+ def __str__(self):
+ return repr(self)
+
+class ImproperConnectionState(HTTPException):
+ pass
+
+class CannotSendRequest(ImproperConnectionState):
+ pass
+
+class CannotSendHeader(ImproperConnectionState):
+ pass
+
+class ResponseNotReady(ImproperConnectionState):
+ pass
+
+class BadStatusLine(HTTPException):
+ def __init__(self, line):
+ if not line:
+ line = repr(line)
+ self.args = line,
+ self.line = line
+
+class LineTooLong(HTTPException):
+ def __init__(self, line_type):
+ HTTPException.__init__(self, "got more than %d bytes when reading %s"
+ % (_MAXLINE, line_type))
+
+# for backwards compatibility
+error = HTTPException
diff --git a/contrib/python/future/future/backports/http/cookiejar.py b/contrib/python/future/future/backports/http/cookiejar.py
index ef35e673ed..af3ef4151a 100644
--- a/contrib/python/future/future/backports/http/cookiejar.py
+++ b/contrib/python/future/future/backports/http/cookiejar.py
@@ -1,2110 +1,2110 @@
-r"""HTTP cookie handling for web clients.
-
-This is a backport of the Py3.3 ``http.cookiejar`` module for
-python-future.
-
-This module has (now fairly distant) origins in Gisle Aas' Perl module
-HTTP::Cookies, from the libwww-perl library.
-
-Docstrings, comments and debug strings in this code refer to the
-attributes of the HTTP cookie system as cookie-attributes, to distinguish
-them clearly from Python attributes.
-
-Class diagram (note that BSDDBCookieJar and the MSIE* classes are not
-distributed with the Python standard library, but are available from
-http://wwwsearch.sf.net/):
-
- CookieJar____
- / \ \
- FileCookieJar \ \
- / | \ \ \
- MozillaCookieJar | LWPCookieJar \ \
- | | \
- | ---MSIEBase | \
- | / | | \
- | / MSIEDBCookieJar BSDDBCookieJar
- |/
- MSIECookieJar
-
-"""
-
-from __future__ import unicode_literals
-from __future__ import print_function
-from __future__ import division
-from __future__ import absolute_import
-from future.builtins import filter, int, map, open, str
+r"""HTTP cookie handling for web clients.
+
+This is a backport of the Py3.3 ``http.cookiejar`` module for
+python-future.
+
+This module has (now fairly distant) origins in Gisle Aas' Perl module
+HTTP::Cookies, from the libwww-perl library.
+
+Docstrings, comments and debug strings in this code refer to the
+attributes of the HTTP cookie system as cookie-attributes, to distinguish
+them clearly from Python attributes.
+
+Class diagram (note that BSDDBCookieJar and the MSIE* classes are not
+distributed with the Python standard library, but are available from
+http://wwwsearch.sf.net/):
+
+ CookieJar____
+ / \ \
+ FileCookieJar \ \
+ / | \ \ \
+ MozillaCookieJar | LWPCookieJar \ \
+ | | \
+ | ---MSIEBase | \
+ | / | | \
+ | / MSIEDBCookieJar BSDDBCookieJar
+ |/
+ MSIECookieJar
+
+"""
+
+from __future__ import unicode_literals
+from __future__ import print_function
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import filter, int, map, open, str
from future.utils import as_native_str, PY2
-
-__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
- 'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar']
-
-import copy
-import datetime
-import re
+
+__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
+ 'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar']
+
+import copy
+import datetime
+import re
if PY2:
re.ASCII = 0
-import time
-from future.backports.urllib.parse import urlparse, urlsplit, quote
-from future.backports.http.client import HTTP_PORT
-try:
- import threading as _threading
-except ImportError:
- import dummy_threading as _threading
-from calendar import timegm
-
-debug = False # set to True to enable debugging via the logging module
-logger = None
-
-def _debug(*args):
- if not debug:
- return
- global logger
- if not logger:
- import logging
- logger = logging.getLogger("http.cookiejar")
- return logger.debug(*args)
-
-
-DEFAULT_HTTP_PORT = str(HTTP_PORT)
-MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
- "instance initialised with one)")
-
-def _warn_unhandled_exception():
- # There are a few catch-all except: statements in this module, for
- # catching input that's bad in unexpected ways. Warn if any
- # exceptions are caught there.
- import io, warnings, traceback
- f = io.StringIO()
- traceback.print_exc(None, f)
- msg = f.getvalue()
- warnings.warn("http.cookiejar bug!\n%s" % msg, stacklevel=2)
-
-
-# Date/time conversion
-# -----------------------------------------------------------------------------
-
-EPOCH_YEAR = 1970
-def _timegm(tt):
- year, month, mday, hour, min, sec = tt[:6]
- if ((year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31) and
- (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)):
- return timegm(tt)
- else:
- return None
-
-DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
-MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
- "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
-MONTHS_LOWER = []
-for month in MONTHS: MONTHS_LOWER.append(month.lower())
-
-def time2isoz(t=None):
- """Return a string representing time in seconds since epoch, t.
-
- If the function is called without an argument, it will use the current
- time.
-
- The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
- representing Universal Time (UTC, aka GMT). An example of this format is:
-
- 1994-11-24 08:49:37Z
-
- """
- if t is None:
- dt = datetime.datetime.utcnow()
- else:
- dt = datetime.datetime.utcfromtimestamp(t)
- return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
- dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second)
-
-def time2netscape(t=None):
- """Return a string representing time in seconds since epoch, t.
-
- If the function is called without an argument, it will use the current
- time.
-
- The format of the returned string is like this:
-
- Wed, DD-Mon-YYYY HH:MM:SS GMT
-
- """
- if t is None:
- dt = datetime.datetime.utcnow()
- else:
- dt = datetime.datetime.utcfromtimestamp(t)
- return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % (
- DAYS[dt.weekday()], dt.day, MONTHS[dt.month-1],
- dt.year, dt.hour, dt.minute, dt.second)
-
-
-UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}
-
-TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$", re.ASCII)
-def offset_from_tz_string(tz):
- offset = None
- if tz in UTC_ZONES:
- offset = 0
- else:
- m = TIMEZONE_RE.search(tz)
- if m:
- offset = 3600 * int(m.group(2))
- if m.group(3):
- offset = offset + 60 * int(m.group(3))
- if m.group(1) == '-':
- offset = -offset
- return offset
-
-def _str2time(day, mon, yr, hr, min, sec, tz):
- # translate month name to number
- # month numbers start with 1 (January)
- try:
- mon = MONTHS_LOWER.index(mon.lower())+1
- except ValueError:
- # maybe it's already a number
- try:
- imon = int(mon)
- except ValueError:
- return None
- if 1 <= imon <= 12:
- mon = imon
- else:
- return None
-
- # make sure clock elements are defined
- if hr is None: hr = 0
- if min is None: min = 0
- if sec is None: sec = 0
-
- yr = int(yr)
- day = int(day)
- hr = int(hr)
- min = int(min)
- sec = int(sec)
-
- if yr < 1000:
- # find "obvious" year
- cur_yr = time.localtime(time.time())[0]
- m = cur_yr % 100
- tmp = yr
- yr = yr + cur_yr - m
- m = m - tmp
- if abs(m) > 50:
- if m > 0: yr = yr + 100
- else: yr = yr - 100
-
- # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
- t = _timegm((yr, mon, day, hr, min, sec, tz))
-
- if t is not None:
- # adjust time using timezone string, to get absolute time since epoch
- if tz is None:
- tz = "UTC"
- tz = tz.upper()
- offset = offset_from_tz_string(tz)
- if offset is None:
- return None
- t = t - offset
-
- return t
-
-STRICT_DATE_RE = re.compile(
- r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
- "(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$", re.ASCII)
-WEEKDAY_RE = re.compile(
- r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I | re.ASCII)
-LOOSE_HTTP_DATE_RE = re.compile(
- r"""^
- (\d\d?) # day
- (?:\s+|[-\/])
- (\w+) # month
- (?:\s+|[-\/])
- (\d+) # year
- (?:
- (?:\s+|:) # separator before clock
- (\d\d?):(\d\d) # hour:min
- (?::(\d\d))? # optional seconds
- )? # optional clock
- \s*
- ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
- \s*
- (?:\(\w+\))? # ASCII representation of timezone in parens.
- \s*$""", re.X | re.ASCII)
-def http2time(text):
- """Returns time in seconds since epoch of time represented by a string.
-
- Return value is an integer.
-
- None is returned if the format of str is unrecognized, the time is outside
- the representable range, or the timezone string is not recognized. If the
- string contains no timezone, UTC is assumed.
-
- The timezone in the string may be numerical (like "-0800" or "+0100") or a
- string timezone (like "UTC", "GMT", "BST" or "EST"). Currently, only the
- timezone strings equivalent to UTC (zero offset) are known to the function.
-
- The function loosely parses the following formats:
-
- Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format
- Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format
- Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format
- 09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday)
- 08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday)
- 08-Feb-1994 14:15:29 GMT -- broken rfc850 format (no weekday)
-
- The parser ignores leading and trailing whitespace. The time may be
- absent.
-
- If the year is given with only 2 digits, the function will select the
- century that makes the year closest to the current date.
-
- """
- # fast exit for strictly conforming string
- m = STRICT_DATE_RE.search(text)
- if m:
- g = m.groups()
- mon = MONTHS_LOWER.index(g[1].lower()) + 1
- tt = (int(g[2]), mon, int(g[0]),
- int(g[3]), int(g[4]), float(g[5]))
- return _timegm(tt)
-
- # No, we need some messy parsing...
-
- # clean up
- text = text.lstrip()
- text = WEEKDAY_RE.sub("", text, 1) # Useless weekday
-
- # tz is time zone specifier string
- day, mon, yr, hr, min, sec, tz = [None]*7
-
- # loose regexp parse
- m = LOOSE_HTTP_DATE_RE.search(text)
- if m is not None:
- day, mon, yr, hr, min, sec, tz = m.groups()
- else:
- return None # bad format
-
- return _str2time(day, mon, yr, hr, min, sec, tz)
-
-ISO_DATE_RE = re.compile(
- """^
- (\d{4}) # year
- [-\/]?
- (\d\d?) # numerical month
- [-\/]?
- (\d\d?) # day
- (?:
- (?:\s+|[-:Tt]) # separator before clock
- (\d\d?):?(\d\d) # hour:min
- (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional)
- )? # optional clock
- \s*
- ([-+]?\d\d?:?(:?\d\d)?
- |Z|z)? # timezone (Z is "zero meridian", i.e. GMT)
- \s*$""", re.X | re. ASCII)
-def iso2time(text):
- """
- As for http2time, but parses the ISO 8601 formats:
-
- 1994-02-03 14:15:29 -0100 -- ISO 8601 format
- 1994-02-03 14:15:29 -- zone is optional
- 1994-02-03 -- only date
- 1994-02-03T14:15:29 -- Use T as separator
- 19940203T141529Z -- ISO 8601 compact format
- 19940203 -- only date
-
- """
- # clean up
- text = text.lstrip()
-
- # tz is time zone specifier string
- day, mon, yr, hr, min, sec, tz = [None]*7
-
- # loose regexp parse
- m = ISO_DATE_RE.search(text)
- if m is not None:
- # XXX there's an extra bit of the timezone I'm ignoring here: is
- # this the right thing to do?
- yr, mon, day, hr, min, sec, tz, _ = m.groups()
- else:
- return None # bad format
-
- return _str2time(day, mon, yr, hr, min, sec, tz)
-
-
-# Header parsing
-# -----------------------------------------------------------------------------
-
-def unmatched(match):
- """Return unmatched part of re.Match object."""
- start, end = match.span(0)
- return match.string[:start]+match.string[end:]
-
-HEADER_TOKEN_RE = re.compile(r"^\s*([^=\s;,]+)")
-HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
-HEADER_VALUE_RE = re.compile(r"^\s*=\s*([^\s;,]*)")
-HEADER_ESCAPE_RE = re.compile(r"\\(.)")
-def split_header_words(header_values):
- r"""Parse header values into a list of lists containing key,value pairs.
-
- The function knows how to deal with ",", ";" and "=" as well as quoted
- values after "=". A list of space separated tokens are parsed as if they
- were separated by ";".
-
- If the header_values passed as argument contains multiple values, then they
- are treated as if they were a single value separated by comma ",".
-
- This means that this function is useful for parsing header fields that
- follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
- the requirement for tokens).
-
- headers = #header
- header = (token | parameter) *( [";"] (token | parameter))
-
- token = 1*<any CHAR except CTLs or separators>
- separators = "(" | ")" | "<" | ">" | "@"
- | "," | ";" | ":" | "\" | <">
- | "/" | "[" | "]" | "?" | "="
- | "{" | "}" | SP | HT
-
- quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
- qdtext = <any TEXT except <">>
- quoted-pair = "\" CHAR
-
- parameter = attribute "=" value
- attribute = token
- value = token | quoted-string
-
- Each header is represented by a list of key/value pairs. The value for a
- simple token (not part of a parameter) is None. Syntactically incorrect
- headers will not necessarily be parsed as you would want.
-
- This is easier to describe with some examples:
-
- >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
- [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
- >>> split_header_words(['text/html; charset="iso-8859-1"'])
- [[('text/html', None), ('charset', 'iso-8859-1')]]
- >>> split_header_words([r'Basic realm="\"foo\bar\""'])
- [[('Basic', None), ('realm', '"foobar"')]]
-
- """
- assert not isinstance(header_values, str)
- result = []
- for text in header_values:
- orig_text = text
- pairs = []
- while text:
- m = HEADER_TOKEN_RE.search(text)
- if m:
- text = unmatched(m)
- name = m.group(1)
- m = HEADER_QUOTED_VALUE_RE.search(text)
- if m: # quoted value
- text = unmatched(m)
- value = m.group(1)
- value = HEADER_ESCAPE_RE.sub(r"\1", value)
- else:
- m = HEADER_VALUE_RE.search(text)
- if m: # unquoted value
- text = unmatched(m)
- value = m.group(1)
- value = value.rstrip()
- else:
- # no value, a lone token
- value = None
- pairs.append((name, value))
- elif text.lstrip().startswith(","):
- # concatenated headers, as per RFC 2616 section 4.2
- text = text.lstrip()[1:]
- if pairs: result.append(pairs)
- pairs = []
- else:
- # skip junk
- non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text)
- assert nr_junk_chars > 0, (
- "split_header_words bug: '%s', '%s', %s" %
- (orig_text, text, pairs))
- text = non_junk
- if pairs: result.append(pairs)
- return result
-
-HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
-def join_header_words(lists):
- """Do the inverse (almost) of the conversion done by split_header_words.
-
- Takes a list of lists of (key, value) pairs and produces a single header
- value. Attribute values are quoted if needed.
-
- >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
- 'text/plain; charset="iso-8859/1"'
- >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
- 'text/plain, charset="iso-8859/1"'
-
- """
- headers = []
- for pairs in lists:
- attr = []
- for k, v in pairs:
- if v is not None:
- if not re.search(r"^\w+$", v):
- v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v) # escape " and \
- v = '"%s"' % v
- k = "%s=%s" % (k, v)
- attr.append(k)
- if attr: headers.append("; ".join(attr))
- return ", ".join(headers)
-
-def strip_quotes(text):
- if text.startswith('"'):
- text = text[1:]
- if text.endswith('"'):
- text = text[:-1]
- return text
-
-def parse_ns_headers(ns_headers):
- """Ad-hoc parser for Netscape protocol cookie-attributes.
-
- The old Netscape cookie format for Set-Cookie can for instance contain
- an unquoted "," in the expires field, so we have to use this ad-hoc
- parser instead of split_header_words.
-
- XXX This may not make the best possible effort to parse all the crap
- that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient
- parser is probably better, so could do worse than following that if
- this ever gives any trouble.
-
- Currently, this is also used for parsing RFC 2109 cookies.
-
- """
- known_attrs = ("expires", "domain", "path", "secure",
- # RFC 2109 attrs (may turn up in Netscape cookies, too)
- "version", "port", "max-age")
-
- result = []
- for ns_header in ns_headers:
- pairs = []
- version_set = False
- for ii, param in enumerate(re.split(r";\s*", ns_header)):
- param = param.rstrip()
- if param == "": continue
- if "=" not in param:
- k, v = param, None
- else:
- k, v = re.split(r"\s*=\s*", param, 1)
- k = k.lstrip()
- if ii != 0:
- lc = k.lower()
- if lc in known_attrs:
- k = lc
- if k == "version":
- # This is an RFC 2109 cookie.
- v = strip_quotes(v)
- version_set = True
- if k == "expires":
- # convert expires date to seconds since epoch
- v = http2time(strip_quotes(v)) # None if invalid
- pairs.append((k, v))
-
- if pairs:
- if not version_set:
- pairs.append(("version", "0"))
- result.append(pairs)
-
- return result
-
-
-IPV4_RE = re.compile(r"\.\d+$", re.ASCII)
-def is_HDN(text):
- """Return True if text is a host domain name."""
- # XXX
- # This may well be wrong. Which RFC is HDN defined in, if any (for
- # the purposes of RFC 2965)?
- # For the current implementation, what about IPv6? Remember to look
- # at other uses of IPV4_RE also, if change this.
- if IPV4_RE.search(text):
- return False
- if text == "":
- return False
- if text[0] == "." or text[-1] == ".":
- return False
- return True
-
-def domain_match(A, B):
- """Return True if domain A domain-matches domain B, according to RFC 2965.
-
- A and B may be host domain names or IP addresses.
-
- RFC 2965, section 1:
-
- Host names can be specified either as an IP address or a HDN string.
- Sometimes we compare one host name with another. (Such comparisons SHALL
- be case-insensitive.) Host A's name domain-matches host B's if
-
- * their host name strings string-compare equal; or
-
- * A is a HDN string and has the form NB, where N is a non-empty
- name string, B has the form .B', and B' is a HDN string. (So,
- x.y.com domain-matches .Y.com but not Y.com.)
-
- Note that domain-match is not a commutative operation: a.b.c.com
- domain-matches .c.com, but not the reverse.
-
- """
- # Note that, if A or B are IP addresses, the only relevant part of the
- # definition of the domain-match algorithm is the direct string-compare.
- A = A.lower()
- B = B.lower()
- if A == B:
- return True
- if not is_HDN(A):
- return False
- i = A.rfind(B)
- if i == -1 or i == 0:
- # A does not have form NB, or N is the empty string
- return False
- if not B.startswith("."):
- return False
- if not is_HDN(B[1:]):
- return False
- return True
-
-def liberal_is_HDN(text):
- """Return True if text is a sort-of-like a host domain name.
-
- For accepting/blocking domains.
-
- """
- if IPV4_RE.search(text):
- return False
- return True
-
-def user_domain_match(A, B):
- """For blocking/accepting domains.
-
- A and B may be host domain names or IP addresses.
-
- """
- A = A.lower()
- B = B.lower()
- if not (liberal_is_HDN(A) and liberal_is_HDN(B)):
- if A == B:
- # equal IP addresses
- return True
- return False
- initial_dot = B.startswith(".")
- if initial_dot and A.endswith(B):
- return True
- if not initial_dot and A == B:
- return True
- return False
-
-cut_port_re = re.compile(r":\d+$", re.ASCII)
-def request_host(request):
- """Return request-host, as defined by RFC 2965.
-
- Variation from RFC: returned value is lowercased, for convenient
- comparison.
-
- """
- url = request.get_full_url()
- host = urlparse(url)[1]
- if host == "":
- host = request.get_header("Host", "")
-
- # remove port, if present
- host = cut_port_re.sub("", host, 1)
- return host.lower()
-
-def eff_request_host(request):
- """Return a tuple (request-host, effective request-host name).
-
- As defined by RFC 2965, except both are lowercased.
-
- """
- erhn = req_host = request_host(request)
- if req_host.find(".") == -1 and not IPV4_RE.search(req_host):
- erhn = req_host + ".local"
- return req_host, erhn
-
-def request_path(request):
- """Path component of request-URI, as defined by RFC 2965."""
- url = request.get_full_url()
- parts = urlsplit(url)
- path = escape_path(parts.path)
- if not path.startswith("/"):
- # fix bad RFC 2396 absoluteURI
- path = "/" + path
- return path
-
-def request_port(request):
- host = request.host
- i = host.find(':')
- if i >= 0:
- port = host[i+1:]
- try:
- int(port)
- except ValueError:
- _debug("nonnumeric port: '%s'", port)
- return None
- else:
- port = DEFAULT_HTTP_PORT
- return port
-
-# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
-# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
-HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
-ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
-def uppercase_escaped_char(match):
- return "%%%s" % match.group(1).upper()
-def escape_path(path):
- """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
- # There's no knowing what character encoding was used to create URLs
- # containing %-escapes, but since we have to pick one to escape invalid
- # path characters, we pick UTF-8, as recommended in the HTML 4.0
- # specification:
- # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
- # And here, kind of: draft-fielding-uri-rfc2396bis-03
- # (And in draft IRI specification: draft-duerst-iri-05)
- # (And here, for new URI schemes: RFC 2718)
- path = quote(path, HTTP_PATH_SAFE)
- path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path)
- return path
-
-def reach(h):
- """Return reach of host h, as defined by RFC 2965, section 1.
-
- The reach R of a host name H is defined as follows:
-
- * If
-
- - H is the host domain name of a host; and,
-
- - H has the form A.B; and
-
- - A has no embedded (that is, interior) dots; and
-
- - B has at least one embedded dot, or B is the string "local".
- then the reach of H is .B.
-
- * Otherwise, the reach of H is H.
-
- >>> reach("www.acme.com")
- '.acme.com'
- >>> reach("acme.com")
- 'acme.com'
- >>> reach("acme.local")
- '.local'
-
- """
- i = h.find(".")
- if i >= 0:
- #a = h[:i] # this line is only here to show what a is
- b = h[i+1:]
- i = b.find(".")
- if is_HDN(h) and (i >= 0 or b == "local"):
- return "."+b
- return h
-
-def is_third_party(request):
- """
-
- RFC 2965, section 3.3.6:
-
- An unverifiable transaction is to a third-party host if its request-
- host U does not domain-match the reach R of the request-host O in the
- origin transaction.
-
- """
- req_host = request_host(request)
- if not domain_match(req_host, reach(request.get_origin_req_host())):
- return True
- else:
- return False
-
-
-class Cookie(object):
- """HTTP Cookie.
-
- This class represents both Netscape and RFC 2965 cookies.
-
- This is deliberately a very simple class. It just holds attributes. It's
- possible to construct Cookie instances that don't comply with the cookie
- standards. CookieJar.make_cookies is the factory function for Cookie
- objects -- it deals with cookie parsing, supplying defaults, and
- normalising to the representation used in this class. CookiePolicy is
- responsible for checking them to see whether they should be accepted from
- and returned to the server.
-
- Note that the port may be present in the headers, but unspecified ("Port"
- rather than"Port=80", for example); if this is the case, port is None.
-
- """
-
- def __init__(self, version, name, value,
- port, port_specified,
- domain, domain_specified, domain_initial_dot,
- path, path_specified,
- secure,
- expires,
- discard,
- comment,
- comment_url,
- rest,
- rfc2109=False,
- ):
-
- if version is not None: version = int(version)
- if expires is not None: expires = int(expires)
- if port is None and port_specified is True:
- raise ValueError("if port is None, port_specified must be false")
-
- self.version = version
- self.name = name
- self.value = value
- self.port = port
- self.port_specified = port_specified
- # normalise case, as per RFC 2965 section 3.3.3
- self.domain = domain.lower()
- self.domain_specified = domain_specified
- # Sigh. We need to know whether the domain given in the
- # cookie-attribute had an initial dot, in order to follow RFC 2965
- # (as clarified in draft errata). Needed for the returned $Domain
- # value.
- self.domain_initial_dot = domain_initial_dot
- self.path = path
- self.path_specified = path_specified
- self.secure = secure
- self.expires = expires
- self.discard = discard
- self.comment = comment
- self.comment_url = comment_url
- self.rfc2109 = rfc2109
-
- self._rest = copy.copy(rest)
-
- def has_nonstandard_attr(self, name):
- return name in self._rest
- def get_nonstandard_attr(self, name, default=None):
- return self._rest.get(name, default)
- def set_nonstandard_attr(self, name, value):
- self._rest[name] = value
-
- def is_expired(self, now=None):
- if now is None: now = time.time()
- if (self.expires is not None) and (self.expires <= now):
- return True
- return False
-
- def __str__(self):
- if self.port is None: p = ""
- else: p = ":"+self.port
- limit = self.domain + p + self.path
- if self.value is not None:
- namevalue = "%s=%s" % (self.name, self.value)
- else:
- namevalue = self.name
- return "<Cookie %s for %s>" % (namevalue, limit)
-
- @as_native_str()
- def __repr__(self):
- args = []
- for name in ("version", "name", "value",
- "port", "port_specified",
- "domain", "domain_specified", "domain_initial_dot",
- "path", "path_specified",
- "secure", "expires", "discard", "comment", "comment_url",
- ):
- attr = getattr(self, name)
- ### Python-Future:
- # Avoid u'...' prefixes for unicode strings:
- if isinstance(attr, str):
- attr = str(attr)
- ###
- args.append(str("%s=%s") % (name, repr(attr)))
- args.append("rest=%s" % repr(self._rest))
- args.append("rfc2109=%s" % repr(self.rfc2109))
- return "Cookie(%s)" % ", ".join(args)
-
-
-class CookiePolicy(object):
- """Defines which cookies get accepted from and returned to server.
-
- May also modify cookies, though this is probably a bad idea.
-
- The subclass DefaultCookiePolicy defines the standard rules for Netscape
- and RFC 2965 cookies -- override that if you want a customised policy.
-
- """
- def set_ok(self, cookie, request):
- """Return true if (and only if) cookie should be accepted from server.
-
- Currently, pre-expired cookies never get this far -- the CookieJar
- class deletes such cookies itself.
-
- """
- raise NotImplementedError()
-
- def return_ok(self, cookie, request):
- """Return true if (and only if) cookie should be returned to server."""
- raise NotImplementedError()
-
- def domain_return_ok(self, domain, request):
- """Return false if cookies should not be returned, given cookie domain.
- """
- return True
-
- def path_return_ok(self, path, request):
- """Return false if cookies should not be returned, given cookie path.
- """
- return True
-
-
-class DefaultCookiePolicy(CookiePolicy):
- """Implements the standard rules for accepting and returning cookies."""
-
- DomainStrictNoDots = 1
- DomainStrictNonDomain = 2
- DomainRFC2965Match = 4
-
- DomainLiberal = 0
- DomainStrict = DomainStrictNoDots|DomainStrictNonDomain
-
- def __init__(self,
- blocked_domains=None, allowed_domains=None,
- netscape=True, rfc2965=False,
- rfc2109_as_netscape=None,
- hide_cookie2=False,
- strict_domain=False,
- strict_rfc2965_unverifiable=True,
- strict_ns_unverifiable=False,
- strict_ns_domain=DomainLiberal,
- strict_ns_set_initial_dollar=False,
- strict_ns_set_path=False,
- ):
- """Constructor arguments should be passed as keyword arguments only."""
- self.netscape = netscape
- self.rfc2965 = rfc2965
- self.rfc2109_as_netscape = rfc2109_as_netscape
- self.hide_cookie2 = hide_cookie2
- self.strict_domain = strict_domain
- self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
- self.strict_ns_unverifiable = strict_ns_unverifiable
- self.strict_ns_domain = strict_ns_domain
- self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
- self.strict_ns_set_path = strict_ns_set_path
-
- if blocked_domains is not None:
- self._blocked_domains = tuple(blocked_domains)
- else:
- self._blocked_domains = ()
-
- if allowed_domains is not None:
- allowed_domains = tuple(allowed_domains)
- self._allowed_domains = allowed_domains
-
- def blocked_domains(self):
- """Return the sequence of blocked domains (as a tuple)."""
- return self._blocked_domains
- def set_blocked_domains(self, blocked_domains):
- """Set the sequence of blocked domains."""
- self._blocked_domains = tuple(blocked_domains)
-
- def is_blocked(self, domain):
- for blocked_domain in self._blocked_domains:
- if user_domain_match(domain, blocked_domain):
- return True
- return False
-
- def allowed_domains(self):
- """Return None, or the sequence of allowed domains (as a tuple)."""
- return self._allowed_domains
- def set_allowed_domains(self, allowed_domains):
- """Set the sequence of allowed domains, or None."""
- if allowed_domains is not None:
- allowed_domains = tuple(allowed_domains)
- self._allowed_domains = allowed_domains
-
- def is_not_allowed(self, domain):
- if self._allowed_domains is None:
- return False
- for allowed_domain in self._allowed_domains:
- if user_domain_match(domain, allowed_domain):
- return False
- return True
-
- def set_ok(self, cookie, request):
- """
- If you override .set_ok(), be sure to call this method. If it returns
- false, so should your subclass (assuming your subclass wants to be more
- strict about which cookies to accept).
-
- """
- _debug(" - checking cookie %s=%s", cookie.name, cookie.value)
-
- assert cookie.name is not None
-
- for n in "version", "verifiability", "name", "path", "domain", "port":
- fn_name = "set_ok_"+n
- fn = getattr(self, fn_name)
- if not fn(cookie, request):
- return False
-
- return True
-
- def set_ok_version(self, cookie, request):
- if cookie.version is None:
- # Version is always set to 0 by parse_ns_headers if it's a Netscape
- # cookie, so this must be an invalid RFC 2965 cookie.
- _debug(" Set-Cookie2 without version attribute (%s=%s)",
- cookie.name, cookie.value)
- return False
- if cookie.version > 0 and not self.rfc2965:
- _debug(" RFC 2965 cookies are switched off")
- return False
- elif cookie.version == 0 and not self.netscape:
- _debug(" Netscape cookies are switched off")
- return False
- return True
-
- def set_ok_verifiability(self, cookie, request):
- if request.unverifiable and is_third_party(request):
- if cookie.version > 0 and self.strict_rfc2965_unverifiable:
- _debug(" third-party RFC 2965 cookie during "
- "unverifiable transaction")
- return False
- elif cookie.version == 0 and self.strict_ns_unverifiable:
- _debug(" third-party Netscape cookie during "
- "unverifiable transaction")
- return False
- return True
-
- def set_ok_name(self, cookie, request):
- # Try and stop servers setting V0 cookies designed to hack other
- # servers that know both V0 and V1 protocols.
- if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
- cookie.name.startswith("$")):
- _debug(" illegal name (starts with '$'): '%s'", cookie.name)
- return False
- return True
-
- def set_ok_path(self, cookie, request):
- if cookie.path_specified:
- req_path = request_path(request)
- if ((cookie.version > 0 or
- (cookie.version == 0 and self.strict_ns_set_path)) and
- not req_path.startswith(cookie.path)):
- _debug(" path attribute %s is not a prefix of request "
- "path %s", cookie.path, req_path)
- return False
- return True
-
- def set_ok_domain(self, cookie, request):
- if self.is_blocked(cookie.domain):
- _debug(" domain %s is in user block-list", cookie.domain)
- return False
- if self.is_not_allowed(cookie.domain):
- _debug(" domain %s is not in user allow-list", cookie.domain)
- return False
- if cookie.domain_specified:
- req_host, erhn = eff_request_host(request)
- domain = cookie.domain
- if self.strict_domain and (domain.count(".") >= 2):
- # XXX This should probably be compared with the Konqueror
- # (kcookiejar.cpp) and Mozilla implementations, but it's a
- # losing battle.
- i = domain.rfind(".")
- j = domain.rfind(".", 0, i)
- if j == 0: # domain like .foo.bar
- tld = domain[i+1:]
- sld = domain[j+1:i]
- if sld.lower() in ("co", "ac", "com", "edu", "org", "net",
- "gov", "mil", "int", "aero", "biz", "cat", "coop",
- "info", "jobs", "mobi", "museum", "name", "pro",
- "travel", "eu") and len(tld) == 2:
- # domain like .co.uk
- _debug(" country-code second level domain %s", domain)
- return False
- if domain.startswith("."):
- undotted_domain = domain[1:]
- else:
- undotted_domain = domain
- embedded_dots = (undotted_domain.find(".") >= 0)
- if not embedded_dots and domain != ".local":
- _debug(" non-local domain %s contains no embedded dot",
- domain)
- return False
- if cookie.version == 0:
- if (not erhn.endswith(domain) and
- (not erhn.startswith(".") and
- not ("."+erhn).endswith(domain))):
- _debug(" effective request-host %s (even with added "
- "initial dot) does not end with %s",
- erhn, domain)
- return False
- if (cookie.version > 0 or
- (self.strict_ns_domain & self.DomainRFC2965Match)):
- if not domain_match(erhn, domain):
- _debug(" effective request-host %s does not domain-match "
- "%s", erhn, domain)
- return False
- if (cookie.version > 0 or
- (self.strict_ns_domain & self.DomainStrictNoDots)):
- host_prefix = req_host[:-len(domain)]
- if (host_prefix.find(".") >= 0 and
- not IPV4_RE.search(req_host)):
- _debug(" host prefix %s for domain %s contains a dot",
- host_prefix, domain)
- return False
- return True
-
- def set_ok_port(self, cookie, request):
- if cookie.port_specified:
- req_port = request_port(request)
- if req_port is None:
- req_port = "80"
- else:
- req_port = str(req_port)
- for p in cookie.port.split(","):
- try:
- int(p)
- except ValueError:
- _debug(" bad port %s (not numeric)", p)
- return False
- if p == req_port:
- break
- else:
- _debug(" request port (%s) not found in %s",
- req_port, cookie.port)
- return False
- return True
-
- def return_ok(self, cookie, request):
- """
- If you override .return_ok(), be sure to call this method. If it
- returns false, so should your subclass (assuming your subclass wants to
- be more strict about which cookies to return).
-
- """
- # Path has already been checked by .path_return_ok(), and domain
- # blocking done by .domain_return_ok().
- _debug(" - checking cookie %s=%s", cookie.name, cookie.value)
-
- for n in "version", "verifiability", "secure", "expires", "port", "domain":
- fn_name = "return_ok_"+n
- fn = getattr(self, fn_name)
- if not fn(cookie, request):
- return False
- return True
-
- def return_ok_version(self, cookie, request):
- if cookie.version > 0 and not self.rfc2965:
- _debug(" RFC 2965 cookies are switched off")
- return False
- elif cookie.version == 0 and not self.netscape:
- _debug(" Netscape cookies are switched off")
- return False
- return True
-
- def return_ok_verifiability(self, cookie, request):
- if request.unverifiable and is_third_party(request):
- if cookie.version > 0 and self.strict_rfc2965_unverifiable:
- _debug(" third-party RFC 2965 cookie during unverifiable "
- "transaction")
- return False
- elif cookie.version == 0 and self.strict_ns_unverifiable:
- _debug(" third-party Netscape cookie during unverifiable "
- "transaction")
- return False
- return True
-
- def return_ok_secure(self, cookie, request):
- if cookie.secure and request.type != "https":
- _debug(" secure cookie with non-secure request")
- return False
- return True
-
- def return_ok_expires(self, cookie, request):
- if cookie.is_expired(self._now):
- _debug(" cookie expired")
- return False
- return True
-
- def return_ok_port(self, cookie, request):
- if cookie.port:
- req_port = request_port(request)
- if req_port is None:
- req_port = "80"
- for p in cookie.port.split(","):
- if p == req_port:
- break
- else:
- _debug(" request port %s does not match cookie port %s",
- req_port, cookie.port)
- return False
- return True
-
- def return_ok_domain(self, cookie, request):
- req_host, erhn = eff_request_host(request)
- domain = cookie.domain
-
- # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
- if (cookie.version == 0 and
- (self.strict_ns_domain & self.DomainStrictNonDomain) and
- not cookie.domain_specified and domain != erhn):
- _debug(" cookie with unspecified domain does not string-compare "
- "equal to request domain")
- return False
-
- if cookie.version > 0 and not domain_match(erhn, domain):
- _debug(" effective request-host name %s does not domain-match "
- "RFC 2965 cookie domain %s", erhn, domain)
- return False
- if cookie.version == 0 and not ("."+erhn).endswith(domain):
- _debug(" request-host %s does not match Netscape cookie domain "
- "%s", req_host, domain)
- return False
- return True
-
- def domain_return_ok(self, domain, request):
- # Liberal check of. This is here as an optimization to avoid
- # having to load lots of MSIE cookie files unless necessary.
- req_host, erhn = eff_request_host(request)
- if not req_host.startswith("."):
- req_host = "."+req_host
- if not erhn.startswith("."):
- erhn = "."+erhn
- if not (req_host.endswith(domain) or erhn.endswith(domain)):
- #_debug(" request domain %s does not match cookie domain %s",
- # req_host, domain)
- return False
-
- if self.is_blocked(domain):
- _debug(" domain %s is in user block-list", domain)
- return False
- if self.is_not_allowed(domain):
- _debug(" domain %s is not in user allow-list", domain)
- return False
-
- return True
-
- def path_return_ok(self, path, request):
- _debug("- checking cookie path=%s", path)
- req_path = request_path(request)
- if not req_path.startswith(path):
- _debug(" %s does not path-match %s", req_path, path)
- return False
- return True
-
-
-def vals_sorted_by_key(adict):
- keys = sorted(adict.keys())
- return map(adict.get, keys)
-
-def deepvalues(mapping):
- """Iterates over nested mapping, depth-first, in sorted order by key."""
- values = vals_sorted_by_key(mapping)
- for obj in values:
- mapping = False
- try:
- obj.items
- except AttributeError:
- pass
- else:
- mapping = True
- for subobj in deepvalues(obj):
- yield subobj
- if not mapping:
- yield obj
-
-
-# Used as second parameter to dict.get() method, to distinguish absent
-# dict key from one with a None value.
-class Absent(object): pass
-
-class CookieJar(object):
- """Collection of HTTP cookies.
-
- You may not need to know about this class: try
- urllib.request.build_opener(HTTPCookieProcessor).open(url).
- """
-
- non_word_re = re.compile(r"\W")
- quote_re = re.compile(r"([\"\\])")
- strict_domain_re = re.compile(r"\.?[^.]*")
- domain_re = re.compile(r"[^.]*")
- dots_re = re.compile(r"^\.+")
-
- magic_re = re.compile(r"^\#LWP-Cookies-(\d+\.\d+)", re.ASCII)
-
- def __init__(self, policy=None):
- if policy is None:
- policy = DefaultCookiePolicy()
- self._policy = policy
-
- self._cookies_lock = _threading.RLock()
- self._cookies = {}
-
- def set_policy(self, policy):
- self._policy = policy
-
- def _cookies_for_domain(self, domain, request):
- cookies = []
- if not self._policy.domain_return_ok(domain, request):
- return []
- _debug("Checking %s for cookies to return", domain)
- cookies_by_path = self._cookies[domain]
- for path in cookies_by_path.keys():
- if not self._policy.path_return_ok(path, request):
- continue
- cookies_by_name = cookies_by_path[path]
- for cookie in cookies_by_name.values():
- if not self._policy.return_ok(cookie, request):
- _debug(" not returning cookie")
- continue
- _debug(" it's a match")
- cookies.append(cookie)
- return cookies
-
- def _cookies_for_request(self, request):
- """Return a list of cookies to be returned to server."""
- cookies = []
- for domain in self._cookies.keys():
- cookies.extend(self._cookies_for_domain(domain, request))
- return cookies
-
- def _cookie_attrs(self, cookies):
- """Return a list of cookie-attributes to be returned to server.
-
- like ['foo="bar"; $Path="/"', ...]
-
- The $Version attribute is also added when appropriate (currently only
- once per request).
-
- """
- # add cookies in order of most specific (ie. longest) path first
- cookies.sort(key=lambda a: len(a.path), reverse=True)
-
- version_set = False
-
- attrs = []
- for cookie in cookies:
- # set version of Cookie header
- # XXX
- # What should it be if multiple matching Set-Cookie headers have
- # different versions themselves?
- # Answer: there is no answer; was supposed to be settled by
- # RFC 2965 errata, but that may never appear...
- version = cookie.version
- if not version_set:
- version_set = True
- if version > 0:
- attrs.append("$Version=%s" % version)
-
- # quote cookie value if necessary
- # (not for Netscape protocol, which already has any quotes
- # intact, due to the poorly-specified Netscape Cookie: syntax)
- if ((cookie.value is not None) and
- self.non_word_re.search(cookie.value) and version > 0):
- value = self.quote_re.sub(r"\\\1", cookie.value)
- else:
- value = cookie.value
-
- # add cookie-attributes to be returned in Cookie header
- if cookie.value is None:
- attrs.append(cookie.name)
- else:
- attrs.append("%s=%s" % (cookie.name, value))
- if version > 0:
- if cookie.path_specified:
- attrs.append('$Path="%s"' % cookie.path)
- if cookie.domain.startswith("."):
- domain = cookie.domain
- if (not cookie.domain_initial_dot and
- domain.startswith(".")):
- domain = domain[1:]
- attrs.append('$Domain="%s"' % domain)
- if cookie.port is not None:
- p = "$Port"
- if cookie.port_specified:
- p = p + ('="%s"' % cookie.port)
- attrs.append(p)
-
- return attrs
-
- def add_cookie_header(self, request):
- """Add correct Cookie: header to request (urllib.request.Request object).
-
- The Cookie2 header is also added unless policy.hide_cookie2 is true.
-
- """
- _debug("add_cookie_header")
- self._cookies_lock.acquire()
- try:
-
- self._policy._now = self._now = int(time.time())
-
- cookies = self._cookies_for_request(request)
-
- attrs = self._cookie_attrs(cookies)
- if attrs:
- if not request.has_header("Cookie"):
- request.add_unredirected_header(
- "Cookie", "; ".join(attrs))
-
- # if necessary, advertise that we know RFC 2965
- if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
- not request.has_header("Cookie2")):
- for cookie in cookies:
- if cookie.version != 1:
- request.add_unredirected_header("Cookie2", '$Version="1"')
- break
-
- finally:
- self._cookies_lock.release()
-
- self.clear_expired_cookies()
-
- def _normalized_cookie_tuples(self, attrs_set):
- """Return list of tuples containing normalised cookie information.
-
- attrs_set is the list of lists of key,value pairs extracted from
- the Set-Cookie or Set-Cookie2 headers.
-
- Tuples are name, value, standard, rest, where name and value are the
- cookie name and value, standard is a dictionary containing the standard
- cookie-attributes (discard, secure, version, expires or max-age,
- domain, path and port) and rest is a dictionary containing the rest of
- the cookie-attributes.
-
- """
- cookie_tuples = []
-
- boolean_attrs = "discard", "secure"
- value_attrs = ("version",
- "expires", "max-age",
- "domain", "path", "port",
- "comment", "commenturl")
-
- for cookie_attrs in attrs_set:
- name, value = cookie_attrs[0]
-
- # Build dictionary of standard cookie-attributes (standard) and
- # dictionary of other cookie-attributes (rest).
-
- # Note: expiry time is normalised to seconds since epoch. V0
- # cookies should have the Expires cookie-attribute, and V1 cookies
- # should have Max-Age, but since V1 includes RFC 2109 cookies (and
- # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
- # accept either (but prefer Max-Age).
- max_age_set = False
-
- bad_cookie = False
-
- standard = {}
- rest = {}
- for k, v in cookie_attrs[1:]:
- lc = k.lower()
- # don't lose case distinction for unknown fields
- if lc in value_attrs or lc in boolean_attrs:
- k = lc
- if k in boolean_attrs and v is None:
- # boolean cookie-attribute is present, but has no value
- # (like "discard", rather than "port=80")
- v = True
- if k in standard:
- # only first value is significant
- continue
- if k == "domain":
- if v is None:
- _debug(" missing value for domain attribute")
- bad_cookie = True
- break
- # RFC 2965 section 3.3.3
- v = v.lower()
- if k == "expires":
- if max_age_set:
- # Prefer max-age to expires (like Mozilla)
- continue
- if v is None:
- _debug(" missing or invalid value for expires "
- "attribute: treating as session cookie")
- continue
- if k == "max-age":
- max_age_set = True
- try:
- v = int(v)
- except ValueError:
- _debug(" missing or invalid (non-numeric) value for "
- "max-age attribute")
- bad_cookie = True
- break
- # convert RFC 2965 Max-Age to seconds since epoch
- # XXX Strictly you're supposed to follow RFC 2616
- # age-calculation rules. Remember that zero Max-Age is a
- # is a request to discard (old and new) cookie, though.
- k = "expires"
- v = self._now + v
- if (k in value_attrs) or (k in boolean_attrs):
- if (v is None and
- k not in ("port", "comment", "commenturl")):
- _debug(" missing value for %s attribute" % k)
- bad_cookie = True
- break
- standard[k] = v
- else:
- rest[k] = v
-
- if bad_cookie:
- continue
-
- cookie_tuples.append((name, value, standard, rest))
-
- return cookie_tuples
-
- def _cookie_from_cookie_tuple(self, tup, request):
- # standard is dict of standard cookie-attributes, rest is dict of the
- # rest of them
- name, value, standard, rest = tup
-
- domain = standard.get("domain", Absent)
- path = standard.get("path", Absent)
- port = standard.get("port", Absent)
- expires = standard.get("expires", Absent)
-
- # set the easy defaults
- version = standard.get("version", None)
- if version is not None:
- try:
- version = int(version)
- except ValueError:
- return None # invalid version, ignore cookie
- secure = standard.get("secure", False)
- # (discard is also set if expires is Absent)
- discard = standard.get("discard", False)
- comment = standard.get("comment", None)
- comment_url = standard.get("commenturl", None)
-
- # set default path
- if path is not Absent and path != "":
- path_specified = True
- path = escape_path(path)
- else:
- path_specified = False
- path = request_path(request)
- i = path.rfind("/")
- if i != -1:
- if version == 0:
- # Netscape spec parts company from reality here
- path = path[:i]
- else:
- path = path[:i+1]
- if len(path) == 0: path = "/"
-
- # set default domain
- domain_specified = domain is not Absent
- # but first we have to remember whether it starts with a dot
- domain_initial_dot = False
- if domain_specified:
- domain_initial_dot = bool(domain.startswith("."))
- if domain is Absent:
- req_host, erhn = eff_request_host(request)
- domain = erhn
- elif not domain.startswith("."):
- domain = "."+domain
-
- # set default port
- port_specified = False
- if port is not Absent:
- if port is None:
- # Port attr present, but has no value: default to request port.
- # Cookie should then only be sent back on that port.
- port = request_port(request)
- else:
- port_specified = True
- port = re.sub(r"\s+", "", port)
- else:
- # No port attr present. Cookie can be sent back on any port.
- port = None
-
- # set default expires and discard
- if expires is Absent:
- expires = None
- discard = True
- elif expires <= self._now:
- # Expiry date in past is request to delete cookie. This can't be
- # in DefaultCookiePolicy, because can't delete cookies there.
- try:
- self.clear(domain, path, name)
- except KeyError:
- pass
- _debug("Expiring cookie, domain='%s', path='%s', name='%s'",
- domain, path, name)
- return None
-
- return Cookie(version,
- name, value,
- port, port_specified,
- domain, domain_specified, domain_initial_dot,
- path, path_specified,
- secure,
- expires,
- discard,
- comment,
- comment_url,
- rest)
-
- def _cookies_from_attrs_set(self, attrs_set, request):
- cookie_tuples = self._normalized_cookie_tuples(attrs_set)
-
- cookies = []
- for tup in cookie_tuples:
- cookie = self._cookie_from_cookie_tuple(tup, request)
- if cookie: cookies.append(cookie)
- return cookies
-
- def _process_rfc2109_cookies(self, cookies):
- rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None)
- if rfc2109_as_ns is None:
- rfc2109_as_ns = not self._policy.rfc2965
- for cookie in cookies:
- if cookie.version == 1:
- cookie.rfc2109 = True
- if rfc2109_as_ns:
- # treat 2109 cookies as Netscape cookies rather than
- # as RFC2965 cookies
- cookie.version = 0
-
- def make_cookies(self, response, request):
- """Return sequence of Cookie objects extracted from response object."""
- # get cookie-attributes for RFC 2965 and Netscape protocols
- headers = response.info()
- rfc2965_hdrs = headers.get_all("Set-Cookie2", [])
- ns_hdrs = headers.get_all("Set-Cookie", [])
-
- rfc2965 = self._policy.rfc2965
- netscape = self._policy.netscape
-
- if ((not rfc2965_hdrs and not ns_hdrs) or
- (not ns_hdrs and not rfc2965) or
- (not rfc2965_hdrs and not netscape) or
- (not netscape and not rfc2965)):
- return [] # no relevant cookie headers: quick exit
-
- try:
- cookies = self._cookies_from_attrs_set(
- split_header_words(rfc2965_hdrs), request)
- except Exception:
- _warn_unhandled_exception()
- cookies = []
-
- if ns_hdrs and netscape:
- try:
- # RFC 2109 and Netscape cookies
- ns_cookies = self._cookies_from_attrs_set(
- parse_ns_headers(ns_hdrs), request)
- except Exception:
- _warn_unhandled_exception()
- ns_cookies = []
- self._process_rfc2109_cookies(ns_cookies)
-
- # Look for Netscape cookies (from Set-Cookie headers) that match
- # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
- # For each match, keep the RFC 2965 cookie and ignore the Netscape
- # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are
- # bundled in with the Netscape cookies for this purpose, which is
- # reasonable behaviour.
- if rfc2965:
- lookup = {}
- for cookie in cookies:
- lookup[(cookie.domain, cookie.path, cookie.name)] = None
-
- def no_matching_rfc2965(ns_cookie, lookup=lookup):
- key = ns_cookie.domain, ns_cookie.path, ns_cookie.name
- return key not in lookup
- ns_cookies = filter(no_matching_rfc2965, ns_cookies)
-
- if ns_cookies:
- cookies.extend(ns_cookies)
-
- return cookies
-
- def set_cookie_if_ok(self, cookie, request):
- """Set a cookie if policy says it's OK to do so."""
- self._cookies_lock.acquire()
- try:
- self._policy._now = self._now = int(time.time())
-
- if self._policy.set_ok(cookie, request):
- self.set_cookie(cookie)
-
-
- finally:
- self._cookies_lock.release()
-
- def set_cookie(self, cookie):
- """Set a cookie, without checking whether or not it should be set."""
- c = self._cookies
- self._cookies_lock.acquire()
- try:
- if cookie.domain not in c: c[cookie.domain] = {}
- c2 = c[cookie.domain]
- if cookie.path not in c2: c2[cookie.path] = {}
- c3 = c2[cookie.path]
- c3[cookie.name] = cookie
- finally:
- self._cookies_lock.release()
-
- def extract_cookies(self, response, request):
- """Extract cookies from response, where allowable given the request."""
- _debug("extract_cookies: %s", response.info())
- self._cookies_lock.acquire()
- try:
- self._policy._now = self._now = int(time.time())
-
- for cookie in self.make_cookies(response, request):
- if self._policy.set_ok(cookie, request):
- _debug(" setting cookie: %s", cookie)
- self.set_cookie(cookie)
- finally:
- self._cookies_lock.release()
-
- def clear(self, domain=None, path=None, name=None):
- """Clear some cookies.
-
- Invoking this method without arguments will clear all cookies. If
- given a single argument, only cookies belonging to that domain will be
- removed. If given two arguments, cookies belonging to the specified
- path within that domain are removed. If given three arguments, then
- the cookie with the specified name, path and domain is removed.
-
- Raises KeyError if no matching cookie exists.
-
- """
- if name is not None:
- if (domain is None) or (path is None):
- raise ValueError(
- "domain and path must be given to remove a cookie by name")
- del self._cookies[domain][path][name]
- elif path is not None:
- if domain is None:
- raise ValueError(
- "domain must be given to remove cookies by path")
- del self._cookies[domain][path]
- elif domain is not None:
- del self._cookies[domain]
- else:
- self._cookies = {}
-
- def clear_session_cookies(self):
- """Discard all session cookies.
-
- Note that the .save() method won't save session cookies anyway, unless
- you ask otherwise by passing a true ignore_discard argument.
-
- """
- self._cookies_lock.acquire()
- try:
- for cookie in self:
- if cookie.discard:
- self.clear(cookie.domain, cookie.path, cookie.name)
- finally:
- self._cookies_lock.release()
-
- def clear_expired_cookies(self):
- """Discard all expired cookies.
-
- You probably don't need to call this method: expired cookies are never
- sent back to the server (provided you're using DefaultCookiePolicy),
- this method is called by CookieJar itself every so often, and the
- .save() method won't save expired cookies anyway (unless you ask
- otherwise by passing a true ignore_expires argument).
-
- """
- self._cookies_lock.acquire()
- try:
- now = time.time()
- for cookie in self:
- if cookie.is_expired(now):
- self.clear(cookie.domain, cookie.path, cookie.name)
- finally:
- self._cookies_lock.release()
-
- def __iter__(self):
- return deepvalues(self._cookies)
-
- def __len__(self):
- """Return number of contained cookies."""
- i = 0
- for cookie in self: i = i + 1
- return i
-
- @as_native_str()
- def __repr__(self):
- r = []
- for cookie in self: r.append(repr(cookie))
- return "<%s[%s]>" % (self.__class__, ", ".join(r))
-
- def __str__(self):
- r = []
- for cookie in self: r.append(str(cookie))
- return "<%s[%s]>" % (self.__class__, ", ".join(r))
-
-
-# derives from IOError for backwards-compatibility with Python 2.4.0
-class LoadError(IOError): pass
-
-class FileCookieJar(CookieJar):
- """CookieJar that can be loaded from and saved to a file."""
-
- def __init__(self, filename=None, delayload=False, policy=None):
- """
- Cookies are NOT loaded from the named file until either the .load() or
- .revert() method is called.
-
- """
- CookieJar.__init__(self, policy)
- if filename is not None:
- try:
- filename+""
- except:
- raise ValueError("filename must be string-like")
- self.filename = filename
- self.delayload = bool(delayload)
-
- def save(self, filename=None, ignore_discard=False, ignore_expires=False):
- """Save cookies to a file."""
- raise NotImplementedError()
-
- def load(self, filename=None, ignore_discard=False, ignore_expires=False):
- """Load cookies from a file."""
- if filename is None:
- if self.filename is not None: filename = self.filename
- else: raise ValueError(MISSING_FILENAME_TEXT)
-
- f = open(filename)
- try:
- self._really_load(f, filename, ignore_discard, ignore_expires)
- finally:
- f.close()
-
- def revert(self, filename=None,
- ignore_discard=False, ignore_expires=False):
- """Clear all cookies and reload cookies from a saved file.
-
- Raises LoadError (or IOError) if reversion is not successful; the
- object's state will not be altered if this happens.
-
- """
- if filename is None:
- if self.filename is not None: filename = self.filename
- else: raise ValueError(MISSING_FILENAME_TEXT)
-
- self._cookies_lock.acquire()
- try:
-
- old_state = copy.deepcopy(self._cookies)
- self._cookies = {}
- try:
- self.load(filename, ignore_discard, ignore_expires)
- except (LoadError, IOError):
- self._cookies = old_state
- raise
-
- finally:
- self._cookies_lock.release()
-
-
-def lwp_cookie_str(cookie):
- """Return string representation of Cookie in an the LWP cookie file format.
-
- Actually, the format is extended a bit -- see module docstring.
-
- """
- h = [(cookie.name, cookie.value),
- ("path", cookie.path),
- ("domain", cookie.domain)]
- if cookie.port is not None: h.append(("port", cookie.port))
- if cookie.path_specified: h.append(("path_spec", None))
- if cookie.port_specified: h.append(("port_spec", None))
- if cookie.domain_initial_dot: h.append(("domain_dot", None))
- if cookie.secure: h.append(("secure", None))
- if cookie.expires: h.append(("expires",
- time2isoz(float(cookie.expires))))
- if cookie.discard: h.append(("discard", None))
- if cookie.comment: h.append(("comment", cookie.comment))
- if cookie.comment_url: h.append(("commenturl", cookie.comment_url))
-
- keys = sorted(cookie._rest.keys())
- for k in keys:
- h.append((k, str(cookie._rest[k])))
-
- h.append(("version", str(cookie.version)))
-
- return join_header_words([h])
-
-class LWPCookieJar(FileCookieJar):
- """
- The LWPCookieJar saves a sequence of "Set-Cookie3" lines.
- "Set-Cookie3" is the format used by the libwww-perl libary, not known
- to be compatible with any browser, but which is easy to read and
- doesn't lose information about RFC 2965 cookies.
-
- Additional methods
-
- as_lwp_str(ignore_discard=True, ignore_expired=True)
-
- """
-
- def as_lwp_str(self, ignore_discard=True, ignore_expires=True):
- """Return cookies as a string of "\\n"-separated "Set-Cookie3" headers.
-
- ignore_discard and ignore_expires: see docstring for FileCookieJar.save
-
- """
- now = time.time()
- r = []
- for cookie in self:
- if not ignore_discard and cookie.discard:
- continue
- if not ignore_expires and cookie.is_expired(now):
- continue
- r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie))
- return "\n".join(r+[""])
-
- def save(self, filename=None, ignore_discard=False, ignore_expires=False):
- if filename is None:
- if self.filename is not None: filename = self.filename
- else: raise ValueError(MISSING_FILENAME_TEXT)
-
- f = open(filename, "w")
- try:
- # There really isn't an LWP Cookies 2.0 format, but this indicates
- # that there is extra information in here (domain_dot and
- # port_spec) while still being compatible with libwww-perl, I hope.
- f.write("#LWP-Cookies-2.0\n")
- f.write(self.as_lwp_str(ignore_discard, ignore_expires))
- finally:
- f.close()
-
- def _really_load(self, f, filename, ignore_discard, ignore_expires):
- magic = f.readline()
- if not self.magic_re.search(magic):
- msg = ("%r does not look like a Set-Cookie3 (LWP) format "
- "file" % filename)
- raise LoadError(msg)
-
- now = time.time()
-
- header = "Set-Cookie3:"
- boolean_attrs = ("port_spec", "path_spec", "domain_dot",
- "secure", "discard")
- value_attrs = ("version",
- "port", "path", "domain",
- "expires",
- "comment", "commenturl")
-
- try:
- while 1:
- line = f.readline()
- if line == "": break
- if not line.startswith(header):
- continue
- line = line[len(header):].strip()
-
- for data in split_header_words([line]):
- name, value = data[0]
- standard = {}
- rest = {}
- for k in boolean_attrs:
- standard[k] = False
- for k, v in data[1:]:
- if k is not None:
- lc = k.lower()
- else:
- lc = None
- # don't lose case distinction for unknown fields
- if (lc in value_attrs) or (lc in boolean_attrs):
- k = lc
- if k in boolean_attrs:
- if v is None: v = True
- standard[k] = v
- elif k in value_attrs:
- standard[k] = v
- else:
- rest[k] = v
-
- h = standard.get
- expires = h("expires")
- discard = h("discard")
- if expires is not None:
- expires = iso2time(expires)
- if expires is None:
- discard = True
- domain = h("domain")
- domain_specified = domain.startswith(".")
- c = Cookie(h("version"), name, value,
- h("port"), h("port_spec"),
- domain, domain_specified, h("domain_dot"),
- h("path"), h("path_spec"),
- h("secure"),
- expires,
- discard,
- h("comment"),
- h("commenturl"),
- rest)
- if not ignore_discard and c.discard:
- continue
- if not ignore_expires and c.is_expired(now):
- continue
- self.set_cookie(c)
-
- except IOError:
- raise
- except Exception:
- _warn_unhandled_exception()
- raise LoadError("invalid Set-Cookie3 format file %r: %r" %
- (filename, line))
-
-
-class MozillaCookieJar(FileCookieJar):
- """
-
- WARNING: you may want to backup your browser's cookies file if you use
- this class to save cookies. I *think* it works, but there have been
- bugs in the past!
-
- This class differs from CookieJar only in the format it uses to save and
- load cookies to and from a file. This class uses the Mozilla/Netscape
- `cookies.txt' format. lynx uses this file format, too.
-
- Don't expect cookies saved while the browser is running to be noticed by
- the browser (in fact, Mozilla on unix will overwrite your saved cookies if
- you change them on disk while it's running; on Windows, you probably can't
- save at all while the browser is running).
-
- Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to
- Netscape cookies on saving.
-
- In particular, the cookie version and port number information is lost,
- together with information about whether or not Path, Port and Discard were
- specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the
- domain as set in the HTTP header started with a dot (yes, I'm aware some
- domains in Netscape files start with a dot and some don't -- trust me, you
- really don't want to know any more about this).
-
- Note that though Mozilla and Netscape use the same format, they use
- slightly different headers. The class saves cookies using the Netscape
- header by default (Mozilla can cope with that).
-
- """
- magic_re = re.compile("#( Netscape)? HTTP Cookie File")
- header = """\
-# Netscape HTTP Cookie File
-# http://www.netscape.com/newsref/std/cookie_spec.html
-# This is a generated file! Do not edit.
-
-"""
-
- def _really_load(self, f, filename, ignore_discard, ignore_expires):
- now = time.time()
-
- magic = f.readline()
- if not self.magic_re.search(magic):
- f.close()
- raise LoadError(
- "%r does not look like a Netscape format cookies file" %
- filename)
-
- try:
- while 1:
- line = f.readline()
- if line == "": break
-
- # last field may be absent, so keep any trailing tab
- if line.endswith("\n"): line = line[:-1]
-
- # skip comments and blank lines XXX what is $ for?
- if (line.strip().startswith(("#", "$")) or
- line.strip() == ""):
- continue
-
- domain, domain_specified, path, secure, expires, name, value = \
- line.split("\t")
- secure = (secure == "TRUE")
- domain_specified = (domain_specified == "TRUE")
- if name == "":
- # cookies.txt regards 'Set-Cookie: foo' as a cookie
- # with no name, whereas http.cookiejar regards it as a
- # cookie with no value.
- name = value
- value = None
-
- initial_dot = domain.startswith(".")
- assert domain_specified == initial_dot
-
- discard = False
- if expires == "":
- expires = None
- discard = True
-
- # assume path_specified is false
- c = Cookie(0, name, value,
- None, False,
- domain, domain_specified, initial_dot,
- path, False,
- secure,
- expires,
- discard,
- None,
- None,
- {})
- if not ignore_discard and c.discard:
- continue
- if not ignore_expires and c.is_expired(now):
- continue
- self.set_cookie(c)
-
- except IOError:
- raise
- except Exception:
- _warn_unhandled_exception()
- raise LoadError("invalid Netscape format cookies file %r: %r" %
- (filename, line))
-
- def save(self, filename=None, ignore_discard=False, ignore_expires=False):
- if filename is None:
- if self.filename is not None: filename = self.filename
- else: raise ValueError(MISSING_FILENAME_TEXT)
-
- f = open(filename, "w")
- try:
- f.write(self.header)
- now = time.time()
- for cookie in self:
- if not ignore_discard and cookie.discard:
- continue
- if not ignore_expires and cookie.is_expired(now):
- continue
- if cookie.secure: secure = "TRUE"
- else: secure = "FALSE"
- if cookie.domain.startswith("."): initial_dot = "TRUE"
- else: initial_dot = "FALSE"
- if cookie.expires is not None:
- expires = str(cookie.expires)
- else:
- expires = ""
- if cookie.value is None:
- # cookies.txt regards 'Set-Cookie: foo' as a cookie
- # with no name, whereas http.cookiejar regards it as a
- # cookie with no value.
- name = ""
- value = cookie.name
- else:
- name = cookie.name
- value = cookie.value
- f.write(
- "\t".join([cookie.domain, initial_dot, cookie.path,
- secure, expires, name, value])+
- "\n")
- finally:
- f.close()
+import time
+from future.backports.urllib.parse import urlparse, urlsplit, quote
+from future.backports.http.client import HTTP_PORT
+try:
+ import threading as _threading
+except ImportError:
+ import dummy_threading as _threading
+from calendar import timegm
+
+debug = False # set to True to enable debugging via the logging module
+logger = None
+
+def _debug(*args):
+ if not debug:
+ return
+ global logger
+ if not logger:
+ import logging
+ logger = logging.getLogger("http.cookiejar")
+ return logger.debug(*args)
+
+
+DEFAULT_HTTP_PORT = str(HTTP_PORT)
+MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
+ "instance initialised with one)")
+
+def _warn_unhandled_exception():
+ # There are a few catch-all except: statements in this module, for
+ # catching input that's bad in unexpected ways. Warn if any
+ # exceptions are caught there.
+ import io, warnings, traceback
+ f = io.StringIO()
+ traceback.print_exc(None, f)
+ msg = f.getvalue()
+ warnings.warn("http.cookiejar bug!\n%s" % msg, stacklevel=2)
+
+
+# Date/time conversion
+# -----------------------------------------------------------------------------
+
+EPOCH_YEAR = 1970
+def _timegm(tt):
+ year, month, mday, hour, min, sec = tt[:6]
+ if ((year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31) and
+ (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)):
+ return timegm(tt)
+ else:
+ return None
+
+DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
+MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
+ "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
+MONTHS_LOWER = []
+for month in MONTHS: MONTHS_LOWER.append(month.lower())
+
+def time2isoz(t=None):
+ """Return a string representing time in seconds since epoch, t.
+
+ If the function is called without an argument, it will use the current
+ time.
+
+ The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
+ representing Universal Time (UTC, aka GMT). An example of this format is:
+
+ 1994-11-24 08:49:37Z
+
+ """
+ if t is None:
+ dt = datetime.datetime.utcnow()
+ else:
+ dt = datetime.datetime.utcfromtimestamp(t)
+ return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
+ dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second)
+
+def time2netscape(t=None):
+ """Return a string representing time in seconds since epoch, t.
+
+ If the function is called without an argument, it will use the current
+ time.
+
+ The format of the returned string is like this:
+
+ Wed, DD-Mon-YYYY HH:MM:SS GMT
+
+ """
+ if t is None:
+ dt = datetime.datetime.utcnow()
+ else:
+ dt = datetime.datetime.utcfromtimestamp(t)
+ return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % (
+ DAYS[dt.weekday()], dt.day, MONTHS[dt.month-1],
+ dt.year, dt.hour, dt.minute, dt.second)
+
+
+UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}
+
+TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$", re.ASCII)
+def offset_from_tz_string(tz):
+ offset = None
+ if tz in UTC_ZONES:
+ offset = 0
+ else:
+ m = TIMEZONE_RE.search(tz)
+ if m:
+ offset = 3600 * int(m.group(2))
+ if m.group(3):
+ offset = offset + 60 * int(m.group(3))
+ if m.group(1) == '-':
+ offset = -offset
+ return offset
+
+def _str2time(day, mon, yr, hr, min, sec, tz):
+ # translate month name to number
+ # month numbers start with 1 (January)
+ try:
+ mon = MONTHS_LOWER.index(mon.lower())+1
+ except ValueError:
+ # maybe it's already a number
+ try:
+ imon = int(mon)
+ except ValueError:
+ return None
+ if 1 <= imon <= 12:
+ mon = imon
+ else:
+ return None
+
+ # make sure clock elements are defined
+ if hr is None: hr = 0
+ if min is None: min = 0
+ if sec is None: sec = 0
+
+ yr = int(yr)
+ day = int(day)
+ hr = int(hr)
+ min = int(min)
+ sec = int(sec)
+
+ if yr < 1000:
+ # find "obvious" year
+ cur_yr = time.localtime(time.time())[0]
+ m = cur_yr % 100
+ tmp = yr
+ yr = yr + cur_yr - m
+ m = m - tmp
+ if abs(m) > 50:
+ if m > 0: yr = yr + 100
+ else: yr = yr - 100
+
+ # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
+ t = _timegm((yr, mon, day, hr, min, sec, tz))
+
+ if t is not None:
+ # adjust time using timezone string, to get absolute time since epoch
+ if tz is None:
+ tz = "UTC"
+ tz = tz.upper()
+ offset = offset_from_tz_string(tz)
+ if offset is None:
+ return None
+ t = t - offset
+
+ return t
+
+STRICT_DATE_RE = re.compile(
+ r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
+ "(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$", re.ASCII)
+WEEKDAY_RE = re.compile(
+ r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I | re.ASCII)
+LOOSE_HTTP_DATE_RE = re.compile(
+ r"""^
+ (\d\d?) # day
+ (?:\s+|[-\/])
+ (\w+) # month
+ (?:\s+|[-\/])
+ (\d+) # year
+ (?:
+ (?:\s+|:) # separator before clock
+ (\d\d?):(\d\d) # hour:min
+ (?::(\d\d))? # optional seconds
+ )? # optional clock
+ \s*
+ ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
+ \s*
+ (?:\(\w+\))? # ASCII representation of timezone in parens.
+ \s*$""", re.X | re.ASCII)
+def http2time(text):
+ """Returns time in seconds since epoch of time represented by a string.
+
+ Return value is an integer.
+
+ None is returned if the format of str is unrecognized, the time is outside
+ the representable range, or the timezone string is not recognized. If the
+ string contains no timezone, UTC is assumed.
+
+ The timezone in the string may be numerical (like "-0800" or "+0100") or a
+ string timezone (like "UTC", "GMT", "BST" or "EST"). Currently, only the
+ timezone strings equivalent to UTC (zero offset) are known to the function.
+
+ The function loosely parses the following formats:
+
+ Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format
+ Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format
+ Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format
+ 09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday)
+ 08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday)
+ 08-Feb-1994 14:15:29 GMT -- broken rfc850 format (no weekday)
+
+ The parser ignores leading and trailing whitespace. The time may be
+ absent.
+
+ If the year is given with only 2 digits, the function will select the
+ century that makes the year closest to the current date.
+
+ """
+ # fast exit for strictly conforming string
+ m = STRICT_DATE_RE.search(text)
+ if m:
+ g = m.groups()
+ mon = MONTHS_LOWER.index(g[1].lower()) + 1
+ tt = (int(g[2]), mon, int(g[0]),
+ int(g[3]), int(g[4]), float(g[5]))
+ return _timegm(tt)
+
+ # No, we need some messy parsing...
+
+ # clean up
+ text = text.lstrip()
+ text = WEEKDAY_RE.sub("", text, 1) # Useless weekday
+
+ # tz is time zone specifier string
+ day, mon, yr, hr, min, sec, tz = [None]*7
+
+ # loose regexp parse
+ m = LOOSE_HTTP_DATE_RE.search(text)
+ if m is not None:
+ day, mon, yr, hr, min, sec, tz = m.groups()
+ else:
+ return None # bad format
+
+ return _str2time(day, mon, yr, hr, min, sec, tz)
+
+ISO_DATE_RE = re.compile(
+ """^
+ (\d{4}) # year
+ [-\/]?
+ (\d\d?) # numerical month
+ [-\/]?
+ (\d\d?) # day
+ (?:
+ (?:\s+|[-:Tt]) # separator before clock
+ (\d\d?):?(\d\d) # hour:min
+ (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional)
+ )? # optional clock
+ \s*
+ ([-+]?\d\d?:?(:?\d\d)?
+ |Z|z)? # timezone (Z is "zero meridian", i.e. GMT)
+ \s*$""", re.X | re. ASCII)
+def iso2time(text):
+ """
+ As for http2time, but parses the ISO 8601 formats:
+
+ 1994-02-03 14:15:29 -0100 -- ISO 8601 format
+ 1994-02-03 14:15:29 -- zone is optional
+ 1994-02-03 -- only date
+ 1994-02-03T14:15:29 -- Use T as separator
+ 19940203T141529Z -- ISO 8601 compact format
+ 19940203 -- only date
+
+ """
+ # clean up
+ text = text.lstrip()
+
+ # tz is time zone specifier string
+ day, mon, yr, hr, min, sec, tz = [None]*7
+
+ # loose regexp parse
+ m = ISO_DATE_RE.search(text)
+ if m is not None:
+ # XXX there's an extra bit of the timezone I'm ignoring here: is
+ # this the right thing to do?
+ yr, mon, day, hr, min, sec, tz, _ = m.groups()
+ else:
+ return None # bad format
+
+ return _str2time(day, mon, yr, hr, min, sec, tz)
+
+
+# Header parsing
+# -----------------------------------------------------------------------------
+
+def unmatched(match):
+ """Return unmatched part of re.Match object."""
+ start, end = match.span(0)
+ return match.string[:start]+match.string[end:]
+
+HEADER_TOKEN_RE = re.compile(r"^\s*([^=\s;,]+)")
+HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
+HEADER_VALUE_RE = re.compile(r"^\s*=\s*([^\s;,]*)")
+HEADER_ESCAPE_RE = re.compile(r"\\(.)")
+def split_header_words(header_values):
+ r"""Parse header values into a list of lists containing key,value pairs.
+
+ The function knows how to deal with ",", ";" and "=" as well as quoted
+ values after "=". A list of space separated tokens are parsed as if they
+ were separated by ";".
+
+ If the header_values passed as argument contains multiple values, then they
+ are treated as if they were a single value separated by comma ",".
+
+ This means that this function is useful for parsing header fields that
+ follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
+ the requirement for tokens).
+
+ headers = #header
+ header = (token | parameter) *( [";"] (token | parameter))
+
+ token = 1*<any CHAR except CTLs or separators>
+ separators = "(" | ")" | "<" | ">" | "@"
+ | "," | ";" | ":" | "\" | <">
+ | "/" | "[" | "]" | "?" | "="
+ | "{" | "}" | SP | HT
+
+ quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
+ qdtext = <any TEXT except <">>
+ quoted-pair = "\" CHAR
+
+ parameter = attribute "=" value
+ attribute = token
+ value = token | quoted-string
+
+ Each header is represented by a list of key/value pairs. The value for a
+ simple token (not part of a parameter) is None. Syntactically incorrect
+ headers will not necessarily be parsed as you would want.
+
+ This is easier to describe with some examples:
+
+ >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
+ [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
+ >>> split_header_words(['text/html; charset="iso-8859-1"'])
+ [[('text/html', None), ('charset', 'iso-8859-1')]]
+ >>> split_header_words([r'Basic realm="\"foo\bar\""'])
+ [[('Basic', None), ('realm', '"foobar"')]]
+
+ """
+ assert not isinstance(header_values, str)
+ result = []
+ for text in header_values:
+ orig_text = text
+ pairs = []
+ while text:
+ m = HEADER_TOKEN_RE.search(text)
+ if m:
+ text = unmatched(m)
+ name = m.group(1)
+ m = HEADER_QUOTED_VALUE_RE.search(text)
+ if m: # quoted value
+ text = unmatched(m)
+ value = m.group(1)
+ value = HEADER_ESCAPE_RE.sub(r"\1", value)
+ else:
+ m = HEADER_VALUE_RE.search(text)
+ if m: # unquoted value
+ text = unmatched(m)
+ value = m.group(1)
+ value = value.rstrip()
+ else:
+ # no value, a lone token
+ value = None
+ pairs.append((name, value))
+ elif text.lstrip().startswith(","):
+ # concatenated headers, as per RFC 2616 section 4.2
+ text = text.lstrip()[1:]
+ if pairs: result.append(pairs)
+ pairs = []
+ else:
+ # skip junk
+ non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text)
+ assert nr_junk_chars > 0, (
+ "split_header_words bug: '%s', '%s', %s" %
+ (orig_text, text, pairs))
+ text = non_junk
+ if pairs: result.append(pairs)
+ return result
+
+HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
+def join_header_words(lists):
+ """Do the inverse (almost) of the conversion done by split_header_words.
+
+ Takes a list of lists of (key, value) pairs and produces a single header
+ value. Attribute values are quoted if needed.
+
+ >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
+ 'text/plain; charset="iso-8859/1"'
+ >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
+ 'text/plain, charset="iso-8859/1"'
+
+ """
+ headers = []
+ for pairs in lists:
+ attr = []
+ for k, v in pairs:
+ if v is not None:
+ if not re.search(r"^\w+$", v):
+ v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v) # escape " and \
+ v = '"%s"' % v
+ k = "%s=%s" % (k, v)
+ attr.append(k)
+ if attr: headers.append("; ".join(attr))
+ return ", ".join(headers)
+
+def strip_quotes(text):
+ if text.startswith('"'):
+ text = text[1:]
+ if text.endswith('"'):
+ text = text[:-1]
+ return text
+
+def parse_ns_headers(ns_headers):
+ """Ad-hoc parser for Netscape protocol cookie-attributes.
+
+ The old Netscape cookie format for Set-Cookie can for instance contain
+ an unquoted "," in the expires field, so we have to use this ad-hoc
+ parser instead of split_header_words.
+
+ XXX This may not make the best possible effort to parse all the crap
+ that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient
+ parser is probably better, so could do worse than following that if
+ this ever gives any trouble.
+
+ Currently, this is also used for parsing RFC 2109 cookies.
+
+ """
+ known_attrs = ("expires", "domain", "path", "secure",
+ # RFC 2109 attrs (may turn up in Netscape cookies, too)
+ "version", "port", "max-age")
+
+ result = []
+ for ns_header in ns_headers:
+ pairs = []
+ version_set = False
+ for ii, param in enumerate(re.split(r";\s*", ns_header)):
+ param = param.rstrip()
+ if param == "": continue
+ if "=" not in param:
+ k, v = param, None
+ else:
+ k, v = re.split(r"\s*=\s*", param, 1)
+ k = k.lstrip()
+ if ii != 0:
+ lc = k.lower()
+ if lc in known_attrs:
+ k = lc
+ if k == "version":
+ # This is an RFC 2109 cookie.
+ v = strip_quotes(v)
+ version_set = True
+ if k == "expires":
+ # convert expires date to seconds since epoch
+ v = http2time(strip_quotes(v)) # None if invalid
+ pairs.append((k, v))
+
+ if pairs:
+ if not version_set:
+ pairs.append(("version", "0"))
+ result.append(pairs)
+
+ return result
+
+
+IPV4_RE = re.compile(r"\.\d+$", re.ASCII)
+def is_HDN(text):
+ """Return True if text is a host domain name."""
+ # XXX
+ # This may well be wrong. Which RFC is HDN defined in, if any (for
+ # the purposes of RFC 2965)?
+ # For the current implementation, what about IPv6? Remember to look
+ # at other uses of IPV4_RE also, if change this.
+ if IPV4_RE.search(text):
+ return False
+ if text == "":
+ return False
+ if text[0] == "." or text[-1] == ".":
+ return False
+ return True
+
+def domain_match(A, B):
+ """Return True if domain A domain-matches domain B, according to RFC 2965.
+
+ A and B may be host domain names or IP addresses.
+
+ RFC 2965, section 1:
+
+ Host names can be specified either as an IP address or a HDN string.
+ Sometimes we compare one host name with another. (Such comparisons SHALL
+ be case-insensitive.) Host A's name domain-matches host B's if
+
+ * their host name strings string-compare equal; or
+
+ * A is a HDN string and has the form NB, where N is a non-empty
+ name string, B has the form .B', and B' is a HDN string. (So,
+ x.y.com domain-matches .Y.com but not Y.com.)
+
+ Note that domain-match is not a commutative operation: a.b.c.com
+ domain-matches .c.com, but not the reverse.
+
+ """
+ # Note that, if A or B are IP addresses, the only relevant part of the
+ # definition of the domain-match algorithm is the direct string-compare.
+ A = A.lower()
+ B = B.lower()
+ if A == B:
+ return True
+ if not is_HDN(A):
+ return False
+ i = A.rfind(B)
+ if i == -1 or i == 0:
+ # A does not have form NB, or N is the empty string
+ return False
+ if not B.startswith("."):
+ return False
+ if not is_HDN(B[1:]):
+ return False
+ return True
+
+def liberal_is_HDN(text):
+ """Return True if text is a sort-of-like a host domain name.
+
+ For accepting/blocking domains.
+
+ """
+ if IPV4_RE.search(text):
+ return False
+ return True
+
+def user_domain_match(A, B):
+ """For blocking/accepting domains.
+
+ A and B may be host domain names or IP addresses.
+
+ """
+ A = A.lower()
+ B = B.lower()
+ if not (liberal_is_HDN(A) and liberal_is_HDN(B)):
+ if A == B:
+ # equal IP addresses
+ return True
+ return False
+ initial_dot = B.startswith(".")
+ if initial_dot and A.endswith(B):
+ return True
+ if not initial_dot and A == B:
+ return True
+ return False
+
+cut_port_re = re.compile(r":\d+$", re.ASCII)
+def request_host(request):
+ """Return request-host, as defined by RFC 2965.
+
+ Variation from RFC: returned value is lowercased, for convenient
+ comparison.
+
+ """
+ url = request.get_full_url()
+ host = urlparse(url)[1]
+ if host == "":
+ host = request.get_header("Host", "")
+
+ # remove port, if present
+ host = cut_port_re.sub("", host, 1)
+ return host.lower()
+
+def eff_request_host(request):
+ """Return a tuple (request-host, effective request-host name).
+
+ As defined by RFC 2965, except both are lowercased.
+
+ """
+ erhn = req_host = request_host(request)
+ if req_host.find(".") == -1 and not IPV4_RE.search(req_host):
+ erhn = req_host + ".local"
+ return req_host, erhn
+
+def request_path(request):
+ """Path component of request-URI, as defined by RFC 2965."""
+ url = request.get_full_url()
+ parts = urlsplit(url)
+ path = escape_path(parts.path)
+ if not path.startswith("/"):
+ # fix bad RFC 2396 absoluteURI
+ path = "/" + path
+ return path
+
+def request_port(request):
+ host = request.host
+ i = host.find(':')
+ if i >= 0:
+ port = host[i+1:]
+ try:
+ int(port)
+ except ValueError:
+ _debug("nonnumeric port: '%s'", port)
+ return None
+ else:
+ port = DEFAULT_HTTP_PORT
+ return port
+
+# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
+# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
+HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
+ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
+def uppercase_escaped_char(match):
+ return "%%%s" % match.group(1).upper()
+def escape_path(path):
+ """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
+ # There's no knowing what character encoding was used to create URLs
+ # containing %-escapes, but since we have to pick one to escape invalid
+ # path characters, we pick UTF-8, as recommended in the HTML 4.0
+ # specification:
+ # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
+ # And here, kind of: draft-fielding-uri-rfc2396bis-03
+ # (And in draft IRI specification: draft-duerst-iri-05)
+ # (And here, for new URI schemes: RFC 2718)
+ path = quote(path, HTTP_PATH_SAFE)
+ path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path)
+ return path
+
+def reach(h):
+ """Return reach of host h, as defined by RFC 2965, section 1.
+
+ The reach R of a host name H is defined as follows:
+
+ * If
+
+ - H is the host domain name of a host; and,
+
+ - H has the form A.B; and
+
+ - A has no embedded (that is, interior) dots; and
+
+ - B has at least one embedded dot, or B is the string "local".
+ then the reach of H is .B.
+
+ * Otherwise, the reach of H is H.
+
+ >>> reach("www.acme.com")
+ '.acme.com'
+ >>> reach("acme.com")
+ 'acme.com'
+ >>> reach("acme.local")
+ '.local'
+
+ """
+ i = h.find(".")
+ if i >= 0:
+ #a = h[:i] # this line is only here to show what a is
+ b = h[i+1:]
+ i = b.find(".")
+ if is_HDN(h) and (i >= 0 or b == "local"):
+ return "."+b
+ return h
+
+def is_third_party(request):
+ """
+
+ RFC 2965, section 3.3.6:
+
+ An unverifiable transaction is to a third-party host if its request-
+ host U does not domain-match the reach R of the request-host O in the
+ origin transaction.
+
+ """
+ req_host = request_host(request)
+ if not domain_match(req_host, reach(request.get_origin_req_host())):
+ return True
+ else:
+ return False
+
+
class Cookie(object):
    """A single HTTP cookie, Netscape or RFC 2965.

    This is intentionally a plain attribute holder: nothing stops you from
    building an instance that violates the cookie standards.  Parsing,
    defaulting and normalisation into the representation used here are the
    job of CookieJar.make_cookies, and deciding whether a cookie may be
    accepted from, or returned to, a server is the job of CookiePolicy.

    A header may mention the port without giving it a value ("Port" rather
    than "Port=80", for example); in that case port is None.

    """

    def __init__(self, version, name, value,
                 port, port_specified,
                 domain, domain_specified, domain_initial_dot,
                 path, path_specified,
                 secure,
                 expires,
                 discard,
                 comment,
                 comment_url,
                 rest,
                 rfc2109=False,
                 ):
        """Store the given cookie attributes on the instance.

        version and expires are coerced with int() when they are not None.
        Raises ValueError when port is None but port_specified is True.
        rest is a mapping of non-standard attributes; a shallow copy is kept.
        """
        if version is not None:
            version = int(version)
        if expires is not None:
            expires = int(expires)
        if port is None and port_specified is True:
            raise ValueError("if port is None, port_specified must be false")

        # Identity of the cookie.
        self.name = name
        self.value = value
        self.version = version
        self.rfc2109 = rfc2109

        # Scope: domain (case-normalised as per RFC 2965 section 3.3.3),
        # path and port.
        self.domain = domain.lower()
        self.domain_specified = domain_specified
        # Whether the domain cookie-attribute was written with a leading dot
        # has to be remembered in order to follow RFC 2965 (as clarified in
        # draft errata): it is needed for the returned $Domain value.
        self.domain_initial_dot = domain_initial_dot
        self.path = path
        self.path_specified = path_specified
        self.port = port
        self.port_specified = port_specified

        # Lifetime and remaining standard attributes.
        self.secure = secure
        self.expires = expires
        self.discard = discard
        self.comment = comment
        self.comment_url = comment_url

        # Non-standard attributes, copied so the caller's mapping is not
        # shared with this instance.
        self._rest = copy.copy(rest)

    def has_nonstandard_attr(self, name):
        """Return whether a non-standard attribute called name is stored."""
        return name in self._rest

    def get_nonstandard_attr(self, name, default=None):
        """Return the non-standard attribute name, or default if absent."""
        return self._rest.get(name, default)

    def set_nonstandard_attr(self, name, value):
        """Store value as the non-standard attribute name."""
        self._rest[name] = value

    def is_expired(self, now=None):
        """Return True if the cookie has an expiry time that is <= now.

        now defaults to time.time().  A cookie whose expires is None never
        counts as expired.
        """
        if now is None:
            now = time.time()
        return bool(self.expires is not None and self.expires <= now)

    def __str__(self):
        port_part = "" if self.port is None else ":" + self.port
        scope = self.domain + port_part + self.path
        if self.value is None:
            label = self.name
        else:
            label = "%s=%s" % (self.name, self.value)
        return "<Cookie %s for %s>" % (label, scope)

    @as_native_str()
    def __repr__(self):
        field_names = (
            "version", "name", "value",
            "port", "port_specified",
            "domain", "domain_specified", "domain_initial_dot",
            "path", "path_specified",
            "secure", "expires", "discard", "comment", "comment_url",
        )
        parts = []
        for field in field_names:
            current = getattr(self, field)
            ### Python-Future:
            # Avoid u'...' prefixes for unicode strings:
            if isinstance(current, str):
                current = str(current)
            ###
            parts.append(str("%s=%s") % (field, repr(current)))
        parts.append("rest=%s" % repr(self._rest))
        parts.append("rfc2109=%s" % repr(self.rfc2109))
        return "Cookie(%s)" % ", ".join(parts)
+
+
class CookiePolicy(object):
    """Interface deciding which cookies are accepted from and sent to servers.

    A policy may also modify cookies, though that is probably a bad idea.

    DefaultCookiePolicy is the subclass that implements the standard rules
    for Netscape and RFC 2965 cookies; derive from it if you want a
    customised policy.

    """

    def set_ok(self, cookie, request):
        """Return true if (and only if) cookie should be accepted from server.

        Pre-expired cookies currently never reach this method: the CookieJar
        class deletes them itself.

        This base implementation always raises NotImplementedError.
        """
        raise NotImplementedError()

    def return_ok(self, cookie, request):
        """Return true if (and only if) cookie should be returned to server.

        This base implementation always raises NotImplementedError.
        """
        raise NotImplementedError()

    def domain_return_ok(self, domain, request):
        """Return false if no cookie of this cookie domain should be returned.

        The base implementation accepts every domain.
        """
        return True

    def path_return_ok(self, path, request):
        """Return false if no cookie with this cookie path should be returned.

        The base implementation accepts every path.
        """
        return True
+
+
class DefaultCookiePolicy(CookiePolicy):
    """Implements the standard rules for accepting and returning cookies.

    set_ok() and return_ok() each run a fixed sequence of per-aspect checks
    (the set_ok_* and return_ok_* methods); the cookie is rejected as soon as
    one of them returns False.
    """

    # Bit flags that can be combined in strict_ns_domain.
    DomainStrictNoDots = 1
    DomainStrictNonDomain = 2
    DomainRFC2965Match = 4

    DomainLiberal = 0
    DomainStrict = DomainStrictNoDots|DomainStrictNonDomain

    def __init__(self,
                 blocked_domains=None, allowed_domains=None,
                 netscape=True, rfc2965=False,
                 rfc2109_as_netscape=None,
                 hide_cookie2=False,
                 strict_domain=False,
                 strict_rfc2965_unverifiable=True,
                 strict_ns_unverifiable=False,
                 strict_ns_domain=DomainLiberal,
                 strict_ns_set_initial_dollar=False,
                 strict_ns_set_path=False,
                 ):
        """Constructor arguments should be passed as keyword arguments only."""
        self.netscape = netscape
        self.rfc2965 = rfc2965
        self.rfc2109_as_netscape = rfc2109_as_netscape
        self.hide_cookie2 = hide_cookie2
        self.strict_domain = strict_domain
        self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
        self.strict_ns_unverifiable = strict_ns_unverifiable
        self.strict_ns_domain = strict_ns_domain
        self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
        self.strict_ns_set_path = strict_ns_set_path

        # The block-list is always a tuple (possibly empty); the allow-list
        # is either None (meaning "no allow-list") or a tuple.
        if blocked_domains is not None:
            self._blocked_domains = tuple(blocked_domains)
        else:
            self._blocked_domains = ()

        if allowed_domains is not None:
            allowed_domains = tuple(allowed_domains)
        self._allowed_domains = allowed_domains

    def blocked_domains(self):
        """Return the sequence of blocked domains (as a tuple)."""
        return self._blocked_domains

    def set_blocked_domains(self, blocked_domains):
        """Set the sequence of blocked domains."""
        self._blocked_domains = tuple(blocked_domains)

    def is_blocked(self, domain):
        """Return True if domain matches any entry of the block-list."""
        return any(user_domain_match(domain, blocked_domain)
                   for blocked_domain in self._blocked_domains)

    def allowed_domains(self):
        """Return None, or the sequence of allowed domains (as a tuple)."""
        return self._allowed_domains

    def set_allowed_domains(self, allowed_domains):
        """Set the sequence of allowed domains, or None."""
        if allowed_domains is not None:
            allowed_domains = tuple(allowed_domains)
        self._allowed_domains = allowed_domains

    def is_not_allowed(self, domain):
        """Return True if an allow-list exists and domain matches none of it."""
        if self._allowed_domains is None:
            return False
        return not any(user_domain_match(domain, allowed_domain)
                       for allowed_domain in self._allowed_domains)

    def set_ok(self, cookie, request):
        """
        If you override .set_ok(), be sure to call this method.  If it returns
        false, so should your subclass (assuming your subclass wants to be more
        strict about which cookies to accept).

        """
        _debug(" - checking cookie %s=%s", cookie.name, cookie.value)

        assert cookie.name is not None

        # Checks run in this order; the first failure rejects the cookie.
        for n in "version", "verifiability", "name", "path", "domain", "port":
            fn_name = "set_ok_"+n
            fn = getattr(self, fn_name)
            if not fn(cookie, request):
                return False

        return True

    def set_ok_version(self, cookie, request):
        """Reject cookies with no version or whose protocol is switched off."""
        if cookie.version is None:
            # Version is always set to 0 by parse_ns_headers if it's a Netscape
            # cookie, so this must be an invalid RFC 2965 cookie.
            _debug(" Set-Cookie2 without version attribute (%s=%s)",
                   cookie.name, cookie.value)
            return False
        if cookie.version > 0 and not self.rfc2965:
            _debug(" RFC 2965 cookies are switched off")
            return False
        elif cookie.version == 0 and not self.netscape:
            _debug(" Netscape cookies are switched off")
            return False
        return True

    def _third_party_unverifiable_ok(self, cookie, request):
        """Shared rule behind set_ok_verifiability and return_ok_verifiability.

        Returns False for a third-party request in an unverifiable
        transaction when the strictness flag matching the cookie's version
        is enabled; True otherwise.
        """
        if request.unverifiable and is_third_party(request):
            if cookie.version > 0 and self.strict_rfc2965_unverifiable:
                _debug(" third-party RFC 2965 cookie during "
                       "unverifiable transaction")
                return False
            elif cookie.version == 0 and self.strict_ns_unverifiable:
                _debug(" third-party Netscape cookie during "
                       "unverifiable transaction")
                return False
        return True

    def set_ok_verifiability(self, cookie, request):
        """Reject third-party cookies in unverifiable transactions if strict."""
        return self._third_party_unverifiable_ok(cookie, request)

    def set_ok_name(self, cookie, request):
        """Optionally reject version-0 cookies whose name starts with '$'."""
        # Try and stop servers setting V0 cookies designed to hack other
        # servers that know both V0 and V1 protocols.
        if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
            cookie.name.startswith("$")):
            _debug(" illegal name (starts with '$'): '%s'", cookie.name)
            return False
        return True

    def set_ok_path(self, cookie, request):
        """Reject an explicit path that is not a prefix of the request path.

        Applies to cookies with version > 0, and to version-0 cookies only
        when strict_ns_set_path is set.
        """
        if cookie.path_specified:
            req_path = request_path(request)
            if ((cookie.version > 0 or
                 (cookie.version == 0 and self.strict_ns_set_path)) and
                not req_path.startswith(cookie.path)):
                _debug(" path attribute %s is not a prefix of request "
                       "path %s", cookie.path, req_path)
                return False
        return True

    def set_ok_domain(self, cookie, request):
        """Apply block/allow lists and the domain-matching rules on setting."""
        if self.is_blocked(cookie.domain):
            _debug(" domain %s is in user block-list", cookie.domain)
            return False
        if self.is_not_allowed(cookie.domain):
            _debug(" domain %s is not in user allow-list", cookie.domain)
            return False
        # The remaining rules only apply to an explicitly given domain.
        if cookie.domain_specified:
            req_host, erhn = eff_request_host(request)
            domain = cookie.domain
            if self.strict_domain and (domain.count(".") >= 2):
                # XXX This should probably be compared with the Konqueror
                # (kcookiejar.cpp) and Mozilla implementations, but it's a
                # losing battle.
                i = domain.rfind(".")
                j = domain.rfind(".", 0, i)
                if j == 0:  # domain like .foo.bar
                    tld = domain[i+1:]
                    sld = domain[j+1:i]
                    if sld.lower() in ("co", "ac", "com", "edu", "org", "net",
                       "gov", "mil", "int", "aero", "biz", "cat", "coop",
                       "info", "jobs", "mobi", "museum", "name", "pro",
                       "travel", "eu") and len(tld) == 2:
                        # domain like .co.uk
                        _debug(" country-code second level domain %s", domain)
                        return False
            if domain.startswith("."):
                undotted_domain = domain[1:]
            else:
                undotted_domain = domain
            embedded_dots = "." in undotted_domain
            if not embedded_dots and domain != ".local":
                _debug(" non-local domain %s contains no embedded dot",
                       domain)
                return False
            if cookie.version == 0:
                # Netscape rule: plain suffix match, also tried with a dot
                # prepended to the effective request-host name.
                if (not erhn.endswith(domain) and
                    (not erhn.startswith(".") and
                     not ("."+erhn).endswith(domain))):
                    _debug(" effective request-host %s (even with added "
                           "initial dot) does not end with %s",
                           erhn, domain)
                    return False
            if (cookie.version > 0 or
                (self.strict_ns_domain & self.DomainRFC2965Match)):
                if not domain_match(erhn, domain):
                    _debug(" effective request-host %s does not domain-match "
                           "%s", erhn, domain)
                    return False
            if (cookie.version > 0 or
                (self.strict_ns_domain & self.DomainStrictNoDots)):
                # Whatever precedes the domain in the request host must not
                # itself contain a dot (unless the host is an IPv4 address).
                host_prefix = req_host[:-len(domain)]
                if (host_prefix.find(".") >= 0 and
                    not IPV4_RE.search(req_host)):
                    _debug(" host prefix %s for domain %s contains a dot",
                           host_prefix, domain)
                    return False
        return True

    def set_ok_port(self, cookie, request):
        """Require the request port to appear in an explicit port list."""
        if cookie.port_specified:
            req_port = request_port(request)
            if req_port is None:
                req_port = "80"
            else:
                req_port = str(req_port)
            for p in cookie.port.split(","):
                try:
                    int(p)
                except ValueError:
                    _debug(" bad port %s (not numeric)", p)
                    return False
                if p == req_port:
                    break
            else:
                # Loop finished without finding the request port.
                _debug(" request port (%s) not found in %s",
                       req_port, cookie.port)
                return False
        return True

    def return_ok(self, cookie, request):
        """
        If you override .return_ok(), be sure to call this method.  If it
        returns false, so should your subclass (assuming your subclass wants to
        be more strict about which cookies to return).

        """
        # Path has already been checked by .path_return_ok(), and domain
        # blocking done by .domain_return_ok().
        _debug(" - checking cookie %s=%s", cookie.name, cookie.value)

        for n in "version", "verifiability", "secure", "expires", "port", "domain":
            fn_name = "return_ok_"+n
            fn = getattr(self, fn_name)
            if not fn(cookie, request):
                return False
        return True

    def return_ok_version(self, cookie, request):
        """Refuse to return cookies whose protocol is switched off."""
        if cookie.version > 0 and not self.rfc2965:
            _debug(" RFC 2965 cookies are switched off")
            return False
        elif cookie.version == 0 and not self.netscape:
            _debug(" Netscape cookies are switched off")
            return False
        return True

    def return_ok_verifiability(self, cookie, request):
        """Refuse third-party cookies in unverifiable transactions if strict."""
        return self._third_party_unverifiable_ok(cookie, request)

    def return_ok_secure(self, cookie, request):
        """Refuse to return a secure cookie unless request.type is "https"."""
        if cookie.secure and request.type != "https":
            _debug(" secure cookie with non-secure request")
            return False
        return True

    def return_ok_expires(self, cookie, request):
        """Refuse to return an expired cookie.

        The reference time is self._now when that attribute has been set on
        the policy (CookieJar assigns it before consulting the policy).  If
        it has not been set, None is passed so that Cookie.is_expired falls
        back to the current time instead of this method raising
        AttributeError.
        """
        if cookie.is_expired(getattr(self, "_now", None)):
            _debug(" cookie expired")
            return False
        return True

    def return_ok_port(self, cookie, request):
        """Require the request port to appear in the cookie's port list."""
        if cookie.port:
            req_port = request_port(request)
            if req_port is None:
                req_port = "80"
            else:
                # Normalise exactly as set_ok_port does, so both directions
                # compare the same representation against the port list.
                req_port = str(req_port)
            for p in cookie.port.split(","):
                if p == req_port:
                    break
            else:
                _debug(" request port %s does not match cookie port %s",
                       req_port, cookie.port)
                return False
        return True

    def return_ok_domain(self, cookie, request):
        """Check the cookie domain against the (effective) request host."""
        req_host, erhn = eff_request_host(request)
        domain = cookie.domain

        # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
        if (cookie.version == 0 and
            (self.strict_ns_domain & self.DomainStrictNonDomain) and
            not cookie.domain_specified and domain != erhn):
            _debug(" cookie with unspecified domain does not string-compare "
                   "equal to request domain")
            return False

        if cookie.version > 0 and not domain_match(erhn, domain):
            _debug(" effective request-host name %s does not domain-match "
                   "RFC 2965 cookie domain %s", erhn, domain)
            return False
        if cookie.version == 0 and not ("."+erhn).endswith(domain):
            _debug(" request-host %s does not match Netscape cookie domain "
                   "%s", req_host, domain)
            return False
        return True

    def domain_return_ok(self, domain, request):
        """Cheap pre-filter on a cookie domain before its cookies are examined."""
        # Liberal check of the domain.  This is here as an optimization to
        # avoid having to load lots of MSIE cookie files unless necessary.
        req_host, erhn = eff_request_host(request)
        if not req_host.startswith("."):
            req_host = "."+req_host
        if not erhn.startswith("."):
            erhn = "."+erhn
        if not (req_host.endswith(domain) or erhn.endswith(domain)):
            #_debug(" request domain %s does not match cookie domain %s",
            #       req_host, domain)
            return False

        if self.is_blocked(domain):
            _debug(" domain %s is in user block-list", domain)
            return False
        if self.is_not_allowed(domain):
            _debug(" domain %s is not in user allow-list", domain)
            return False

        return True

    def path_return_ok(self, path, request):
        """Return False unless the request path starts with the cookie path."""
        _debug("- checking cookie path=%s", path)
        req_path = request_path(request)
        if not req_path.startswith(path):
            _debug(" %s does not path-match %s", req_path, path)
            return False
        return True
+
+
def vals_sorted_by_key(adict):
    """Return the values of adict, ordered by their sorted keys.

    The result is whatever ``map`` produces, with each value looked up via
    ``adict.get``.
    """
    ordered_keys = list(adict.keys())
    ordered_keys.sort()
    return map(adict.get, ordered_keys)
+
def deepvalues(mapping):
    """Yield the leaves of a nested mapping, depth-first, in sorted key order.

    A value counts as a nested mapping when it has an ``items`` attribute;
    such values are descended into rather than yielded themselves.
    """
    for entry in vals_sorted_by_key(mapping):
        # Probe for mapping-ness by attribute access only.
        try:
            entry.items
        except AttributeError:
            is_nested = False
        else:
            is_nested = True

        if is_nested:
            for leaf in deepvalues(entry):
                yield leaf
        else:
            yield entry
+
+
+# Used as second parameter to dict.get() method, to distinguish absent
+# dict key from one with a None value.
class Absent(object):
    """Marker passed as the default to dict.get().

    It lets a caller tell a missing key apart from a key whose value is None.
    """
+
class CookieJar(object):
    """Collection of HTTP cookies.

    You may not need to know about this class: try
    urllib.request.build_opener(HTTPCookieProcessor).open(url).

    Cookies are stored in a three-level dict, self._cookies, indexed by
    domain, then path, then name.  Methods that read or change it on behalf
    of a request take self._cookies_lock.
    """

    # Pre-compiled patterns.  non_word_re and quote_re are used when quoting
    # returned cookie values in _cookie_attrs().
    non_word_re = re.compile(r"\W")
    quote_re = re.compile(r"([\"\\])")
    strict_domain_re = re.compile(r"\.?[^.]*")
    domain_re = re.compile(r"[^.]*")
    dots_re = re.compile(r"^\.+")

    magic_re = re.compile(r"^\#LWP-Cookies-(\d+\.\d+)", re.ASCII)

    def __init__(self, policy=None):
        """Create an empty jar; policy defaults to a DefaultCookiePolicy()."""
        if policy is None:
            policy = DefaultCookiePolicy()
        self._policy = policy

        self._cookies_lock = _threading.RLock()
        self._cookies = {}

    def set_policy(self, policy):
        """Replace the CookiePolicy used by this jar."""
        self._policy = policy

    def _cookies_for_domain(self, domain, request):
        """Return the stored cookies of one domain that the policy allows."""
        cookies = []
        if not self._policy.domain_return_ok(domain, request):
            return []
        _debug("Checking %s for cookies to return", domain)
        cookies_by_path = self._cookies[domain]
        for path in cookies_by_path.keys():
            if not self._policy.path_return_ok(path, request):
                continue
            cookies_by_name = cookies_by_path[path]
            for cookie in cookies_by_name.values():
                if not self._policy.return_ok(cookie, request):
                    _debug(" not returning cookie")
                    continue
                _debug(" it's a match")
                cookies.append(cookie)
        return cookies

    def _cookies_for_request(self, request):
        """Return a list of cookies to be returned to server."""
        cookies = []
        for domain in self._cookies.keys():
            cookies.extend(self._cookies_for_domain(domain, request))
        return cookies

    def _cookie_attrs(self, cookies):
        """Return a list of cookie-attributes to be returned to server.

        like ['foo="bar"; $Path="/"', ...]

        The $Version attribute is also added when appropriate (currently only
        once per request).

        Note that the cookies list is sorted in place.

        """
        # add cookies in order of most specific (ie. longest) path first
        cookies.sort(key=lambda a: len(a.path), reverse=True)

        version_set = False

        attrs = []
        for cookie in cookies:
            # set version of Cookie header
            # XXX
            # What should it be if multiple matching Set-Cookie headers have
            #  different versions themselves?
            # Answer: there is no answer; was supposed to be settled by
            #  RFC 2965 errata, but that may never appear...
            version = cookie.version
            if not version_set:
                version_set = True
                if version > 0:
                    attrs.append("$Version=%s" % version)

            # quote cookie value if necessary
            # (not for Netscape protocol, which already has any quotes
            #  intact, due to the poorly-specified Netscape Cookie: syntax)
            if ((cookie.value is not None) and
                self.non_word_re.search(cookie.value) and version > 0):
                value = self.quote_re.sub(r"\\\1", cookie.value)
            else:
                value = cookie.value

            # add cookie-attributes to be returned in Cookie header
            if cookie.value is None:
                attrs.append(cookie.name)
            else:
                attrs.append("%s=%s" % (cookie.name, value))
            if version > 0:
                if cookie.path_specified:
                    attrs.append('$Path="%s"' % cookie.path)
                if cookie.domain.startswith("."):
                    domain = cookie.domain
                    # Drop the leading dot again if the server did not send
                    # one in the original Domain attribute.
                    if (not cookie.domain_initial_dot and
                        domain.startswith(".")):
                        domain = domain[1:]
                    attrs.append('$Domain="%s"' % domain)
                if cookie.port is not None:
                    p = "$Port"
                    if cookie.port_specified:
                        p = p + ('="%s"' % cookie.port)
                    attrs.append(p)

        return attrs

    def add_cookie_header(self, request):
        """Add correct Cookie: header to request (urllib.request.Request object).

        The Cookie2 header is also added unless policy.hide_cookie2 is true.

        """
        _debug("add_cookie_header")
        with self._cookies_lock:
            self._policy._now = self._now = int(time.time())

            cookies = self._cookies_for_request(request)

            attrs = self._cookie_attrs(cookies)
            if attrs:
                if not request.has_header("Cookie"):
                    request.add_unredirected_header(
                        "Cookie", "; ".join(attrs))

            # if necessary, advertise that we know RFC 2965
            if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
                not request.has_header("Cookie2")):
                for cookie in cookies:
                    if cookie.version != 1:
                        request.add_unredirected_header("Cookie2", '$Version="1"')
                        break

        # Done after the lock has been released, as in the original flow.
        self.clear_expired_cookies()

    def _normalized_cookie_tuples(self, attrs_set):
        """Return list of tuples containing normalised cookie information.

        attrs_set is the list of lists of key,value pairs extracted from
        the Set-Cookie or Set-Cookie2 headers.

        Tuples are name, value, standard, rest, where name and value are the
        cookie name and value, standard is a dictionary containing the standard
        cookie-attributes (discard, secure, version, expires or max-age,
        domain, path and port) and rest is a dictionary containing the rest of
        the cookie-attributes.

        A cookie with an unusable attribute (missing domain value, missing or
        non-numeric max-age, missing value for another required attribute) is
        dropped; the other cookies in attrs_set are still processed.

        """
        cookie_tuples = []

        boolean_attrs = "discard", "secure"
        value_attrs = ("version",
                       "expires", "max-age",
                       "domain", "path", "port",
                       "comment", "commenturl")

        for cookie_attrs in attrs_set:
            name, value = cookie_attrs[0]

            # Build dictionary of standard cookie-attributes (standard) and
            # dictionary of other cookie-attributes (rest).

            # Note: expiry time is normalised to seconds since epoch.  V0
            # cookies should have the Expires cookie-attribute, and V1 cookies
            # should have Max-Age, but since V1 includes RFC 2109 cookies (and
            # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
            # accept either (but prefer Max-Age).
            max_age_set = False

            bad_cookie = False

            standard = {}
            rest = {}
            for k, v in cookie_attrs[1:]:
                lc = k.lower()
                # don't lose case distinction for unknown fields
                if lc in value_attrs or lc in boolean_attrs:
                    k = lc
                if k in boolean_attrs and v is None:
                    # boolean cookie-attribute is present, but has no value
                    # (like "discard", rather than "port=80")
                    v = True
                if k in standard:
                    # only first value is significant
                    continue
                if k == "domain":
                    if v is None:
                        _debug(" missing value for domain attribute")
                        bad_cookie = True
                        break
                    # RFC 2965 section 3.3.3
                    v = v.lower()
                if k == "expires":
                    if max_age_set:
                        # Prefer max-age to expires (like Mozilla)
                        continue
                    if v is None:
                        _debug(" missing or invalid value for expires "
                               "attribute: treating as session cookie")
                        continue
                if k == "max-age":
                    max_age_set = True
                    try:
                        v = int(v)
                    except (TypeError, ValueError):
                        # TypeError covers a Max-Age with no value at all
                        # (v is None); without it the exception would escape
                        # and discard every cookie in this header set.
                        _debug(" missing or invalid (non-numeric) value for "
                               "max-age attribute")
                        bad_cookie = True
                        break
                    # convert RFC 2965 Max-Age to seconds since epoch
                    # XXX Strictly you're supposed to follow RFC 2616
                    #   age-calculation rules.  Remember that zero Max-Age
                    #   is a request to discard (old and new) cookie, though.
                    k = "expires"
                    v = self._now + v
                if (k in value_attrs) or (k in boolean_attrs):
                    if (v is None and
                        k not in ("port", "comment", "commenturl")):
                        _debug(" missing value for %s attribute", k)
                        bad_cookie = True
                        break
                    standard[k] = v
                else:
                    rest[k] = v

            if bad_cookie:
                continue

            cookie_tuples.append((name, value, standard, rest))

        return cookie_tuples

    def _cookie_from_cookie_tuple(self, tup, request):
        """Build a Cookie from one normalised tuple, filling in defaults.

        Returns None when the version is not an integer, or when the expiry
        time is not in the future (in which case any stored cookie with the
        same domain, path and name is removed).
        """
        # standard is dict of standard cookie-attributes, rest is dict of the
        # rest of them
        name, value, standard, rest = tup

        domain = standard.get("domain", Absent)
        path = standard.get("path", Absent)
        port = standard.get("port", Absent)
        expires = standard.get("expires", Absent)

        # set the easy defaults
        version = standard.get("version", None)
        if version is not None:
            try:
                version = int(version)
            except ValueError:
                return None  # invalid version, ignore cookie
        secure = standard.get("secure", False)
        # (discard is also set if expires is Absent)
        discard = standard.get("discard", False)
        comment = standard.get("comment", None)
        comment_url = standard.get("commenturl", None)

        # set default path
        if path is not Absent and path != "":
            path_specified = True
            path = escape_path(path)
        else:
            path_specified = False
            path = request_path(request)
            i = path.rfind("/")
            if i != -1:
                if version == 0:
                    # Netscape spec parts company from reality here
                    path = path[:i]
                else:
                    path = path[:i+1]
            if len(path) == 0:
                path = "/"

        # set default domain
        domain_specified = domain is not Absent
        # but first we have to remember whether it starts with a dot
        domain_initial_dot = False
        if domain_specified:
            domain_initial_dot = bool(domain.startswith("."))
        if domain is Absent:
            req_host, erhn = eff_request_host(request)
            domain = erhn
        elif not domain.startswith("."):
            domain = "."+domain

        # set default port
        port_specified = False
        if port is not Absent:
            if port is None:
                # Port attr present, but has no value: default to request port.
                # Cookie should then only be sent back on that port.
                port = request_port(request)
            else:
                port_specified = True
                port = re.sub(r"\s+", "", port)
        else:
            # No port attr present.  Cookie can be sent back on any port.
            port = None

        # set default expires and discard
        if expires is Absent:
            expires = None
            discard = True
        elif expires <= self._now:
            # Expiry date in past is request to delete cookie.  This can't be
            # in DefaultCookiePolicy, because can't delete cookies there.
            try:
                self.clear(domain, path, name)
            except KeyError:
                pass
            _debug("Expiring cookie, domain='%s', path='%s', name='%s'",
                   domain, path, name)
            return None

        return Cookie(version,
                      name, value,
                      port, port_specified,
                      domain, domain_specified, domain_initial_dot,
                      path, path_specified,
                      secure,
                      expires,
                      discard,
                      comment,
                      comment_url,
                      rest)

    def _cookies_from_attrs_set(self, attrs_set, request):
        """Turn parsed header attributes into a list of Cookie objects."""
        cookie_tuples = self._normalized_cookie_tuples(attrs_set)

        cookies = []
        for tup in cookie_tuples:
            cookie = self._cookie_from_cookie_tuple(tup, request)
            if cookie:
                cookies.append(cookie)
        return cookies

    def _process_rfc2109_cookies(self, cookies):
        """Flag version-1 cookies as RFC 2109, optionally downgrading them.

        When the policy's rfc2109_as_netscape is None, downgrading to
        version 0 happens exactly when the policy has rfc2965 switched off.
        """
        rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None)
        if rfc2109_as_ns is None:
            rfc2109_as_ns = not self._policy.rfc2965
        for cookie in cookies:
            if cookie.version == 1:
                cookie.rfc2109 = True
                if rfc2109_as_ns:
                    # treat 2109 cookies as Netscape cookies rather than
                    # as RFC2965 cookies
                    cookie.version = 0

    def make_cookies(self, response, request):
        """Return sequence of Cookie objects extracted from response object."""
        # get cookie-attributes for RFC 2965 and Netscape protocols
        headers = response.info()
        rfc2965_hdrs = headers.get_all("Set-Cookie2", [])
        ns_hdrs = headers.get_all("Set-Cookie", [])

        rfc2965 = self._policy.rfc2965
        netscape = self._policy.netscape

        if ((not rfc2965_hdrs and not ns_hdrs) or
            (not ns_hdrs and not rfc2965) or
            (not rfc2965_hdrs and not netscape) or
            (not netscape and not rfc2965)):
            return []  # no relevant cookie headers: quick exit

        try:
            cookies = self._cookies_from_attrs_set(
                split_header_words(rfc2965_hdrs), request)
        except Exception:
            _warn_unhandled_exception()
            cookies = []

        if ns_hdrs and netscape:
            try:
                # RFC 2109 and Netscape cookies
                ns_cookies = self._cookies_from_attrs_set(
                    parse_ns_headers(ns_hdrs), request)
            except Exception:
                _warn_unhandled_exception()
                ns_cookies = []
            self._process_rfc2109_cookies(ns_cookies)

            # Look for Netscape cookies (from Set-Cookie headers) that match
            # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
            # For each match, keep the RFC 2965 cookie and ignore the Netscape
            # cookie (RFC 2965 section 9.1).  Actually, RFC 2109 cookies are
            # bundled in with the Netscape cookies for this purpose, which is
            # reasonable behaviour.
            if rfc2965:
                rfc2965_keys = set(
                    (known.domain, known.path, known.name)
                    for known in cookies)
                # A real list (not a lazy filter object) so that the result
                # is a plain sequence like the unfiltered case.
                ns_cookies = [
                    candidate for candidate in ns_cookies
                    if (candidate.domain, candidate.path, candidate.name)
                    not in rfc2965_keys]

            cookies.extend(ns_cookies)

        return cookies

    def set_cookie_if_ok(self, cookie, request):
        """Set a cookie if policy says it's OK to do so."""
        with self._cookies_lock:
            self._policy._now = self._now = int(time.time())

            if self._policy.set_ok(cookie, request):
                self.set_cookie(cookie)

    def set_cookie(self, cookie):
        """Set a cookie, without checking whether or not it should be set."""
        with self._cookies_lock:
            # self._cookies is looked up while holding the lock: clear() may
            # rebind it to a fresh dict, and a reference taken earlier would
            # then point at the discarded one.
            by_path = self._cookies.setdefault(cookie.domain, {})
            by_name = by_path.setdefault(cookie.path, {})
            by_name[cookie.name] = cookie

    def extract_cookies(self, response, request):
        """Extract cookies from response, where allowable given the request."""
        _debug("extract_cookies: %s", response.info())
        with self._cookies_lock:
            self._policy._now = self._now = int(time.time())

            for cookie in self.make_cookies(response, request):
                if self._policy.set_ok(cookie, request):
                    _debug(" setting cookie: %s", cookie)
                    self.set_cookie(cookie)

    def clear(self, domain=None, path=None, name=None):
        """Clear some cookies.

        Invoking this method without arguments will clear all cookies.  If
        given a single argument, only cookies belonging to that domain will be
        removed.  If given two arguments, cookies belonging to the specified
        path within that domain are removed.  If given three arguments, then
        the cookie with the specified name, path and domain is removed.

        Raises KeyError if no matching cookie exists.
        Raises ValueError if name is given without domain and path, or path
        is given without domain.

        """
        if name is not None:
            if (domain is None) or (path is None):
                raise ValueError(
                    "domain and path must be given to remove a cookie by name")
            del self._cookies[domain][path][name]
        elif path is not None:
            if domain is None:
                raise ValueError(
                    "domain must be given to remove cookies by path")
            del self._cookies[domain][path]
        elif domain is not None:
            del self._cookies[domain]
        else:
            self._cookies = {}

    def clear_session_cookies(self):
        """Discard all session cookies.

        Note that the .save() method won't save session cookies anyway, unless
        you ask otherwise by passing a true ignore_discard argument.

        """
        with self._cookies_lock:
            for cookie in self:
                if cookie.discard:
                    self.clear(cookie.domain, cookie.path, cookie.name)

    def clear_expired_cookies(self):
        """Discard all expired cookies.

        You probably don't need to call this method: expired cookies are never
        sent back to the server (provided you're using DefaultCookiePolicy),
        this method is called by CookieJar itself every so often, and the
        .save() method won't save expired cookies anyway (unless you ask
        otherwise by passing a true ignore_expires argument).

        """
        with self._cookies_lock:
            now = time.time()
            for cookie in self:
                if cookie.is_expired(now):
                    self.clear(cookie.domain, cookie.path, cookie.name)

    def __iter__(self):
        """Iterate over all stored cookies (see deepvalues)."""
        return deepvalues(self._cookies)

    def __len__(self):
        """Return number of contained cookies."""
        return sum(1 for _ in self)

    @as_native_str()
    def __repr__(self):
        r = [repr(cookie) for cookie in self]
        return "<%s[%s]>" % (self.__class__, ", ".join(r))

    def __str__(self):
        r = [str(cookie) for cookie in self]
        return "<%s[%s]>" % (self.__class__, ", ".join(r))
+
+
+# derives from IOError for backwards-compatibility with Python 2.4.0
class LoadError(IOError):
    """Error raised for a failure to load cookies; a subclass of IOError."""
+
class FileCookieJar(CookieJar):
    """CookieJar that can be loaded from and saved to a file.

    save() is not implemented here, and load() delegates the actual parsing
    to self._really_load(), which this class does not define -- presumably
    both are supplied by format-specific subclasses.
    """

    def __init__(self, filename=None, delayload=False, policy=None):
        """
        Cookies are NOT loaded from the named file until either the .load() or
        .revert() method is called.

        Raises ValueError if filename is given but cannot be concatenated
        with a string.

        """
        CookieJar.__init__(self, policy)
        if filename is not None:
            try:
                filename+""
            except Exception:
                # Not a bare except: KeyboardInterrupt and SystemExit must
                # not be turned into a ValueError.
                raise ValueError("filename must be string-like")
        self.filename = filename
        self.delayload = bool(delayload)

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file."""
        raise NotImplementedError()

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        filename defaults to self.filename; ValueError is raised when neither
        is set.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(MISSING_FILENAME_TEXT)

        # The file is closed again even if parsing fails.
        with open(filename) as f:
            self._really_load(f, filename, ignore_discard, ignore_expires)

    def revert(self, filename=None,
               ignore_discard=False, ignore_expires=False):
        """Clear all cookies and reload cookies from a saved file.

        Raises LoadError (or IOError) if reversion is not successful; the
        object's state will not be altered if this happens.

        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(MISSING_FILENAME_TEXT)

        with self._cookies_lock:
            # Keep a deep copy so the previous cookies can be restored if
            # loading fails.
            old_state = copy.deepcopy(self._cookies)
            self._cookies = {}
            try:
                self.load(filename, ignore_discard, ignore_expires)
            except (LoadError, IOError):
                self._cookies = old_state
                raise
+
+
+def lwp_cookie_str(cookie):
+ """Return string representation of Cookie in the LWP cookie file format.
+
+ Actually, the format is extended a bit -- see module docstring.
+
+ """
+ h = [(cookie.name, cookie.value),
+ ("path", cookie.path),
+ ("domain", cookie.domain)]
+ if cookie.port is not None: h.append(("port", cookie.port))
+ if cookie.path_specified: h.append(("path_spec", None))
+ if cookie.port_specified: h.append(("port_spec", None))
+ if cookie.domain_initial_dot: h.append(("domain_dot", None))
+ if cookie.secure: h.append(("secure", None))
+ if cookie.expires: h.append(("expires",
+ time2isoz(float(cookie.expires))))
+ if cookie.discard: h.append(("discard", None))
+ if cookie.comment: h.append(("comment", cookie.comment))
+ if cookie.comment_url: h.append(("commenturl", cookie.comment_url))
+
+ keys = sorted(cookie._rest.keys())
+ for k in keys:
+ h.append((k, str(cookie._rest[k])))
+
+ h.append(("version", str(cookie.version)))
+
+ return join_header_words([h])
+
+class LWPCookieJar(FileCookieJar):
+ """
+ The LWPCookieJar saves a sequence of "Set-Cookie3" lines.
+ "Set-Cookie3" is the format used by the libwww-perl library, not known
+ to be compatible with any browser, but which is easy to read and
+ doesn't lose information about RFC 2965 cookies.
+
+ Additional methods
+
+ as_lwp_str(ignore_discard=True, ignore_expires=True)
+
+ """
+
+ def as_lwp_str(self, ignore_discard=True, ignore_expires=True):
+ """Return cookies as a string of "\\n"-separated "Set-Cookie3" headers.
+
+ ignore_discard and ignore_expires: see docstring for FileCookieJar.save
+
+ """
+ now = time.time()
+ r = []
+ for cookie in self:
+ if not ignore_discard and cookie.discard:
+ continue
+ if not ignore_expires and cookie.is_expired(now):
+ continue
+ r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie))
+ return "\n".join(r+[""])
+
+ def save(self, filename=None, ignore_discard=False, ignore_expires=False):
+ if filename is None:
+ if self.filename is not None: filename = self.filename
+ else: raise ValueError(MISSING_FILENAME_TEXT)
+
+ f = open(filename, "w")
+ try:
+ # There really isn't an LWP Cookies 2.0 format, but this indicates
+ # that there is extra information in here (domain_dot and
+ # port_spec) while still being compatible with libwww-perl, I hope.
+ f.write("#LWP-Cookies-2.0\n")
+ f.write(self.as_lwp_str(ignore_discard, ignore_expires))
+ finally:
+ f.close()
+
+ def _really_load(self, f, filename, ignore_discard, ignore_expires):
+ magic = f.readline()
+ if not self.magic_re.search(magic):
+ msg = ("%r does not look like a Set-Cookie3 (LWP) format "
+ "file" % filename)
+ raise LoadError(msg)
+
+ now = time.time()
+
+ header = "Set-Cookie3:"
+ boolean_attrs = ("port_spec", "path_spec", "domain_dot",
+ "secure", "discard")
+ value_attrs = ("version",
+ "port", "path", "domain",
+ "expires",
+ "comment", "commenturl")
+
+ try:
+ while 1:
+ line = f.readline()
+ if line == "": break
+ if not line.startswith(header):
+ continue
+ line = line[len(header):].strip()
+
+ for data in split_header_words([line]):
+ name, value = data[0]
+ standard = {}
+ rest = {}
+ for k in boolean_attrs:
+ standard[k] = False
+ for k, v in data[1:]:
+ if k is not None:
+ lc = k.lower()
+ else:
+ lc = None
+ # don't lose case distinction for unknown fields
+ if (lc in value_attrs) or (lc in boolean_attrs):
+ k = lc
+ if k in boolean_attrs:
+ if v is None: v = True
+ standard[k] = v
+ elif k in value_attrs:
+ standard[k] = v
+ else:
+ rest[k] = v
+
+ h = standard.get
+ expires = h("expires")
+ discard = h("discard")
+ if expires is not None:
+ expires = iso2time(expires)
+ if expires is None:
+ discard = True
+ domain = h("domain")
+ domain_specified = domain.startswith(".")
+ c = Cookie(h("version"), name, value,
+ h("port"), h("port_spec"),
+ domain, domain_specified, h("domain_dot"),
+ h("path"), h("path_spec"),
+ h("secure"),
+ expires,
+ discard,
+ h("comment"),
+ h("commenturl"),
+ rest)
+ if not ignore_discard and c.discard:
+ continue
+ if not ignore_expires and c.is_expired(now):
+ continue
+ self.set_cookie(c)
+
+ except IOError:
+ raise
+ except Exception:
+ _warn_unhandled_exception()
+ raise LoadError("invalid Set-Cookie3 format file %r: %r" %
+ (filename, line))
+
+
+class MozillaCookieJar(FileCookieJar):
+ """
+
+ WARNING: you may want to back up your browser's cookies file if you use
+ this class to save cookies. I *think* it works, but there have been
+ bugs in the past!
+
+ This class differs from CookieJar only in the format it uses to save and
+ load cookies to and from a file. This class uses the Mozilla/Netscape
+ `cookies.txt' format. lynx uses this file format, too.
+
+ Don't expect cookies saved while the browser is running to be noticed by
+ the browser (in fact, Mozilla on unix will overwrite your saved cookies if
+ you change them on disk while it's running; on Windows, you probably can't
+ save at all while the browser is running).
+
+ Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to
+ Netscape cookies on saving.
+
+ In particular, the cookie version and port number information is lost,
+ together with information about whether or not Path, Port and Discard were
+ specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the
+ domain as set in the HTTP header started with a dot (yes, I'm aware some
+ domains in Netscape files start with a dot and some don't -- trust me, you
+ really don't want to know any more about this).
+
+ Note that though Mozilla and Netscape use the same format, they use
+ slightly different headers. The class saves cookies using the Netscape
+ header by default (Mozilla can cope with that).
+
+ """
+ magic_re = re.compile("#( Netscape)? HTTP Cookie File")
+ header = """\
+# Netscape HTTP Cookie File
+# http://www.netscape.com/newsref/std/cookie_spec.html
+# This is a generated file! Do not edit.
+
+"""
+
+ def _really_load(self, f, filename, ignore_discard, ignore_expires):
+ now = time.time()
+
+ magic = f.readline()
+ if not self.magic_re.search(magic):
+ f.close()
+ raise LoadError(
+ "%r does not look like a Netscape format cookies file" %
+ filename)
+
+ try:
+ while 1:
+ line = f.readline()
+ if line == "": break
+
+ # last field may be absent, so keep any trailing tab
+ if line.endswith("\n"): line = line[:-1]
+
+ # skip comments and blank lines XXX what is $ for?
+ if (line.strip().startswith(("#", "$")) or
+ line.strip() == ""):
+ continue
+
+ domain, domain_specified, path, secure, expires, name, value = \
+ line.split("\t")
+ secure = (secure == "TRUE")
+ domain_specified = (domain_specified == "TRUE")
+ if name == "":
+ # cookies.txt regards 'Set-Cookie: foo' as a cookie
+ # with no name, whereas http.cookiejar regards it as a
+ # cookie with no value.
+ name = value
+ value = None
+
+ initial_dot = domain.startswith(".")
+ assert domain_specified == initial_dot
+
+ discard = False
+ if expires == "":
+ expires = None
+ discard = True
+
+ # assume path_specified is false
+ c = Cookie(0, name, value,
+ None, False,
+ domain, domain_specified, initial_dot,
+ path, False,
+ secure,
+ expires,
+ discard,
+ None,
+ None,
+ {})
+ if not ignore_discard and c.discard:
+ continue
+ if not ignore_expires and c.is_expired(now):
+ continue
+ self.set_cookie(c)
+
+ except IOError:
+ raise
+ except Exception:
+ _warn_unhandled_exception()
+ raise LoadError("invalid Netscape format cookies file %r: %r" %
+ (filename, line))
+
+ def save(self, filename=None, ignore_discard=False, ignore_expires=False):
+ if filename is None:
+ if self.filename is not None: filename = self.filename
+ else: raise ValueError(MISSING_FILENAME_TEXT)
+
+ f = open(filename, "w")
+ try:
+ f.write(self.header)
+ now = time.time()
+ for cookie in self:
+ if not ignore_discard and cookie.discard:
+ continue
+ if not ignore_expires and cookie.is_expired(now):
+ continue
+ if cookie.secure: secure = "TRUE"
+ else: secure = "FALSE"
+ if cookie.domain.startswith("."): initial_dot = "TRUE"
+ else: initial_dot = "FALSE"
+ if cookie.expires is not None:
+ expires = str(cookie.expires)
+ else:
+ expires = ""
+ if cookie.value is None:
+ # cookies.txt regards 'Set-Cookie: foo' as a cookie
+ # with no name, whereas http.cookiejar regards it as a
+ # cookie with no value.
+ name = ""
+ value = cookie.name
+ else:
+ name = cookie.name
+ value = cookie.value
+ f.write(
+ "\t".join([cookie.domain, initial_dot, cookie.path,
+ secure, expires, name, value])+
+ "\n")
+ finally:
+ f.close()
diff --git a/contrib/python/future/future/backports/http/cookies.py b/contrib/python/future/future/backports/http/cookies.py
index ec4e4a150e..8bb61e22c4 100644
--- a/contrib/python/future/future/backports/http/cookies.py
+++ b/contrib/python/future/future/backports/http/cookies.py
@@ -1,598 +1,598 @@
-####
-# Copyright 2000 by Timothy O'Malley <timo@alum.mit.edu>
-#
-# All Rights Reserved
-#
-# Permission to use, copy, modify, and distribute this software
-# and its documentation for any purpose and without fee is hereby
-# granted, provided that the above copyright notice appear in all
-# copies and that both that copyright notice and this permission
-# notice appear in supporting documentation, and that the name of
-# Timothy O'Malley not be used in advertising or publicity
-# pertaining to distribution of the software without specific, written
-# prior permission.
-#
-# Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
-# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
-# AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR
-# ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
-# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
-# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
-# PERFORMANCE OF THIS SOFTWARE.
-#
-####
-#
-# Id: Cookie.py,v 2.29 2000/08/23 05:28:49 timo Exp
-# by Timothy O'Malley <timo@alum.mit.edu>
-#
-# Cookie.py is a Python module for the handling of HTTP
-# cookies as a Python dictionary. See RFC 2109 for more
-# information on cookies.
-#
-# The original idea to treat Cookies as a dictionary came from
-# Dave Mitchell (davem@magnet.com) in 1995, when he released the
-# first version of nscookie.py.
-#
-####
-
-r"""
-http.cookies module ported to python-future from Py3.3
-
-Here's a sample session to show how to use this module.
-At the moment, this is the only documentation.
-
-The Basics
-----------
-
-Importing is easy...
-
- >>> from http import cookies
-
-Most of the time you start by creating a cookie.
-
- >>> C = cookies.SimpleCookie()
-
-Once you've created your Cookie, you can add values just as if it were
-a dictionary.
-
- >>> C = cookies.SimpleCookie()
- >>> C["fig"] = "newton"
- >>> C["sugar"] = "wafer"
- >>> C.output()
- 'Set-Cookie: fig=newton\r\nSet-Cookie: sugar=wafer'
-
-Notice that the printable representation of a Cookie is the
-appropriate format for a Set-Cookie: header. This is the
-default behavior. You can change the header and printed
-attributes by using the .output() function
-
- >>> C = cookies.SimpleCookie()
- >>> C["rocky"] = "road"
- >>> C["rocky"]["path"] = "/cookie"
- >>> print(C.output(header="Cookie:"))
- Cookie: rocky=road; Path=/cookie
- >>> print(C.output(attrs=[], header="Cookie:"))
- Cookie: rocky=road
-
-The load() method of a Cookie extracts cookies from a string. In a
-CGI script, you would use this method to extract the cookies from the
-HTTP_COOKIE environment variable.
-
- >>> C = cookies.SimpleCookie()
- >>> C.load("chips=ahoy; vienna=finger")
- >>> C.output()
- 'Set-Cookie: chips=ahoy\r\nSet-Cookie: vienna=finger'
-
-The load() method is darn-tootin smart about identifying cookies
-within a string. Escaped quotation marks, nested semicolons, and other
-such trickeries do not confuse it.
-
- >>> C = cookies.SimpleCookie()
- >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=\\012;";')
- >>> print(C)
- Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;"
-
-Each element of the Cookie also supports all of the RFC 2109
-Cookie attributes. Here's an example which sets the Path
-attribute.
-
- >>> C = cookies.SimpleCookie()
- >>> C["oreo"] = "doublestuff"
- >>> C["oreo"]["path"] = "/"
- >>> print(C)
- Set-Cookie: oreo=doublestuff; Path=/
-
-Each dictionary element has a 'value' attribute, which gives you
-back the value associated with the key.
-
- >>> C = cookies.SimpleCookie()
- >>> C["twix"] = "none for you"
- >>> C["twix"].value
- 'none for you'
-
-The SimpleCookie expects that all values should be standard strings.
-Just to be sure, SimpleCookie invokes the str() builtin to convert
-the value to a string, when the values are set dictionary-style.
-
- >>> C = cookies.SimpleCookie()
- >>> C["number"] = 7
- >>> C["string"] = "seven"
- >>> C["number"].value
- '7'
- >>> C["string"].value
- 'seven'
- >>> C.output()
- 'Set-Cookie: number=7\r\nSet-Cookie: string=seven'
-
-Finis.
-"""
-from __future__ import unicode_literals
-from __future__ import print_function
-from __future__ import division
-from __future__ import absolute_import
-from future.builtins import chr, dict, int, str
-from future.utils import PY2, as_native_str
-
-#
-# Import our required modules
-#
-import re
+####
+# Copyright 2000 by Timothy O'Malley <timo@alum.mit.edu>
+#
+# All Rights Reserved
+#
+# Permission to use, copy, modify, and distribute this software
+# and its documentation for any purpose and without fee is hereby
+# granted, provided that the above copyright notice appear in all
+# copies and that both that copyright notice and this permission
+# notice appear in supporting documentation, and that the name of
+# Timothy O'Malley not be used in advertising or publicity
+# pertaining to distribution of the software without specific, written
+# prior permission.
+#
+# Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
+# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+# AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR
+# ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+# PERFORMANCE OF THIS SOFTWARE.
+#
+####
+#
+# Id: Cookie.py,v 2.29 2000/08/23 05:28:49 timo Exp
+# by Timothy O'Malley <timo@alum.mit.edu>
+#
+# Cookie.py is a Python module for the handling of HTTP
+# cookies as a Python dictionary. See RFC 2109 for more
+# information on cookies.
+#
+# The original idea to treat Cookies as a dictionary came from
+# Dave Mitchell (davem@magnet.com) in 1995, when he released the
+# first version of nscookie.py.
+#
+####
+
+r"""
+http.cookies module ported to python-future from Py3.3
+
+Here's a sample session to show how to use this module.
+At the moment, this is the only documentation.
+
+The Basics
+----------
+
+Importing is easy...
+
+ >>> from http import cookies
+
+Most of the time you start by creating a cookie.
+
+ >>> C = cookies.SimpleCookie()
+
+Once you've created your Cookie, you can add values just as if it were
+a dictionary.
+
+ >>> C = cookies.SimpleCookie()
+ >>> C["fig"] = "newton"
+ >>> C["sugar"] = "wafer"
+ >>> C.output()
+ 'Set-Cookie: fig=newton\r\nSet-Cookie: sugar=wafer'
+
+Notice that the printable representation of a Cookie is the
+appropriate format for a Set-Cookie: header. This is the
+default behavior. You can change the header and printed
+attributes by using the .output() function
+
+ >>> C = cookies.SimpleCookie()
+ >>> C["rocky"] = "road"
+ >>> C["rocky"]["path"] = "/cookie"
+ >>> print(C.output(header="Cookie:"))
+ Cookie: rocky=road; Path=/cookie
+ >>> print(C.output(attrs=[], header="Cookie:"))
+ Cookie: rocky=road
+
+The load() method of a Cookie extracts cookies from a string. In a
+CGI script, you would use this method to extract the cookies from the
+HTTP_COOKIE environment variable.
+
+ >>> C = cookies.SimpleCookie()
+ >>> C.load("chips=ahoy; vienna=finger")
+ >>> C.output()
+ 'Set-Cookie: chips=ahoy\r\nSet-Cookie: vienna=finger'
+
+The load() method is darn-tootin smart about identifying cookies
+within a string. Escaped quotation marks, nested semicolons, and other
+such trickeries do not confuse it.
+
+ >>> C = cookies.SimpleCookie()
+ >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=\\012;";')
+ >>> print(C)
+ Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;"
+
+Each element of the Cookie also supports all of the RFC 2109
+Cookie attributes. Here's an example which sets the Path
+attribute.
+
+ >>> C = cookies.SimpleCookie()
+ >>> C["oreo"] = "doublestuff"
+ >>> C["oreo"]["path"] = "/"
+ >>> print(C)
+ Set-Cookie: oreo=doublestuff; Path=/
+
+Each dictionary element has a 'value' attribute, which gives you
+back the value associated with the key.
+
+ >>> C = cookies.SimpleCookie()
+ >>> C["twix"] = "none for you"
+ >>> C["twix"].value
+ 'none for you'
+
+The SimpleCookie expects that all values should be standard strings.
+Just to be sure, SimpleCookie invokes the str() builtin to convert
+the value to a string, when the values are set dictionary-style.
+
+ >>> C = cookies.SimpleCookie()
+ >>> C["number"] = 7
+ >>> C["string"] = "seven"
+ >>> C["number"].value
+ '7'
+ >>> C["string"].value
+ 'seven'
+ >>> C.output()
+ 'Set-Cookie: number=7\r\nSet-Cookie: string=seven'
+
+Finis.
+"""
+from __future__ import unicode_literals
+from __future__ import print_function
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import chr, dict, int, str
+from future.utils import PY2, as_native_str
+
+#
+# Import our required modules
+#
+import re
if PY2:
re.ASCII = 0 # for py2 compatibility
-import string
-
-__all__ = ["CookieError", "BaseCookie", "SimpleCookie"]
-
-_nulljoin = ''.join
-_semispacejoin = '; '.join
-_spacejoin = ' '.join
-
-#
-# Define an exception visible to External modules
-#
-class CookieError(Exception):
- pass
-
-
-# These quoting routines conform to the RFC2109 specification, which in
-# turn references the character definitions from RFC2068. They provide
-# a two-way quoting algorithm. Any non-text character is translated
-# into a 4 character sequence: a forward-slash followed by the
-# three-digit octal equivalent of the character. Any '\' or '"' is
-# quoted with a preceeding '\' slash.
-#
-# These are taken from RFC2068 and RFC2109.
-# _LegalChars is the list of chars which don't require "'s
-# _Translator hash-table for fast quoting
-#
-_LegalChars = string.ascii_letters + string.digits + "!#$%&'*+-.^_`|~:"
-_Translator = {
- '\000' : '\\000', '\001' : '\\001', '\002' : '\\002',
- '\003' : '\\003', '\004' : '\\004', '\005' : '\\005',
- '\006' : '\\006', '\007' : '\\007', '\010' : '\\010',
- '\011' : '\\011', '\012' : '\\012', '\013' : '\\013',
- '\014' : '\\014', '\015' : '\\015', '\016' : '\\016',
- '\017' : '\\017', '\020' : '\\020', '\021' : '\\021',
- '\022' : '\\022', '\023' : '\\023', '\024' : '\\024',
- '\025' : '\\025', '\026' : '\\026', '\027' : '\\027',
- '\030' : '\\030', '\031' : '\\031', '\032' : '\\032',
- '\033' : '\\033', '\034' : '\\034', '\035' : '\\035',
- '\036' : '\\036', '\037' : '\\037',
-
- # Because of the way browsers really handle cookies (as opposed
- # to what the RFC says) we also encode , and ;
-
- ',' : '\\054', ';' : '\\073',
-
- '"' : '\\"', '\\' : '\\\\',
-
- '\177' : '\\177', '\200' : '\\200', '\201' : '\\201',
- '\202' : '\\202', '\203' : '\\203', '\204' : '\\204',
- '\205' : '\\205', '\206' : '\\206', '\207' : '\\207',
- '\210' : '\\210', '\211' : '\\211', '\212' : '\\212',
- '\213' : '\\213', '\214' : '\\214', '\215' : '\\215',
- '\216' : '\\216', '\217' : '\\217', '\220' : '\\220',
- '\221' : '\\221', '\222' : '\\222', '\223' : '\\223',
- '\224' : '\\224', '\225' : '\\225', '\226' : '\\226',
- '\227' : '\\227', '\230' : '\\230', '\231' : '\\231',
- '\232' : '\\232', '\233' : '\\233', '\234' : '\\234',
- '\235' : '\\235', '\236' : '\\236', '\237' : '\\237',
- '\240' : '\\240', '\241' : '\\241', '\242' : '\\242',
- '\243' : '\\243', '\244' : '\\244', '\245' : '\\245',
- '\246' : '\\246', '\247' : '\\247', '\250' : '\\250',
- '\251' : '\\251', '\252' : '\\252', '\253' : '\\253',
- '\254' : '\\254', '\255' : '\\255', '\256' : '\\256',
- '\257' : '\\257', '\260' : '\\260', '\261' : '\\261',
- '\262' : '\\262', '\263' : '\\263', '\264' : '\\264',
- '\265' : '\\265', '\266' : '\\266', '\267' : '\\267',
- '\270' : '\\270', '\271' : '\\271', '\272' : '\\272',
- '\273' : '\\273', '\274' : '\\274', '\275' : '\\275',
- '\276' : '\\276', '\277' : '\\277', '\300' : '\\300',
- '\301' : '\\301', '\302' : '\\302', '\303' : '\\303',
- '\304' : '\\304', '\305' : '\\305', '\306' : '\\306',
- '\307' : '\\307', '\310' : '\\310', '\311' : '\\311',
- '\312' : '\\312', '\313' : '\\313', '\314' : '\\314',
- '\315' : '\\315', '\316' : '\\316', '\317' : '\\317',
- '\320' : '\\320', '\321' : '\\321', '\322' : '\\322',
- '\323' : '\\323', '\324' : '\\324', '\325' : '\\325',
- '\326' : '\\326', '\327' : '\\327', '\330' : '\\330',
- '\331' : '\\331', '\332' : '\\332', '\333' : '\\333',
- '\334' : '\\334', '\335' : '\\335', '\336' : '\\336',
- '\337' : '\\337', '\340' : '\\340', '\341' : '\\341',
- '\342' : '\\342', '\343' : '\\343', '\344' : '\\344',
- '\345' : '\\345', '\346' : '\\346', '\347' : '\\347',
- '\350' : '\\350', '\351' : '\\351', '\352' : '\\352',
- '\353' : '\\353', '\354' : '\\354', '\355' : '\\355',
- '\356' : '\\356', '\357' : '\\357', '\360' : '\\360',
- '\361' : '\\361', '\362' : '\\362', '\363' : '\\363',
- '\364' : '\\364', '\365' : '\\365', '\366' : '\\366',
- '\367' : '\\367', '\370' : '\\370', '\371' : '\\371',
- '\372' : '\\372', '\373' : '\\373', '\374' : '\\374',
- '\375' : '\\375', '\376' : '\\376', '\377' : '\\377'
- }
-
-def _quote(str, LegalChars=_LegalChars):
- r"""Quote a string for use in a cookie header.
-
- If the string does not need to be double-quoted, then just return the
- string. Otherwise, surround the string in doublequotes and quote
- (with a \) special characters.
- """
- if all(c in LegalChars for c in str):
- return str
- else:
- return '"' + _nulljoin(_Translator.get(s, s) for s in str) + '"'
-
-
-_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]")
-_QuotePatt = re.compile(r"[\\].")
-
-def _unquote(mystr):
- # If there aren't any doublequotes,
- # then there can't be any special characters. See RFC 2109.
- if len(mystr) < 2:
- return mystr
- if mystr[0] != '"' or mystr[-1] != '"':
- return mystr
-
- # We have to assume that we must decode this string.
- # Down to work.
-
- # Remove the "s
- mystr = mystr[1:-1]
-
- # Check for special sequences. Examples:
- # \012 --> \n
- # \" --> "
- #
- i = 0
- n = len(mystr)
- res = []
- while 0 <= i < n:
- o_match = _OctalPatt.search(mystr, i)
- q_match = _QuotePatt.search(mystr, i)
- if not o_match and not q_match: # Neither matched
- res.append(mystr[i:])
- break
- # else:
- j = k = -1
- if o_match:
- j = o_match.start(0)
- if q_match:
- k = q_match.start(0)
- if q_match and (not o_match or k < j): # QuotePatt matched
- res.append(mystr[i:k])
- res.append(mystr[k+1])
- i = k + 2
- else: # OctalPatt matched
- res.append(mystr[i:j])
- res.append(chr(int(mystr[j+1:j+4], 8)))
- i = j + 4
- return _nulljoin(res)
-
-# The _getdate() routine is used to set the expiration time in the cookie's HTTP
-# header. By default, _getdate() returns the current time in the appropriate
-# "expires" format for a Set-Cookie header. The one optional argument is an
-# offset from now, in seconds. For example, an offset of -3600 means "one hour
-# ago". The offset may be a floating point number.
-#
-
-_weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
-
-_monthname = [None,
- 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
- 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
-
-def _getdate(future=0, weekdayname=_weekdayname, monthname=_monthname):
- from time import gmtime, time
- now = time()
- year, month, day, hh, mm, ss, wd, y, z = gmtime(now + future)
- return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % \
- (weekdayname[wd], day, monthname[month], year, hh, mm, ss)
-
-
-class Morsel(dict):
- """A class to hold ONE (key, value) pair.
-
- In a cookie, each such pair may have several attributes, so this class is
- used to keep the attributes associated with the appropriate key,value pair.
- This class also includes a coded_value attribute, which is used to hold
- the network representation of the value. This is most useful when Python
- objects are pickled for network transit.
- """
- # RFC 2109 lists these attributes as reserved:
- # path comment domain
- # max-age secure version
- #
- # For historical reasons, these attributes are also reserved:
- # expires
- #
- # This is an extension from Microsoft:
- # httponly
- #
- # This dictionary provides a mapping from the lowercase
- # variant on the left to the appropriate traditional
- # formatting on the right.
- _reserved = {
- "expires" : "expires",
- "path" : "Path",
- "comment" : "Comment",
- "domain" : "Domain",
- "max-age" : "Max-Age",
- "secure" : "secure",
- "httponly" : "httponly",
- "version" : "Version",
- }
-
- _flags = set(['secure', 'httponly'])
-
- def __init__(self):
- # Set defaults
- self.key = self.value = self.coded_value = None
-
- # Set default attributes
- for key in self._reserved:
- dict.__setitem__(self, key, "")
-
- def __setitem__(self, K, V):
- K = K.lower()
- if not K in self._reserved:
- raise CookieError("Invalid Attribute %s" % K)
- dict.__setitem__(self, K, V)
-
- def isReservedKey(self, K):
- return K.lower() in self._reserved
-
- def set(self, key, val, coded_val, LegalChars=_LegalChars):
- # First we verify that the key isn't a reserved word
- # Second we make sure it only contains legal characters
- if key.lower() in self._reserved:
- raise CookieError("Attempt to set a reserved key: %s" % key)
- if any(c not in LegalChars for c in key):
- raise CookieError("Illegal key value: %s" % key)
-
- # It's a good key, so save it.
- self.key = key
- self.value = val
- self.coded_value = coded_val
-
- def output(self, attrs=None, header="Set-Cookie:"):
- return "%s %s" % (header, self.OutputString(attrs))
-
- __str__ = output
-
- @as_native_str()
- def __repr__(self):
- if PY2 and isinstance(self.value, unicode):
- val = str(self.value) # make it a newstr to remove the u prefix
- else:
- val = self.value
- return '<%s: %s=%s>' % (self.__class__.__name__,
- str(self.key), repr(val))
-
- def js_output(self, attrs=None):
- # Print javascript
- return """
- <script type="text/javascript">
- <!-- begin hiding
- document.cookie = \"%s\";
- // end hiding -->
- </script>
- """ % (self.OutputString(attrs).replace('"', r'\"'))
-
- def OutputString(self, attrs=None):
- # Build up our result
- #
- result = []
- append = result.append
-
- # First, the key=value pair
- append("%s=%s" % (self.key, self.coded_value))
-
- # Now add any defined attributes
- if attrs is None:
- attrs = self._reserved
- items = sorted(self.items())
- for key, value in items:
- if value == "":
- continue
- if key not in attrs:
- continue
- if key == "expires" and isinstance(value, int):
- append("%s=%s" % (self._reserved[key], _getdate(value)))
- elif key == "max-age" and isinstance(value, int):
- append("%s=%d" % (self._reserved[key], value))
- elif key == "secure":
- append(str(self._reserved[key]))
- elif key == "httponly":
- append(str(self._reserved[key]))
- else:
- append("%s=%s" % (self._reserved[key], value))
-
- # Return the result
- return _semispacejoin(result)
-
-
-#
-# Pattern for finding cookie
-#
-# This used to be strict parsing based on the RFC2109 and RFC2068
-# specifications. I have since discovered that MSIE 3.0x doesn't
-# follow the character rules outlined in those specs. As a
-# result, the parsing rules here are less strict.
-#
-
-_LegalCharsPatt = r"[\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=]"
-_CookiePattern = re.compile(r"""
- (?x) # This is a verbose pattern
- (?P<key> # Start of group 'key'
- """ + _LegalCharsPatt + r"""+? # Any word of at least one letter
- ) # End of group 'key'
- ( # Optional group: there may not be a value.
- \s*=\s* # Equal Sign
- (?P<val> # Start of group 'val'
- "(?:[^\\"]|\\.)*" # Any doublequoted string
- | # or
- \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
- | # or
- """ + _LegalCharsPatt + r"""* # Any word or empty string
- ) # End of group 'val'
- )? # End of optional value group
- \s* # Any number of spaces.
- (\s+|;|$) # Ending either at space, semicolon, or EOS.
- """, re.ASCII) # May be removed if safe.
-
-
-# At long last, here is the cookie class. Using this class is almost just like
-# using a dictionary. See this module's docstring for example usage.
-#
-class BaseCookie(dict):
- """A container class for a set of Morsels."""
-
- def value_decode(self, val):
- """real_value, coded_value = value_decode(STRING)
- Called prior to setting a cookie's value from the network
- representation. The VALUE is the value read from HTTP
- header.
- Override this function to modify the behavior of cookies.
- """
- return val, val
-
- def value_encode(self, val):
- """real_value, coded_value = value_encode(VALUE)
- Called prior to setting a cookie's value from the dictionary
- representation. The VALUE is the value being assigned.
- Override this function to modify the behavior of cookies.
- """
- strval = str(val)
- return strval, strval
-
- def __init__(self, input=None):
- if input:
- self.load(input)
-
- def __set(self, key, real_value, coded_value):
- """Private method for setting a cookie's value"""
- M = self.get(key, Morsel())
- M.set(key, real_value, coded_value)
- dict.__setitem__(self, key, M)
-
- def __setitem__(self, key, value):
- """Dictionary style assignment."""
- rval, cval = self.value_encode(value)
- self.__set(key, rval, cval)
-
- def output(self, attrs=None, header="Set-Cookie:", sep="\015\012"):
- """Return a string suitable for HTTP."""
- result = []
- items = sorted(self.items())
- for key, value in items:
- result.append(value.output(attrs, header))
- return sep.join(result)
-
- __str__ = output
-
- @as_native_str()
- def __repr__(self):
- l = []
- items = sorted(self.items())
- for key, value in items:
- if PY2 and isinstance(value.value, unicode):
- val = str(value.value) # make it a newstr to remove the u prefix
- else:
- val = value.value
- l.append('%s=%s' % (str(key), repr(val)))
- return '<%s: %s>' % (self.__class__.__name__, _spacejoin(l))
-
- def js_output(self, attrs=None):
- """Return a string suitable for JavaScript."""
- result = []
- items = sorted(self.items())
- for key, value in items:
- result.append(value.js_output(attrs))
- return _nulljoin(result)
-
- def load(self, rawdata):
- """Load cookies from a string (presumably HTTP_COOKIE) or
- from a dictionary. Loading cookies from a dictionary 'd'
- is equivalent to calling:
- map(Cookie.__setitem__, d.keys(), d.values())
- """
- if isinstance(rawdata, str):
- self.__parse_string(rawdata)
- else:
- # self.update() wouldn't call our custom __setitem__
- for key, value in rawdata.items():
- self[key] = value
- return
-
- def __parse_string(self, mystr, patt=_CookiePattern):
- i = 0 # Our starting point
- n = len(mystr) # Length of string
- M = None # current morsel
-
- while 0 <= i < n:
- # Start looking for a cookie
- match = patt.search(mystr, i)
- if not match:
- # No more cookies
- break
-
- key, value = match.group("key"), match.group("val")
-
- i = match.end(0)
-
- # Parse the key, value in case it's metainfo
- if key[0] == "$":
- # We ignore attributes which pertain to the cookie
- # mechanism as a whole. See RFC 2109.
- # (Does anyone care?)
- if M:
- M[key[1:]] = value
- elif key.lower() in Morsel._reserved:
- if M:
- if value is None:
- if key.lower() in Morsel._flags:
- M[key] = True
- else:
- M[key] = _unquote(value)
- elif value is not None:
- rval, cval = self.value_decode(value)
- self.__set(key, rval, cval)
- M = self[key]
-
-
-class SimpleCookie(BaseCookie):
- """
- SimpleCookie supports strings as cookie values. When setting
- the value using the dictionary assignment notation, SimpleCookie
- calls the builtin str() to convert the value to a string. Values
- received from HTTP are kept as strings.
- """
- def value_decode(self, val):
- return _unquote(val), val
-
- def value_encode(self, val):
- strval = str(val)
- return strval, _quote(strval)
+import string
+
+__all__ = ["CookieError", "BaseCookie", "SimpleCookie"]
+
+_nulljoin = ''.join
+_semispacejoin = '; '.join
+_spacejoin = ' '.join
+
+#
+# Define an exception visible to external modules
+#
+class CookieError(Exception):
+ pass
+
+
+# These quoting routines conform to the RFC2109 specification, which in
+# turn references the character definitions from RFC2068. They provide
+# a two-way quoting algorithm. Any non-text character is translated
+# into a 4 character sequence: a backslash followed by the
+# three-digit octal equivalent of the character. Any '\' or '"' is
+# quoted with a preceding '\' (backslash).
+#
+# These are taken from RFC2068 and RFC2109.
+# _LegalChars is the list of chars which don't require "'s
+# _Translator hash-table for fast quoting
+#
+_LegalChars = string.ascii_letters + string.digits + "!#$%&'*+-.^_`|~:"
+_Translator = {
+ '\000' : '\\000', '\001' : '\\001', '\002' : '\\002',
+ '\003' : '\\003', '\004' : '\\004', '\005' : '\\005',
+ '\006' : '\\006', '\007' : '\\007', '\010' : '\\010',
+ '\011' : '\\011', '\012' : '\\012', '\013' : '\\013',
+ '\014' : '\\014', '\015' : '\\015', '\016' : '\\016',
+ '\017' : '\\017', '\020' : '\\020', '\021' : '\\021',
+ '\022' : '\\022', '\023' : '\\023', '\024' : '\\024',
+ '\025' : '\\025', '\026' : '\\026', '\027' : '\\027',
+ '\030' : '\\030', '\031' : '\\031', '\032' : '\\032',
+ '\033' : '\\033', '\034' : '\\034', '\035' : '\\035',
+ '\036' : '\\036', '\037' : '\\037',
+
+ # Because of the way browsers really handle cookies (as opposed
+ # to what the RFC says) we also encode , and ;
+
+ ',' : '\\054', ';' : '\\073',
+
+ '"' : '\\"', '\\' : '\\\\',
+
+ '\177' : '\\177', '\200' : '\\200', '\201' : '\\201',
+ '\202' : '\\202', '\203' : '\\203', '\204' : '\\204',
+ '\205' : '\\205', '\206' : '\\206', '\207' : '\\207',
+ '\210' : '\\210', '\211' : '\\211', '\212' : '\\212',
+ '\213' : '\\213', '\214' : '\\214', '\215' : '\\215',
+ '\216' : '\\216', '\217' : '\\217', '\220' : '\\220',
+ '\221' : '\\221', '\222' : '\\222', '\223' : '\\223',
+ '\224' : '\\224', '\225' : '\\225', '\226' : '\\226',
+ '\227' : '\\227', '\230' : '\\230', '\231' : '\\231',
+ '\232' : '\\232', '\233' : '\\233', '\234' : '\\234',
+ '\235' : '\\235', '\236' : '\\236', '\237' : '\\237',
+ '\240' : '\\240', '\241' : '\\241', '\242' : '\\242',
+ '\243' : '\\243', '\244' : '\\244', '\245' : '\\245',
+ '\246' : '\\246', '\247' : '\\247', '\250' : '\\250',
+ '\251' : '\\251', '\252' : '\\252', '\253' : '\\253',
+ '\254' : '\\254', '\255' : '\\255', '\256' : '\\256',
+ '\257' : '\\257', '\260' : '\\260', '\261' : '\\261',
+ '\262' : '\\262', '\263' : '\\263', '\264' : '\\264',
+ '\265' : '\\265', '\266' : '\\266', '\267' : '\\267',
+ '\270' : '\\270', '\271' : '\\271', '\272' : '\\272',
+ '\273' : '\\273', '\274' : '\\274', '\275' : '\\275',
+ '\276' : '\\276', '\277' : '\\277', '\300' : '\\300',
+ '\301' : '\\301', '\302' : '\\302', '\303' : '\\303',
+ '\304' : '\\304', '\305' : '\\305', '\306' : '\\306',
+ '\307' : '\\307', '\310' : '\\310', '\311' : '\\311',
+ '\312' : '\\312', '\313' : '\\313', '\314' : '\\314',
+ '\315' : '\\315', '\316' : '\\316', '\317' : '\\317',
+ '\320' : '\\320', '\321' : '\\321', '\322' : '\\322',
+ '\323' : '\\323', '\324' : '\\324', '\325' : '\\325',
+ '\326' : '\\326', '\327' : '\\327', '\330' : '\\330',
+ '\331' : '\\331', '\332' : '\\332', '\333' : '\\333',
+ '\334' : '\\334', '\335' : '\\335', '\336' : '\\336',
+ '\337' : '\\337', '\340' : '\\340', '\341' : '\\341',
+ '\342' : '\\342', '\343' : '\\343', '\344' : '\\344',
+ '\345' : '\\345', '\346' : '\\346', '\347' : '\\347',
+ '\350' : '\\350', '\351' : '\\351', '\352' : '\\352',
+ '\353' : '\\353', '\354' : '\\354', '\355' : '\\355',
+ '\356' : '\\356', '\357' : '\\357', '\360' : '\\360',
+ '\361' : '\\361', '\362' : '\\362', '\363' : '\\363',
+ '\364' : '\\364', '\365' : '\\365', '\366' : '\\366',
+ '\367' : '\\367', '\370' : '\\370', '\371' : '\\371',
+ '\372' : '\\372', '\373' : '\\373', '\374' : '\\374',
+ '\375' : '\\375', '\376' : '\\376', '\377' : '\\377'
+ }
+
+def _quote(str, LegalChars=_LegalChars):
+ r"""Quote a string for use in a cookie header.
+
+ If the string does not need to be double-quoted, then just return the
+ string. Otherwise, surround the string in doublequotes and quote
+ (with a \) special characters.
+ """
+ if all(c in LegalChars for c in str):
+ return str
+ else:
+ return '"' + _nulljoin(_Translator.get(s, s) for s in str) + '"'
+
+
+_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]")
+_QuotePatt = re.compile(r"[\\].")
+
+def _unquote(mystr):
+ # If there aren't any doublequotes,
+ # then there can't be any special characters. See RFC 2109.
+ if len(mystr) < 2:
+ return mystr
+ if mystr[0] != '"' or mystr[-1] != '"':
+ return mystr
+
+ # We have to assume that we must decode this string.
+ # Down to work.
+
+ # Remove the "s
+ mystr = mystr[1:-1]
+
+ # Check for special sequences. Examples:
+ # \012 --> \n
+ # \" --> "
+ #
+ i = 0
+ n = len(mystr)
+ res = []
+ while 0 <= i < n:
+ o_match = _OctalPatt.search(mystr, i)
+ q_match = _QuotePatt.search(mystr, i)
+ if not o_match and not q_match: # Neither matched
+ res.append(mystr[i:])
+ break
+ # else:
+ j = k = -1
+ if o_match:
+ j = o_match.start(0)
+ if q_match:
+ k = q_match.start(0)
+ if q_match and (not o_match or k < j): # QuotePatt matched
+ res.append(mystr[i:k])
+ res.append(mystr[k+1])
+ i = k + 2
+ else: # OctalPatt matched
+ res.append(mystr[i:j])
+ res.append(chr(int(mystr[j+1:j+4], 8)))
+ i = j + 4
+ return _nulljoin(res)
+
+# The _getdate() routine is used to set the expiration time in the cookie's HTTP
+# header. By default, _getdate() returns the current time in the appropriate
+# "expires" format for a Set-Cookie header. The one optional argument is an
+# offset from now, in seconds. For example, an offset of -3600 means "one hour
+# ago". The offset may be a floating point number.
+#
+
+_weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
+
+_monthname = [None,
+ 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+ 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
+
+def _getdate(future=0, weekdayname=_weekdayname, monthname=_monthname):
+ from time import gmtime, time
+ now = time()
+ year, month, day, hh, mm, ss, wd, y, z = gmtime(now + future)
+ return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % \
+ (weekdayname[wd], day, monthname[month], year, hh, mm, ss)
+
+
+class Morsel(dict):
+ """A class to hold ONE (key, value) pair.
+
+ In a cookie, each such pair may have several attributes, so this class is
+ used to keep the attributes associated with the appropriate key,value pair.
+ This class also includes a coded_value attribute, which is used to hold
+ the network representation of the value. This is most useful when Python
+ objects are pickled for network transit.
+ """
+ # RFC 2109 lists these attributes as reserved:
+ # path comment domain
+ # max-age secure version
+ #
+ # For historical reasons, these attributes are also reserved:
+ # expires
+ #
+ # This is an extension from Microsoft:
+ # httponly
+ #
+ # This dictionary provides a mapping from the lowercase
+ # variant on the left to the appropriate traditional
+ # formatting on the right.
+ _reserved = {
+ "expires" : "expires",
+ "path" : "Path",
+ "comment" : "Comment",
+ "domain" : "Domain",
+ "max-age" : "Max-Age",
+ "secure" : "secure",
+ "httponly" : "httponly",
+ "version" : "Version",
+ }
+
+ _flags = set(['secure', 'httponly'])
+
+ def __init__(self):
+ # Set defaults
+ self.key = self.value = self.coded_value = None
+
+ # Set default attributes
+ for key in self._reserved:
+ dict.__setitem__(self, key, "")
+
+ def __setitem__(self, K, V):
+ K = K.lower()
+ if not K in self._reserved:
+ raise CookieError("Invalid Attribute %s" % K)
+ dict.__setitem__(self, K, V)
+
+ def isReservedKey(self, K):
+ return K.lower() in self._reserved
+
+ def set(self, key, val, coded_val, LegalChars=_LegalChars):
+ # First we verify that the key isn't a reserved word
+ # Second we make sure it only contains legal characters
+ if key.lower() in self._reserved:
+ raise CookieError("Attempt to set a reserved key: %s" % key)
+ if any(c not in LegalChars for c in key):
+ raise CookieError("Illegal key value: %s" % key)
+
+ # It's a good key, so save it.
+ self.key = key
+ self.value = val
+ self.coded_value = coded_val
+
+ def output(self, attrs=None, header="Set-Cookie:"):
+ return "%s %s" % (header, self.OutputString(attrs))
+
+ __str__ = output
+
+ @as_native_str()
+ def __repr__(self):
+ if PY2 and isinstance(self.value, unicode):
+ val = str(self.value) # make it a newstr to remove the u prefix
+ else:
+ val = self.value
+ return '<%s: %s=%s>' % (self.__class__.__name__,
+ str(self.key), repr(val))
+
+ def js_output(self, attrs=None):
+ # Print javascript
+ return """
+ <script type="text/javascript">
+ <!-- begin hiding
+ document.cookie = \"%s\";
+ // end hiding -->
+ </script>
+ """ % (self.OutputString(attrs).replace('"', r'\"'))
+
+ def OutputString(self, attrs=None):
+ # Build up our result
+ #
+ result = []
+ append = result.append
+
+ # First, the key=value pair
+ append("%s=%s" % (self.key, self.coded_value))
+
+ # Now add any defined attributes
+ if attrs is None:
+ attrs = self._reserved
+ items = sorted(self.items())
+ for key, value in items:
+ if value == "":
+ continue
+ if key not in attrs:
+ continue
+ if key == "expires" and isinstance(value, int):
+ append("%s=%s" % (self._reserved[key], _getdate(value)))
+ elif key == "max-age" and isinstance(value, int):
+ append("%s=%d" % (self._reserved[key], value))
+ elif key == "secure":
+ append(str(self._reserved[key]))
+ elif key == "httponly":
+ append(str(self._reserved[key]))
+ else:
+ append("%s=%s" % (self._reserved[key], value))
+
+ # Return the result
+ return _semispacejoin(result)
+
+
+#
+# Pattern for finding cookie
+#
+# This used to be strict parsing based on the RFC2109 and RFC2068
+# specifications. I have since discovered that MSIE 3.0x doesn't
+# follow the character rules outlined in those specs. As a
+# result, the parsing rules here are less strict.
+#
+
+_LegalCharsPatt = r"[\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=]"
+_CookiePattern = re.compile(r"""
+ (?x) # This is a verbose pattern
+ (?P<key> # Start of group 'key'
+ """ + _LegalCharsPatt + r"""+? # Any word of at least one letter
+ ) # End of group 'key'
+ ( # Optional group: there may not be a value.
+ \s*=\s* # Equal Sign
+ (?P<val> # Start of group 'val'
+ "(?:[^\\"]|\\.)*" # Any doublequoted string
+ | # or
+ \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
+ | # or
+ """ + _LegalCharsPatt + r"""* # Any word or empty string
+ ) # End of group 'val'
+ )? # End of optional value group
+ \s* # Any number of spaces.
+ (\s+|;|$) # Ending either at space, semicolon, or EOS.
+ """, re.ASCII) # May be removed if safe.
+
+
+# At long last, here is the cookie class. Using this class is almost just like
+# using a dictionary. See this module's docstring for example usage.
+#
+class BaseCookie(dict):
+ """A container class for a set of Morsels."""
+
+ def value_decode(self, val):
+ """real_value, coded_value = value_decode(STRING)
+ Called prior to setting a cookie's value from the network
+ representation. The VALUE is the value read from HTTP
+ header.
+ Override this function to modify the behavior of cookies.
+ """
+ return val, val
+
+ def value_encode(self, val):
+ """real_value, coded_value = value_encode(VALUE)
+ Called prior to setting a cookie's value from the dictionary
+ representation. The VALUE is the value being assigned.
+ Override this function to modify the behavior of cookies.
+ """
+ strval = str(val)
+ return strval, strval
+
+ def __init__(self, input=None):
+ if input:
+ self.load(input)
+
+ def __set(self, key, real_value, coded_value):
+ """Private method for setting a cookie's value"""
+ M = self.get(key, Morsel())
+ M.set(key, real_value, coded_value)
+ dict.__setitem__(self, key, M)
+
+ def __setitem__(self, key, value):
+ """Dictionary style assignment."""
+ rval, cval = self.value_encode(value)
+ self.__set(key, rval, cval)
+
+ def output(self, attrs=None, header="Set-Cookie:", sep="\015\012"):
+ """Return a string suitable for HTTP."""
+ result = []
+ items = sorted(self.items())
+ for key, value in items:
+ result.append(value.output(attrs, header))
+ return sep.join(result)
+
+ __str__ = output
+
+ @as_native_str()
+ def __repr__(self):
+ l = []
+ items = sorted(self.items())
+ for key, value in items:
+ if PY2 and isinstance(value.value, unicode):
+ val = str(value.value) # make it a newstr to remove the u prefix
+ else:
+ val = value.value
+ l.append('%s=%s' % (str(key), repr(val)))
+ return '<%s: %s>' % (self.__class__.__name__, _spacejoin(l))
+
+ def js_output(self, attrs=None):
+ """Return a string suitable for JavaScript."""
+ result = []
+ items = sorted(self.items())
+ for key, value in items:
+ result.append(value.js_output(attrs))
+ return _nulljoin(result)
+
+ def load(self, rawdata):
+ """Load cookies from a string (presumably HTTP_COOKIE) or
+ from a dictionary. Loading cookies from a dictionary 'd'
+ is equivalent to calling:
+ map(Cookie.__setitem__, d.keys(), d.values())
+ """
+ if isinstance(rawdata, str):
+ self.__parse_string(rawdata)
+ else:
+ # self.update() wouldn't call our custom __setitem__
+ for key, value in rawdata.items():
+ self[key] = value
+ return
+
+ def __parse_string(self, mystr, patt=_CookiePattern):
+ i = 0 # Our starting point
+ n = len(mystr) # Length of string
+ M = None # current morsel
+
+ while 0 <= i < n:
+ # Start looking for a cookie
+ match = patt.search(mystr, i)
+ if not match:
+ # No more cookies
+ break
+
+ key, value = match.group("key"), match.group("val")
+
+ i = match.end(0)
+
+ # Parse the key, value in case it's metainfo
+ if key[0] == "$":
+ # We ignore attributes which pertain to the cookie
+ # mechanism as a whole. See RFC 2109.
+ # (Does anyone care?)
+ if M:
+ M[key[1:]] = value
+ elif key.lower() in Morsel._reserved:
+ if M:
+ if value is None:
+ if key.lower() in Morsel._flags:
+ M[key] = True
+ else:
+ M[key] = _unquote(value)
+ elif value is not None:
+ rval, cval = self.value_decode(value)
+ self.__set(key, rval, cval)
+ M = self[key]
+
+
+class SimpleCookie(BaseCookie):
+ """
+ SimpleCookie supports strings as cookie values. When setting
+ the value using the dictionary assignment notation, SimpleCookie
+ calls the builtin str() to convert the value to a string. Values
+ received from HTTP are kept as strings.
+ """
+ def value_decode(self, val):
+ return _unquote(val), val
+
+ def value_encode(self, val):
+ strval = str(val)
+ return strval, _quote(strval)
diff --git a/contrib/python/future/future/backports/http/server.py b/contrib/python/future/future/backports/http/server.py
index 082e276667..b1c11e0c73 100644
--- a/contrib/python/future/future/backports/http/server.py
+++ b/contrib/python/future/future/backports/http/server.py
@@ -1,1226 +1,1226 @@
-"""HTTP server classes.
-
-From Python 3.3
-
-Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
-SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST,
-and CGIHTTPRequestHandler for CGI scripts.
-
-It does, however, optionally implement HTTP/1.1 persistent connections,
-as of version 0.3.
-
-Notes on CGIHTTPRequestHandler
-------------------------------
-
-This class implements GET and POST requests to cgi-bin scripts.
-
-If the os.fork() function is not present (e.g. on Windows),
-subprocess.Popen() is used as a fallback, with slightly altered semantics.
-
-In all cases, the implementation is intentionally naive -- all
-requests are executed synchronously.
-
-SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
--- it may execute arbitrary Python code or external programs.
-
-Note that status code 200 is sent prior to execution of a CGI script, so
-scripts cannot send other status codes such as 302 (redirect).
-
-XXX To do:
-
-- log requests even later (to capture byte count)
-- log user-agent header and other interesting goodies
-- send error log to separate file
-"""
-
-from __future__ import (absolute_import, division,
- print_function, unicode_literals)
-from future import utils
-from future.builtins import *
-
-
-# See also:
-#
-# HTTP Working Group T. Berners-Lee
-# INTERNET-DRAFT R. T. Fielding
-# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen
-# Expires September 8, 1995 March 8, 1995
-#
-# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
-#
-# and
-#
-# Network Working Group R. Fielding
-# Request for Comments: 2616 et al
-# Obsoletes: 2068 June 1999
-# Category: Standards Track
-#
-# URL: http://www.faqs.org/rfcs/rfc2616.html
-
-# Log files
-# ---------
-#
-# Here's a quote from the NCSA httpd docs about log file format.
-#
-# | The logfile format is as follows. Each line consists of:
-# |
-# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
-# |
-# | host: Either the DNS name or the IP number of the remote client
-# | rfc931: Any information returned by identd for this person,
-# | - otherwise.
-# | authuser: If user sent a userid for authentication, the user name,
-# | - otherwise.
-# | DD: Day
-# | Mon: Month (calendar name)
-# | YYYY: Year
-# | hh: hour (24-hour format, the machine's timezone)
-# | mm: minutes
-# | ss: seconds
-# | request: The first line of the HTTP request as sent by the client.
-# | ddd: the status code returned by the server, - if not available.
-# | bbbb: the total number of bytes sent,
-# | *not including the HTTP/1.0 header*, - if not available
-# |
-# | You can determine the name of the file accessed through request.
-#
-# (Actually, the latter is only true if you know the server configuration
-# at the time the request was made!)
-
-__version__ = "0.6"
-
-__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
-
-from future.backports import html
-from future.backports.http import client as http_client
-from future.backports.urllib import parse as urllib_parse
-from future.backports import socketserver
-
-import io
-import mimetypes
-import os
-import posixpath
-import select
-import shutil
-import socket # For gethostbyaddr()
-import sys
-import time
-import copy
-import argparse
-
-
-# Default error message template
-DEFAULT_ERROR_MESSAGE = """\
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-<html>
- <head>
- <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
- <title>Error response</title>
- </head>
- <body>
- <h1>Error response</h1>
- <p>Error code: %(code)d</p>
- <p>Message: %(message)s.</p>
- <p>Error code explanation: %(code)s - %(explain)s.</p>
- </body>
-</html>
-"""
-
-DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
-
-def _quote_html(html):
- return html.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
-
-class HTTPServer(socketserver.TCPServer):
-
- allow_reuse_address = 1 # Seems to make sense in testing environment
-
- def server_bind(self):
- """Override server_bind to store the server name."""
- socketserver.TCPServer.server_bind(self)
- host, port = self.socket.getsockname()[:2]
- self.server_name = socket.getfqdn(host)
- self.server_port = port
-
-
-class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
-
- """HTTP request handler base class.
-
- The following explanation of HTTP serves to guide you through the
- code as well as to expose any misunderstandings I may have about
- HTTP (so you don't need to read the code to figure out I'm wrong
- :-).
-
- HTTP (HyperText Transfer Protocol) is an extensible protocol on
- top of a reliable stream transport (e.g. TCP/IP). The protocol
- recognizes three parts to a request:
-
- 1. One line identifying the request type and path
- 2. An optional set of RFC-822-style headers
- 3. An optional data part
-
- The headers and data are separated by a blank line.
-
- The first line of the request has the form
-
- <command> <path> <version>
-
- where <command> is a (case-sensitive) keyword such as GET or POST,
- <path> is a string containing path information for the request,
- and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
- <path> is encoded using the URL encoding scheme (using %xx to signify
- the ASCII character with hex code xx).
-
- The specification specifies that lines are separated by CRLF but
- for compatibility with the widest range of clients recommends
- servers also handle LF. Similarly, whitespace in the request line
- is treated sensibly (allowing multiple spaces between components
- and allowing trailing whitespace).
-
- Similarly, for output, lines ought to be separated by CRLF pairs
- but most clients grok LF characters just fine.
-
- If the first line of the request has the form
-
- <command> <path>
-
- (i.e. <version> is left out) then this is assumed to be an HTTP
- 0.9 request; this form has no optional headers and data part and
- the reply consists of just the data.
-
- The reply form of the HTTP 1.x protocol again has three parts:
-
- 1. One line giving the response code
- 2. An optional set of RFC-822-style headers
- 3. The data
-
- Again, the headers and data are separated by a blank line.
-
- The response code line has the form
-
- <version> <responsecode> <responsestring>
-
- where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
- <responsecode> is a 3-digit response code indicating success or
- failure of the request, and <responsestring> is an optional
- human-readable string explaining what the response code means.
-
- This server parses the request and the headers, and then calls a
- function specific to the request type (<command>). Specifically,
- a request SPAM will be handled by a method do_SPAM(). If no
- such method exists the server sends an error response to the
- client. If it exists, it is called with no arguments:
-
- do_SPAM()
-
- Note that the request name is case sensitive (i.e. SPAM and spam
- are different requests).
-
- The various request details are stored in instance variables:
-
- - client_address is the client IP address in the form (host,
- port);
-
- - command, path and version are the broken-down request line;
-
- - headers is an instance of email.message.Message (or a derived
- class) containing the header information;
-
- - rfile is a file object open for reading positioned at the
- start of the optional input data part;
-
- - wfile is a file object open for writing.
-
- IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
-
- The first thing to be written must be the response line. Then
- follow 0 or more header lines, then a blank line, and then the
- actual data (if any). The meaning of the header lines depends on
- the command executed by the server; in most cases, when data is
- returned, there should be at least one header line of the form
-
- Content-type: <type>/<subtype>
-
- where <type> and <subtype> should be registered MIME types,
- e.g. "text/html" or "text/plain".
-
- """
-
- # The Python system version, truncated to its first component.
- sys_version = "Python/" + sys.version.split()[0]
-
- # The server software version. You may want to override this.
- # The format is multiple whitespace-separated strings,
- # where each string is of the form name[/version].
- server_version = "BaseHTTP/" + __version__
-
- error_message_format = DEFAULT_ERROR_MESSAGE
- error_content_type = DEFAULT_ERROR_CONTENT_TYPE
-
- # The default request version. This only affects responses up until
- # the point where the request line is parsed, so it mainly decides what
- # the client gets back when sending a malformed request line.
- # Most web servers default to HTTP 0.9, i.e. don't send a status line.
- default_request_version = "HTTP/0.9"
-
- def parse_request(self):
- """Parse a request (internal).
-
- The request should be stored in self.raw_requestline; the results
- are in self.command, self.path, self.request_version and
- self.headers.
-
- Return True for success, False for failure; on failure, an
- error is sent back.
-
- """
- self.command = None # set in case of error on the first line
- self.request_version = version = self.default_request_version
- self.close_connection = 1
- requestline = str(self.raw_requestline, 'iso-8859-1')
- requestline = requestline.rstrip('\r\n')
- self.requestline = requestline
- words = requestline.split()
- if len(words) == 3:
- command, path, version = words
- if version[:5] != 'HTTP/':
- self.send_error(400, "Bad request version (%r)" % version)
- return False
- try:
- base_version_number = version.split('/', 1)[1]
- version_number = base_version_number.split(".")
- # RFC 2145 section 3.1 says there can be only one "." and
- # - major and minor numbers MUST be treated as
- # separate integers;
- # - HTTP/2.4 is a lower version than HTTP/2.13, which in
- # turn is lower than HTTP/12.3;
- # - Leading zeros MUST be ignored by recipients.
- if len(version_number) != 2:
- raise ValueError
- version_number = int(version_number[0]), int(version_number[1])
- except (ValueError, IndexError):
- self.send_error(400, "Bad request version (%r)" % version)
- return False
- if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
- self.close_connection = 0
- if version_number >= (2, 0):
- self.send_error(505,
- "Invalid HTTP Version (%s)" % base_version_number)
- return False
- elif len(words) == 2:
- command, path = words
- self.close_connection = 1
- if command != 'GET':
- self.send_error(400,
- "Bad HTTP/0.9 request type (%r)" % command)
- return False
- elif not words:
- return False
- else:
- self.send_error(400, "Bad request syntax (%r)" % requestline)
- return False
- self.command, self.path, self.request_version = command, path, version
-
- # Examine the headers and look for a Connection directive.
- try:
- self.headers = http_client.parse_headers(self.rfile,
- _class=self.MessageClass)
- except http_client.LineTooLong:
- self.send_error(400, "Line too long")
- return False
-
- conntype = self.headers.get('Connection', "")
- if conntype.lower() == 'close':
- self.close_connection = 1
- elif (conntype.lower() == 'keep-alive' and
- self.protocol_version >= "HTTP/1.1"):
- self.close_connection = 0
- # Examine the headers and look for an Expect directive
- expect = self.headers.get('Expect', "")
- if (expect.lower() == "100-continue" and
- self.protocol_version >= "HTTP/1.1" and
- self.request_version >= "HTTP/1.1"):
- if not self.handle_expect_100():
- return False
- return True
-
- def handle_expect_100(self):
- """Decide what to do with an "Expect: 100-continue" header.
-
- If the client is expecting a 100 Continue response, we must
- respond with either a 100 Continue or a final response before
- waiting for the request body. The default is to always respond
- with a 100 Continue. You can behave differently (for example,
- reject unauthorized requests) by overriding this method.
-
- This method should either return True (possibly after sending
- a 100 Continue response) or send an error response and return
- False.
-
- """
- self.send_response_only(100)
- self.flush_headers()
- return True
-
- def handle_one_request(self):
- """Handle a single HTTP request.
-
- You normally don't need to override this method; see the class
- __doc__ string for information on how to handle specific HTTP
- commands such as GET and POST.
-
- """
- try:
- self.raw_requestline = self.rfile.readline(65537)
- if len(self.raw_requestline) > 65536:
- self.requestline = ''
- self.request_version = ''
- self.command = ''
- self.send_error(414)
- return
- if not self.raw_requestline:
- self.close_connection = 1
- return
- if not self.parse_request():
- # An error code has been sent, just exit
- return
- mname = 'do_' + self.command
- if not hasattr(self, mname):
- self.send_error(501, "Unsupported method (%r)" % self.command)
- return
- method = getattr(self, mname)
- method()
- self.wfile.flush() #actually send the response if not already done.
- except socket.timeout as e:
- #a read or a write timed out. Discard this connection
- self.log_error("Request timed out: %r", e)
- self.close_connection = 1
- return
-
- def handle(self):
- """Handle multiple requests if necessary."""
- self.close_connection = 1
-
- self.handle_one_request()
- while not self.close_connection:
- self.handle_one_request()
-
- def send_error(self, code, message=None):
- """Send and log an error reply.
-
- Arguments are the error code, and a detailed message.
- The detailed message defaults to the short entry matching the
- response code.
-
- This sends an error response (so it must be called before any
- output has been generated), logs the error, and finally sends
- a piece of HTML explaining the error to the user.
-
- """
-
- try:
- shortmsg, longmsg = self.responses[code]
- except KeyError:
- shortmsg, longmsg = '???', '???'
- if message is None:
- message = shortmsg
- explain = longmsg
- self.log_error("code %d, message %s", code, message)
- # using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201)
- content = (self.error_message_format %
- {'code': code, 'message': _quote_html(message), 'explain': explain})
- self.send_response(code, message)
- self.send_header("Content-Type", self.error_content_type)
- self.send_header('Connection', 'close')
- self.end_headers()
- if self.command != 'HEAD' and code >= 200 and code not in (204, 304):
- self.wfile.write(content.encode('UTF-8', 'replace'))
-
- def send_response(self, code, message=None):
- """Add the response header to the headers buffer and log the
- response code.
-
- Also send two standard headers with the server software
- version and the current date.
-
- """
- self.log_request(code)
- self.send_response_only(code, message)
- self.send_header('Server', self.version_string())
- self.send_header('Date', self.date_time_string())
-
- def send_response_only(self, code, message=None):
- """Send the response header only."""
- if message is None:
- if code in self.responses:
- message = self.responses[code][0]
- else:
- message = ''
- if self.request_version != 'HTTP/0.9':
- if not hasattr(self, '_headers_buffer'):
- self._headers_buffer = []
- self._headers_buffer.append(("%s %d %s\r\n" %
- (self.protocol_version, code, message)).encode(
- 'latin-1', 'strict'))
-
- def send_header(self, keyword, value):
- """Send a MIME header to the headers buffer."""
- if self.request_version != 'HTTP/0.9':
- if not hasattr(self, '_headers_buffer'):
- self._headers_buffer = []
- self._headers_buffer.append(
- ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))
-
- if keyword.lower() == 'connection':
- if value.lower() == 'close':
- self.close_connection = 1
- elif value.lower() == 'keep-alive':
- self.close_connection = 0
-
- def end_headers(self):
- """Send the blank line ending the MIME headers."""
- if self.request_version != 'HTTP/0.9':
- self._headers_buffer.append(b"\r\n")
- self.flush_headers()
-
def flush_headers(self):
    """Write any buffered header bytes to wfile and reset the buffer."""
    if not hasattr(self, '_headers_buffer'):
        return
    pending = b"".join(self._headers_buffer)
    self.wfile.write(pending)
    self._headers_buffer = []
-
def log_request(self, code='-', size='-'):
    """Log an accepted request.

    Forwards the stored request line together with the stringified
    code and size to log_message().

    """
    fields = (self.requestline, str(code), str(size))
    self.log_message('"%s" %s %s', *fields)
-
def log_error(self, format, *args):
    """Log an error.

    Takes the same arguments as log_message() and simply hands
    them over to it unchanged.

    XXX This should go to the separate error log.

    """
    forwarded = args
    self.log_message(format, *forwarded)
-
def log_message(self, format, *args):
    """Write one log line to sys.stderr.

    FORMAT is a %-style format string and ARGS supplies its
    parameters (just like printf).  The line is prefixed with the
    value of address_string() and, in brackets, the value of
    log_date_time_string().

    Every other logging method funnels through here, so override
    this one to redirect or reformat the log.

    """
    client = self.address_string()
    stamp = self.log_date_time_string()
    text = format % args
    sys.stderr.write("%s - - [%s] %s\n" % (client, stamp, text))
-
def version_string(self):
    """Return server_version and sys_version joined by a single space."""
    pieces = (self.server_version, self.sys_version)
    return ' '.join(pieces)
-
def date_time_string(self, timestamp=None):
    """Format a timestamp (default: now) as a GMT date for a header.

    Day and month names come from self.weekdayname and
    self.monthname rather than from the locale.

    """
    if timestamp is None:
        timestamp = time.time()
    utc = time.gmtime(timestamp)
    return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
        self.weekdayname[utc.tm_wday],
        utc.tm_mday, self.monthname[utc.tm_mon], utc.tm_year,
        utc.tm_hour, utc.tm_min, utc.tm_sec)
-
def log_date_time_string(self):
    """Return the current local time as DD/Mon/YYYY hh:mm:ss for logging."""
    local = time.localtime(time.time())
    return "%02d/%3s/%04d %02d:%02d:%02d" % (
        local.tm_mday, self.monthname[local.tm_mon], local.tm_year,
        local.tm_hour, local.tm_min, local.tm_sec)
-
- weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
-
- monthname = [None,
- 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
- 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
-
def address_string(self):
    """Return the first element of client_address (the client host)."""
    host = self.client_address[0]
    return host
-
- # Essentially static class variables
-
- # The version of the HTTP protocol we support.
- # Set this to HTTP/1.1 to enable automatic keepalive
- protocol_version = "HTTP/1.0"
-
- # MessageClass used to parse headers
- MessageClass = http_client.HTTPMessage
-
- # Table mapping response codes to messages; entries have the
- # form {code: (shortmessage, longmessage)}.
- # See RFC 2616 and 6585.
- responses = {
- 100: ('Continue', 'Request received, please continue'),
- 101: ('Switching Protocols',
- 'Switching to new protocol; obey Upgrade header'),
-
- 200: ('OK', 'Request fulfilled, document follows'),
- 201: ('Created', 'Document created, URL follows'),
- 202: ('Accepted',
- 'Request accepted, processing continues off-line'),
- 203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
- 204: ('No Content', 'Request fulfilled, nothing follows'),
- 205: ('Reset Content', 'Clear input form for further input.'),
- 206: ('Partial Content', 'Partial content follows.'),
-
- 300: ('Multiple Choices',
- 'Object has several resources -- see URI list'),
- 301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
- 302: ('Found', 'Object moved temporarily -- see URI list'),
- 303: ('See Other', 'Object moved -- see Method and URL list'),
- 304: ('Not Modified',
- 'Document has not changed since given time'),
- 305: ('Use Proxy',
- 'You must use proxy specified in Location to access this '
- 'resource.'),
- 307: ('Temporary Redirect',
- 'Object moved temporarily -- see URI list'),
-
- 400: ('Bad Request',
- 'Bad request syntax or unsupported method'),
- 401: ('Unauthorized',
- 'No permission -- see authorization schemes'),
- 402: ('Payment Required',
- 'No payment -- see charging schemes'),
- 403: ('Forbidden',
- 'Request forbidden -- authorization will not help'),
- 404: ('Not Found', 'Nothing matches the given URI'),
- 405: ('Method Not Allowed',
- 'Specified method is invalid for this resource.'),
- 406: ('Not Acceptable', 'URI not available in preferred format.'),
- 407: ('Proxy Authentication Required', 'You must authenticate with '
- 'this proxy before proceeding.'),
- 408: ('Request Timeout', 'Request timed out; try again later.'),
- 409: ('Conflict', 'Request conflict.'),
- 410: ('Gone',
- 'URI no longer exists and has been permanently removed.'),
- 411: ('Length Required', 'Client must specify Content-Length.'),
- 412: ('Precondition Failed', 'Precondition in headers is false.'),
- 413: ('Request Entity Too Large', 'Entity is too large.'),
- 414: ('Request-URI Too Long', 'URI is too long.'),
- 415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
- 416: ('Requested Range Not Satisfiable',
- 'Cannot satisfy request range.'),
- 417: ('Expectation Failed',
- 'Expect condition could not be satisfied.'),
- 428: ('Precondition Required',
- 'The origin server requires the request to be conditional.'),
- 429: ('Too Many Requests', 'The user has sent too many requests '
- 'in a given amount of time ("rate limiting").'),
- 431: ('Request Header Fields Too Large', 'The server is unwilling to '
- 'process the request because its header fields are too large.'),
-
- 500: ('Internal Server Error', 'Server got itself in trouble'),
- 501: ('Not Implemented',
- 'Server does not support this operation'),
- 502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
- 503: ('Service Unavailable',
- 'The server cannot process the request due to a high load'),
- 504: ('Gateway Timeout',
- 'The gateway server did not receive a timely response'),
- 505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
- 511: ('Network Authentication Required',
- 'The client needs to authenticate to gain network access.'),
- }
-
-
class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):

    """Simple HTTP request handler with GET and HEAD commands.

    This serves files from the current directory and any of its
    subdirectories.  The MIME type for files is determined by
    calling the .guess_type() method.

    The GET and HEAD requests are identical except that the HEAD
    request omits the actual contents of the file.

    """

    server_version = "SimpleHTTP/" + __version__

    def do_GET(self):
        """Serve a GET request."""
        f = self.send_head()
        if f:
            try:
                self.copyfile(f, self.wfile)
            finally:
                # Release the file even if the client disconnects mid-copy.
                f.close()

    def do_HEAD(self):
        """Serve a HEAD request."""
        f = self.send_head()
        if f:
            f.close()

    def send_head(self):
        """Common code for GET and HEAD commands.

        This sends the response code and MIME headers.

        Return value is either a file object (which has to be copied
        to the outputfile by the caller unless the command was HEAD,
        and must be closed by the caller under all circumstances), or
        None, in which case the caller has nothing further to do.

        """
        path = self.translate_path(self.path)
        f = None
        if os.path.isdir(path):
            if not self.path.endswith('/'):
                # redirect browser - doing basically what apache does
                self.send_response(301)
                self.send_header("Location", self.path + "/")
                self.end_headers()
                return None
            for index in "index.html", "index.htm":
                index = os.path.join(path, index)
                if os.path.exists(index):
                    path = index
                    break
            else:
                # No index file found: fall back to a generated listing.
                return self.list_directory(path)
        ctype = self.guess_type(path)
        try:
            f = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return None
        try:
            self.send_response(200)
            self.send_header("Content-type", ctype)
            fs = os.fstat(f.fileno())
            self.send_header("Content-Length", str(fs[6]))
            self.send_header("Last-Modified",
                             self.date_time_string(fs.st_mtime))
            self.end_headers()
        except BaseException:
            # The caller never receives f on failure, so close it here
            # instead of leaking the descriptor, then re-raise.
            f.close()
            raise
        return f

    def list_directory(self, path):
        """Helper to produce a directory listing (absent index.html).

        Return value is either a file object, or None (indicating an
        error).  In either case, the headers are sent, making the
        interface the same as for send_head().

        """
        try:
            entries = os.listdir(path)
        except os.error:
            self.send_error(404, "No permission to list directory")
            return None
        entries.sort(key=lambda a: a.lower())
        r = []
        displaypath = html.escape(urllib_parse.unquote(self.path))
        enc = sys.getfilesystemencoding()
        title = 'Directory listing for %s' % displaypath
        r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
                 '"http://www.w3.org/TR/html4/strict.dtd">')
        r.append('<html>\n<head>')
        r.append('<meta http-equiv="Content-Type" '
                 'content="text/html; charset=%s">' % enc)
        r.append('<title>%s</title>\n</head>' % title)
        r.append('<body>\n<h1>%s</h1>' % title)
        r.append('<hr>\n<ul>')
        for name in entries:
            fullname = os.path.join(path, name)
            displayname = linkname = name
            # Append / for directories or @ for symbolic links
            if os.path.isdir(fullname):
                displayname = name + "/"
                linkname = name + "/"
            if os.path.islink(fullname):
                displayname = name + "@"
                # Note: a link to a directory displays with @ and links with /
            r.append('<li><a href="%s">%s</a></li>'
                     % (urllib_parse.quote(linkname),
                        html.escape(displayname)))
        r.append('</ul>\n<hr>\n</body>\n</html>\n')
        encoded = '\n'.join(r).encode(enc)
        f = io.BytesIO()
        f.write(encoded)
        f.seek(0)
        self.send_response(200)
        self.send_header("Content-type", "text/html; charset=%s" % enc)
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        return f

    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        Components that mean special things to the local file system
        (e.g. drive or directory names) are ignored.  (XXX They should
        probably be diagnosed.)

        """
        # abandon query parameters
        path = path.split('?', 1)[0]
        path = path.split('#', 1)[0]
        path = posixpath.normpath(urllib_parse.unquote(path))
        words = path.split('/')
        words = filter(None, words)
        path = os.getcwd()
        for word in words:
            # Strip drive letters and directory parts so a single URL
            # component can never address anything outside the tree.
            drive, word = os.path.splitdrive(word)
            head, word = os.path.split(word)
            if word in (os.curdir, os.pardir):
                continue
            path = os.path.join(path, word)
        return path

    def copyfile(self, source, outputfile):
        """Copy all data between two file objects.

        The SOURCE argument is a file object open for reading
        (or anything with a read() method) and the OUTPUTFILE
        argument is a file object open for writing (or
        anything with a write() method).

        The only reason for overriding this would be to change
        the block size or perhaps to replace newlines by CRLF
        -- note however that the default server uses this
        to copy binary data as well.

        """
        shutil.copyfileobj(source, outputfile)

    def guess_type(self, path):
        """Guess the type of a file.

        Argument is a PATH (a filename).

        Return value is a string of the form type/subtype,
        usable for a MIME Content-type header.

        The default implementation looks the file's extension
        up in the table self.extensions_map, using application/octet-stream
        as a default; however it would be permissible (if
        slow) to look inside the data to make a better guess.

        """

        base, ext = posixpath.splitext(path)
        # Try the extension as written first, then case-insensitively.
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        ext = ext.lower()
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        else:
            return self.extensions_map['']

    if not mimetypes.inited:
        mimetypes.init()  # try to read system mime.types
    extensions_map = mimetypes.types_map.copy()
    extensions_map.update({
        '': 'application/octet-stream',  # Default
        '.py': 'text/plain',
        '.c': 'text/plain',
        '.h': 'text/plain',
        })
-
-
-# Utilities for CGIHTTPRequestHandler
-
-def _url_collapse_path(path):
- """
- Given a URL path, remove extra '/'s and '.' path elements and collapse
- any '..' references and returns a colllapsed path.
-
- Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
- The utility of this function is limited to is_cgi method and helps
- preventing some security attacks.
-
- Returns: A tuple of (head, tail) where tail is everything after the final /
- and head is everything before it. Head will always start with a '/' and,
- if it contains anything else, never have a trailing '/'.
-
- Raises: IndexError if too many '..' occur within the path.
-
- """
- # Similar to os.path.split(os.path.normpath(path)) but specific to URL
- # path semantics rather than local operating system semantics.
- path_parts = path.split('/')
- head_parts = []
- for part in path_parts[:-1]:
- if part == '..':
- head_parts.pop() # IndexError if more '..' than prior parts
- elif part and part != '.':
- head_parts.append( part )
- if path_parts:
- tail_part = path_parts.pop()
- if tail_part:
- if tail_part == '..':
- head_parts.pop()
- tail_part = ''
- elif tail_part == '.':
- tail_part = ''
- else:
- tail_part = ''
-
- splitpath = ('/' + '/'.join(head_parts), tail_part)
- collapsed_path = "/".join(splitpath)
-
- return collapsed_path
-
-
-
nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid.

    The value is cached in the module-level ``nobody``.  Returns -1
    when the pwd module cannot be imported; when there is no
    'nobody' account, one more than the highest known uid is used.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody')[2]
        except KeyError:
            nobody = 1 + max(entry[2] for entry in pwd.getpwall())
    return nobody
-
-
def executable(path):
    """Report whether os.access() grants execute permission on *path*."""
    may_execute = os.access(path, os.X_OK)
    return may_execute
-
-
class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target
        gets a 501 error.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(501, "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that support CGI scripts"""
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).

        """
        collapsed_path = _url_collapse_path(self.path)
        # Split at the first '/' after the leading one: head is the
        # top-level directory, tail is everything beneath it.
        dir_sep = collapsed_path.find('/', 1)
        head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
        if head in self.cgi_directories:
            self.cgi_info = head, tail
            return True
        return False


    # URL directories whose contents are treated as CGI scripts.
    cgi_directories = ['/cgi-bin', '/htbin']

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script."""
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script.

        Uses self.cgi_info (set by is_cgi()) to locate the script,
        builds the CGI environment from the request, sends a 200
        status line, and then runs the script either in a forked
        child (when os.fork exists) or through subprocess.Popen.
        Sends a 404/403 error and returns early when the script is
        missing, not a plain file, or not executable.

        """
        path = self.path
        dir, rest = self.cgi_info

        # Descend into nested directories below the CGI directory for as
        # long as the next path component is an existing directory.
        i = path.find('/', len(dir) + 1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                dir, rest = nextdir, nextrest
                i = path.find('/', len(dir) + 1)
            else:
                break

        # find an explicit query string, if present.
        i = rest.rfind('?')
        if i >= 0:
            rest, query = rest[:i], rest[i+1:]
        else:
            query = ''

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(404, "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(403, "CGI script is not a plain file (%r)" %
                            scriptname)
            return
        ispy = self.is_python(scriptname)
        # Without fork, Python scripts are run through the interpreter,
        # so only non-Python scripts need the executable check there.
        if self.have_fork or not ispy:
            if not self.is_executable(scriptfile):
                self.send_error(403, "CGI script is not executable (%r)" %
                                scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib_parse.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.get("authorization")
        if authorization:
            authorization = authorization.split()
            if len(authorization) == 2:
                import base64, binascii
                env['AUTH_TYPE'] = authorization[0]
                if authorization[0].lower() == "basic":
                    # Decode "user:password"; only the user name is
                    # exported.  Undecodable credentials are ignored.
                    try:
                        authorization = authorization[1].encode('ascii')
                        if utils.PY3:
                            # In Py3.3, was:
                            authorization = base64.decodebytes(authorization).\
                                            decode('ascii')
                        else:
                            # Backport to Py2.7:
                            authorization = base64.decodestring(authorization).\
                                            decode('ascii')
                    except (binascii.Error, UnicodeError):
                        pass
                    else:
                        authorization = authorization.split(':')
                        if len(authorization) == 2:
                            env['REMOTE_USER'] = authorization[0]
        # XXX REMOTE_IDENT
        if self.headers.get('content-type') is None:
            env['CONTENT_TYPE'] = self.headers.get_content_type()
        else:
            env['CONTENT_TYPE'] = self.headers['content-type']
        length = self.headers.get('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.get('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        accept = []
        for line in self.headers.getallmatchingheaders('accept'):
            # Lines starting with whitespace are taken whole; otherwise
            # the first 7 characters are dropped and the rest is split
            # on commas.
            if line[:1] in "\t\n\r ":
                accept.append(line.strip())
            else:
                accept = accept + line[7:].split(',')
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.get('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.get_all('cookie', []))
        cookie_str = ', '.join(co)
        if cookie_str:
            env['HTTP_COOKIE'] = cookie_str
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        # The status line goes out before the script runs, so the script
        # cannot choose a different status code.
        self.send_response(200, "Script output follows")
        self.flush_headers()

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            # A query without '=' is passed to the script as an argument.
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush() # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                if sts:
                    self.log_error("CGI script exit status %#x", sts)
                return
            # Child
            try:
                # Best effort: try to switch to the uid from nobody_uid();
                # a failure to do so is deliberately ignored.
                try:
                    os.setuid(nobody)
                except os.error:
                    pass
                # Wire the request streams to the script's stdin/stdout.
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Reached only if something above failed in the child;
                # report it and exit without returning into the server loop.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non-Unix -- use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            if '=' not in query:
                cmdline.append(query)
            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            # A missing or malformed Content-Length counts as no body.
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env = env
                                 )
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
-
-
def test(HandlerClass = BaseHTTPRequestHandler,
         ServerClass = HTTPServer, protocol="HTTP/1.0", port=8000):
    """Run a demo HTTP server until interrupted.

    Sets HandlerClass.protocol_version to *protocol*, binds
    ServerClass to all interfaces on *port* (default 8000), and
    serves forever.  On Ctrl-C the server is closed and the process
    exits with status 0.

    """
    HandlerClass.protocol_version = protocol
    httpd = ServerClass(('', port), HandlerClass)

    bound = httpd.socket.getsockname()
    print("Serving HTTP on", bound[0], "port", bound[1], "...")
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        print("\nKeyboard interrupt received, exiting.")
        httpd.server_close()
        sys.exit(0)
-
if __name__ == '__main__':
    # Command-line entry point: optional --cgi flag and optional port.
    parser = argparse.ArgumentParser()
    parser.add_argument('--cgi', action='store_true',
                        help='Run as CGI Server')
    parser.add_argument('port', action='store',
                        default=8000, type=int,
                        nargs='?',
                        help='Specify alternate port [default: 8000]')
    args = parser.parse_args()
    # Pick the handler once, then start the demo server with it.
    handler_class = (CGIHTTPRequestHandler if args.cgi
                     else SimpleHTTPRequestHandler)
    test(HandlerClass=handler_class, port=args.port)
+"""HTTP server classes.
+
+From Python 3.3
+
+Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
+SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST,
+and CGIHTTPRequestHandler for CGI scripts.
+
+It does, however, optionally implement HTTP/1.1 persistent connections,
+as of version 0.3.
+
+Notes on CGIHTTPRequestHandler
+------------------------------
+
+This class implements GET and POST requests to cgi-bin scripts.
+
+If the os.fork() function is not present (e.g. on Windows),
+subprocess.Popen() is used as a fallback, with slightly altered semantics.
+
+In all cases, the implementation is intentionally naive -- all
+requests are executed synchronously.
+
+SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
+-- it may execute arbitrary Python code or external programs.
+
+Note that status code 200 is sent prior to execution of a CGI script, so
+scripts cannot send other status codes such as 302 (redirect).
+
+XXX To do:
+
+- log requests even later (to capture byte count)
+- log user-agent header and other interesting goodies
+- send error log to separate file
+"""
+
+from __future__ import (absolute_import, division,
+ print_function, unicode_literals)
+from future import utils
+from future.builtins import *
+
+
+# See also:
+#
+# HTTP Working Group T. Berners-Lee
+# INTERNET-DRAFT R. T. Fielding
+# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen
+# Expires September 8, 1995 March 8, 1995
+#
+# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
+#
+# and
+#
+# Network Working Group R. Fielding
+# Request for Comments: 2616 et al
+# Obsoletes: 2068 June 1999
+# Category: Standards Track
+#
+# URL: http://www.faqs.org/rfcs/rfc2616.html
+
+# Log files
+# ---------
+#
+# Here's a quote from the NCSA httpd docs about log file format.
+#
+# | The logfile format is as follows. Each line consists of:
+# |
+# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
+# |
+# | host: Either the DNS name or the IP number of the remote client
+# | rfc931: Any information returned by identd for this person,
+# | - otherwise.
+# | authuser: If user sent a userid for authentication, the user name,
+# | - otherwise.
+# | DD: Day
+# | Mon: Month (calendar name)
+# | YYYY: Year
+# | hh: hour (24-hour format, the machine's timezone)
+# | mm: minutes
+# | ss: seconds
+# | request: The first line of the HTTP request as sent by the client.
+# | ddd: the status code returned by the server, - if not available.
+# | bbbb: the total number of bytes sent,
+# | *not including the HTTP/1.0 header*, - if not available
+# |
+# | You can determine the name of the file accessed through request.
+#
+# (Actually, the latter is only true if you know the server configuration
+# at the time the request was made!)
+
+__version__ = "0.6"
+
+__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
+
+from future.backports import html
+from future.backports.http import client as http_client
+from future.backports.urllib import parse as urllib_parse
+from future.backports import socketserver
+
+import io
+import mimetypes
+import os
+import posixpath
+import select
+import shutil
+import socket # For gethostbyaddr()
+import sys
+import time
+import copy
+import argparse
+
+
+# Default error message template
+DEFAULT_ERROR_MESSAGE = """\
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+ <head>
+ <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
+ <title>Error response</title>
+ </head>
+ <body>
+ <h1>Error response</h1>
+ <p>Error code: %(code)d</p>
+ <p>Message: %(message)s.</p>
+ <p>Error code explanation: %(code)s - %(explain)s.</p>
+ </body>
+</html>
+"""
+
+DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
+
+def _quote_html(html):
+ return html.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
+
class HTTPServer(socketserver.TCPServer):
    """TCP server that also records its own host name and port."""

    allow_reuse_address = 1    # Seems to make sense in testing environment

    def server_bind(self):
        """Bind as TCPServer does, then store server_name and server_port."""
        socketserver.TCPServer.server_bind(self)
        sockname = self.socket.getsockname()
        host, port = sockname[:2]
        self.server_port = port
        self.server_name = socket.getfqdn(host)
+
+
+class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
+
+ """HTTP request handler base class.
+
+ The following explanation of HTTP serves to guide you through the
+ code as well as to expose any misunderstandings I may have about
+ HTTP (so you don't need to read the code to figure out I'm wrong
+ :-).
+
+ HTTP (HyperText Transfer Protocol) is an extensible protocol on
+ top of a reliable stream transport (e.g. TCP/IP). The protocol
+ recognizes three parts to a request:
+
+ 1. One line identifying the request type and path
+ 2. An optional set of RFC-822-style headers
+ 3. An optional data part
+
+ The headers and data are separated by a blank line.
+
+ The first line of the request has the form
+
+ <command> <path> <version>
+
+ where <command> is a (case-sensitive) keyword such as GET or POST,
+ <path> is a string containing path information for the request,
+ and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
+ <path> is encoded using the URL encoding scheme (using %xx to signify
+ the ASCII character with hex code xx).
+
+ The specification specifies that lines are separated by CRLF but
+ for compatibility with the widest range of clients recommends
+ servers also handle LF. Similarly, whitespace in the request line
+ is treated sensibly (allowing multiple spaces between components
+ and allowing trailing whitespace).
+
+ Similarly, for output, lines ought to be separated by CRLF pairs
+ but most clients grok LF characters just fine.
+
+ If the first line of the request has the form
+
+ <command> <path>
+
+ (i.e. <version> is left out) then this is assumed to be an HTTP
+ 0.9 request; this form has no optional headers and data part and
+ the reply consists of just the data.
+
+ The reply form of the HTTP 1.x protocol again has three parts:
+
+ 1. One line giving the response code
+ 2. An optional set of RFC-822-style headers
+ 3. The data
+
+ Again, the headers and data are separated by a blank line.
+
+ The response code line has the form
+
+ <version> <responsecode> <responsestring>
+
+ where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
+ <responsecode> is a 3-digit response code indicating success or
+ failure of the request, and <responsestring> is an optional
+ human-readable string explaining what the response code means.
+
+ This server parses the request and the headers, and then calls a
+ function specific to the request type (<command>). Specifically,
+ a request SPAM will be handled by a method do_SPAM(). If no
+ such method exists the server sends an error response to the
+ client. If it exists, it is called with no arguments:
+
+ do_SPAM()
+
+ Note that the request name is case sensitive (i.e. SPAM and spam
+ are different requests).
+
+ The various request details are stored in instance variables:
+
+ - client_address is the client IP address in the form (host,
+ port);
+
+ - command, path and version are the broken-down request line;
+
+ - headers is an instance of email.message.Message (or a derived
+ class) containing the header information;
+
+ - rfile is a file object open for reading positioned at the
+ start of the optional input data part;
+
+ - wfile is a file object open for writing.
+
+ IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
+
+ The first thing to be written must be the response line. Then
+ follow 0 or more header lines, then a blank line, and then the
+ actual data (if any). The meaning of the header lines depends on
+ the command executed by the server; in most cases, when data is
+ returned, there should be at least one header line of the form
+
+ Content-type: <type>/<subtype>
+
+ where <type> and <subtype> should be registered MIME types,
+ e.g. "text/html" or "text/plain".
+
+ """
+
+ # The Python system version, truncated to its first component.
+ sys_version = "Python/" + sys.version.split()[0]
+
+ # The server software version. You may want to override this.
+ # The format is multiple whitespace-separated strings,
+ # where each string is of the form name[/version].
+ server_version = "BaseHTTP/" + __version__
+
+ error_message_format = DEFAULT_ERROR_MESSAGE
+ error_content_type = DEFAULT_ERROR_CONTENT_TYPE
+
+ # The default request version. This only affects responses up until
+ # the point where the request line is parsed, so it mainly decides what
+ # the client gets back when sending a malformed request line.
+ # Most web servers default to HTTP 0.9, i.e. don't send a status line.
+ default_request_version = "HTTP/0.9"
+
def parse_request(self):
    """Parse a request (internal).

    The request should be stored in self.raw_requestline; the results
    are in self.command, self.path, self.request_version and
    self.headers.

    Return True for success, False for failure; on failure, an
    error is sent back (except for an empty request line, which
    just returns False).

    """
    self.command = None  # set in case of error on the first line
    self.request_version = version = self.default_request_version
    # Default to closing the connection; keep-alive is enabled below
    # only when both sides qualify.
    self.close_connection = 1
    requestline = str(self.raw_requestline, 'iso-8859-1')
    requestline = requestline.rstrip('\r\n')
    self.requestline = requestline
    words = requestline.split()
    if len(words) == 3:
        # Full form: <command> <path> <version>
        command, path, version = words
        if version[:5] != 'HTTP/':
            self.send_error(400, "Bad request version (%r)" % version)
            return False
        try:
            base_version_number = version.split('/', 1)[1]
            version_number = base_version_number.split(".")
            # RFC 2145 section 3.1 says there can be only one "." and
            #   - major and minor numbers MUST be treated as
            #      separate integers;
            #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
            #      turn is lower than HTTP/12.3;
            #   - Leading zeros MUST be ignored by recipients.
            if len(version_number) != 2:
                raise ValueError
            version_number = int(version_number[0]), int(version_number[1])
        except (ValueError, IndexError):
            self.send_error(400, "Bad request version (%r)" % version)
            return False
        # NOTE: protocol_version is compared as a string here, while the
        # client's version is compared as an integer tuple.
        if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
            self.close_connection = 0
        if version_number >= (2, 0):
            self.send_error(505,
                      "Invalid HTTP Version (%s)" % base_version_number)
            return False
    elif len(words) == 2:
        # Two words: treated as an HTTP/0.9 request, which only allows GET.
        command, path = words
        self.close_connection = 1
        if command != 'GET':
            self.send_error(400,
                            "Bad HTTP/0.9 request type (%r)" % command)
            return False
    elif not words:
        # Empty request line: fail without sending an error response.
        return False
    else:
        self.send_error(400, "Bad request syntax (%r)" % requestline)
        return False
    self.command, self.path, self.request_version = command, path, version

    # Examine the headers and look for a Connection directive.
    try:
        self.headers = http_client.parse_headers(self.rfile,
                                                 _class=self.MessageClass)
    except http_client.LineTooLong:
        self.send_error(400, "Line too long")
        return False

    # An explicit Connection header overrides the version-based default.
    conntype = self.headers.get('Connection', "")
    if conntype.lower() == 'close':
        self.close_connection = 1
    elif (conntype.lower() == 'keep-alive' and
          self.protocol_version >= "HTTP/1.1"):
        self.close_connection = 0
    # Examine the headers and look for an Expect directive
    expect = self.headers.get('Expect', "")
    if (expect.lower() == "100-continue" and
            self.protocol_version >= "HTTP/1.1" and
            self.request_version >= "HTTP/1.1"):
        if not self.handle_expect_100():
            return False
    return True
+
def handle_expect_100(self):
    """React to a request that carried "Expect: 100-continue".

    A client that sends this header waits for either an interim
    100 Continue or a final response before it transmits the request
    body.  This default implementation always sends 100 Continue.
    Override it to behave differently, e.g. to turn away unauthorized
    requests early.

    Implementations must return True (normally after having sent the
    100 Continue), or send an error response and return False.

    """
    interim_status = 100
    self.send_response_only(interim_status)
    self.flush_headers()
    return True
+
def handle_one_request(self):
    """Read, parse and dispatch exactly one HTTP request.

    Subclasses normally leave this method alone and implement
    do_<COMMAND>() methods (do_GET, do_POST, ...) instead; see the
    class docstring for details.

    """
    try:
        line = self.rfile.readline(65537)
        self.raw_requestline = line
        if len(line) > 65536:
            # Request line too long: reset the per-request state that
            # send_error() and the logging helpers read, then reject.
            self.requestline = self.request_version = self.command = ''
            self.send_error(414)
        elif not line:
            # Nothing was read: stop serving this connection.
            self.close_connection = 1
        elif self.parse_request():
            handler_name = 'do_' + self.command
            if hasattr(self, handler_name):
                getattr(self, handler_name)()
                # actually send the response if not already done.
                self.wfile.flush()
            else:
                self.send_error(501,
                                "Unsupported method (%r)" % self.command)
        # else: parse_request() already sent an error code, just exit
    except socket.timeout as exc:
        # a read or a write timed out.  Discard this connection
        self.log_error("Request timed out: %r", exc)
        self.close_connection = 1
+
def handle(self):
    """Serve requests on this connection until it should be closed.

    At least one request is always handled; further ones follow for
    as long as close_connection stays false.
    """
    self.close_connection = 1

    while True:
        self.handle_one_request()
        if self.close_connection:
            break
+
def send_error(self, code, message=None):
    """Log an error and send the matching error reply.

    Arguments are the numeric error code and an optional detailed
    message; when the message is omitted, the short text registered
    for the code in self.responses is used.

    A complete error response is produced, so this must be called
    before any other output has been generated.  The error is logged,
    the headers are sent, and finally a small HTML page describing the
    error is written (unless the request or status forbids a body).

    """
    try:
        short_text, long_text = self.responses[code]
    except KeyError:
        short_text = long_text = '???'
    if message is None:
        message = short_text
    self.log_error("code %d, message %s", code, message)
    # using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201)
    substitutions = {
        'code': code,
        'message': _quote_html(message),
        'explain': long_text,
    }
    body = self.error_message_format % substitutions
    self.send_response(code, message)
    self.send_header("Content-Type", self.error_content_type)
    self.send_header('Connection', 'close')
    self.end_headers()
    # No body for HEAD requests, 1xx codes, 204 and 304.
    if self.command == 'HEAD' or code < 200 or code in (204, 304):
        return
    self.wfile.write(body.encode('UTF-8', 'replace'))
+
def send_response(self, code, message=None):
    """Log the response code and buffer the start of the response.

    Buffers the status line followed by the two standard headers:
    'Server' (software version) and 'Date' (current date).

    """
    self.log_request(code)
    self.send_response_only(code, message)
    server_id = self.version_string()
    self.send_header('Server', server_id)
    current_date = self.date_time_string()
    self.send_header('Date', current_date)
+
def send_response_only(self, code, message=None):
    """Buffer just the status line of the response.

    When no message is given, the short text for the code from
    self.responses is used (or an empty string for unknown codes).
    Nothing is buffered for HTTP/0.9 requests.
    """
    if message is None:
        message = self.responses[code][0] if code in self.responses else ''
    if self.request_version == 'HTTP/0.9':
        return
    if not hasattr(self, '_headers_buffer'):
        self._headers_buffer = []
    status_line = "%s %d %s\r\n" % (self.protocol_version, code, message)
    self._headers_buffer.append(status_line.encode('latin-1', 'strict'))
+
def send_header(self, keyword, value):
    """Buffer one MIME header line.

    Nothing is buffered for HTTP/0.9 requests.  A 'Connection' header
    additionally updates self.close_connection ('close' sets it,
    'keep-alive' clears it), whatever the request version.
    """
    if self.request_version != 'HTTP/0.9':
        if not hasattr(self, '_headers_buffer'):
            self._headers_buffer = []
        header_line = "%s: %s\r\n" % (keyword, value)
        self._headers_buffer.append(header_line.encode('latin-1', 'strict'))

    if keyword.lower() != 'connection':
        return
    token = value.lower()
    if token == 'close':
        self.close_connection = 1
    elif token == 'keep-alive':
        self.close_connection = 0
+
def end_headers(self):
    """Send the blank line ending the MIME headers.

    Appends the terminating CRLF to the header buffer and flushes the
    buffer to self.wfile.  Nothing is sent for HTTP/0.9 requests.

    The buffer is created on demand, as send_response_only() and
    send_header() do, so calling this method before any header was
    buffered emits a bare blank line rather than raising
    AttributeError.
    """
    if self.request_version != 'HTTP/0.9':
        if not hasattr(self, '_headers_buffer'):
            self._headers_buffer = []
        self._headers_buffer.append(b"\r\n")
        self.flush_headers()
+
def flush_headers(self):
    """Write any buffered header bytes to self.wfile and empty the buffer.

    Does nothing when no header buffer has been created yet.
    """
    if not hasattr(self, '_headers_buffer'):
        return
    pending = b"".join(self._headers_buffer)
    self.wfile.write(pending)
    self._headers_buffer = []
+
def log_request(self, code='-', size='-'):
    """Log an accepted request (request line, status code and size).

    send_response() calls this with the response code.

    """
    fields = (self.requestline, str(code), str(size))
    self.log_message('"%s" %s %s', *fields)
+
def log_error(self, format, *args):
    """Log an error for a request that could not be fulfilled.

    Takes the same arguments as log_message(), to which the message
    is simply forwarded by default.

    XXX This should go to the separate error log.

    """
    self.log_message(format, *args)
+
def log_message(self, format, *args):
    """Write one log line to sys.stderr.

    Every other logging helper ends up here; override this method to
    redirect or reformat the log.

    FORMAT is a printf-style format string; the values for any %
    escapes it contains are passed as the remaining arguments.

    Each line is prefixed with the client address and the current
    date/time.

    """
    client = self.address_string()
    stamp = self.log_date_time_string()
    text = format % args
    sys.stderr.write("%s - - [%s] %s\n" % (client, stamp, text))
+
def version_string(self):
    """Return server_version and sys_version joined by a single space."""
    return ' '.join((self.server_version, self.sys_version))
+
def date_time_string(self, timestamp=None):
    """Format a timestamp (default: now) as a GMT date for a message header."""
    if timestamp is None:
        timestamp = time.time()
    utc = time.gmtime(timestamp)
    return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
        self.weekdayname[utc.tm_wday],
        utc.tm_mday, self.monthname[utc.tm_mon], utc.tm_year,
        utc.tm_hour, utc.tm_min, utc.tm_sec)
+
def log_date_time_string(self):
    """Return the current local time as DD/Mon/YYYY HH:MM:SS for log lines."""
    local = time.localtime(time.time())
    return "%02d/%3s/%04d %02d:%02d:%02d" % (
        local.tm_mday, self.monthname[local.tm_mon], local.tm_year,
        local.tm_hour, local.tm_min, local.tm_sec)
+
# Weekday abbreviations, indexed by the weekday field of a time tuple
# (see date_time_string()).
weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

# Month abbreviations, indexed by the month field of a time tuple;
# index 0 is an unused placeholder.
monthname = [None,
             'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
             'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
+
def address_string(self):
    """Return the first element of self.client_address (the client address)."""
    host = self.client_address[0]
    return host
+
# Essentially static class variables

# The version of the HTTP protocol we support.
# Set this to HTTP/1.1 to enable automatic keepalive
# (parse_request() only keeps a connection open when this compares
# >= "HTTP/1.1").
protocol_version = "HTTP/1.0"

# MessageClass used to parse headers (passed to
# http_client.parse_headers() by parse_request()).
MessageClass = http_client.HTTPMessage
+
# Table mapping response codes to messages; entries have the
# form {code: (shortmessage, longmessage)}.
# See RFC 2616 and 6585.
# send_response_only() uses the short message as the default reason
# phrase; send_error() uses the long message as the explanation text.
responses = {
    # 1xx
    100: ('Continue', 'Request received, please continue'),
    101: ('Switching Protocols',
          'Switching to new protocol; obey Upgrade header'),

    # 2xx
    200: ('OK', 'Request fulfilled, document follows'),
    201: ('Created', 'Document created, URL follows'),
    202: ('Accepted',
          'Request accepted, processing continues off-line'),
    203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
    204: ('No Content', 'Request fulfilled, nothing follows'),
    205: ('Reset Content', 'Clear input form for further input.'),
    206: ('Partial Content', 'Partial content follows.'),

    # 3xx
    300: ('Multiple Choices',
          'Object has several resources -- see URI list'),
    301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
    302: ('Found', 'Object moved temporarily -- see URI list'),
    303: ('See Other', 'Object moved -- see Method and URL list'),
    304: ('Not Modified',
          'Document has not changed since given time'),
    305: ('Use Proxy',
          'You must use proxy specified in Location to access this '
          'resource.'),
    307: ('Temporary Redirect',
          'Object moved temporarily -- see URI list'),

    # 4xx
    400: ('Bad Request',
          'Bad request syntax or unsupported method'),
    401: ('Unauthorized',
          'No permission -- see authorization schemes'),
    402: ('Payment Required',
          'No payment -- see charging schemes'),
    403: ('Forbidden',
          'Request forbidden -- authorization will not help'),
    404: ('Not Found', 'Nothing matches the given URI'),
    405: ('Method Not Allowed',
          'Specified method is invalid for this resource.'),
    406: ('Not Acceptable', 'URI not available in preferred format.'),
    407: ('Proxy Authentication Required', 'You must authenticate with '
          'this proxy before proceeding.'),
    408: ('Request Timeout', 'Request timed out; try again later.'),
    409: ('Conflict', 'Request conflict.'),
    410: ('Gone',
          'URI no longer exists and has been permanently removed.'),
    411: ('Length Required', 'Client must specify Content-Length.'),
    412: ('Precondition Failed', 'Precondition in headers is false.'),
    413: ('Request Entity Too Large', 'Entity is too large.'),
    414: ('Request-URI Too Long', 'URI is too long.'),
    415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
    416: ('Requested Range Not Satisfiable',
          'Cannot satisfy request range.'),
    417: ('Expectation Failed',
          'Expect condition could not be satisfied.'),
    428: ('Precondition Required',
          'The origin server requires the request to be conditional.'),
    429: ('Too Many Requests', 'The user has sent too many requests '
          'in a given amount of time ("rate limiting").'),
    431: ('Request Header Fields Too Large', 'The server is unwilling to '
          'process the request because its header fields are too large.'),

    # 5xx
    500: ('Internal Server Error', 'Server got itself in trouble'),
    501: ('Not Implemented',
          'Server does not support this operation'),
    502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
    503: ('Service Unavailable',
          'The server cannot process the request due to a high load'),
    504: ('Gateway Timeout',
          'The gateway server did not receive a timely response'),
    505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
    511: ('Network Authentication Required',
          'The client needs to authenticate to gain network access.'),
    }
+
+
class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):

    """Simple HTTP request handler with GET and HEAD commands.

    This serves files from the current directory and any of its
    subdirectories.  The MIME type for files is determined by
    calling the .guess_type() method.

    The GET and HEAD requests are identical except that the HEAD
    request omits the actual contents of the file.

    """

    server_version = "SimpleHTTP/" + __version__

    def do_GET(self):
        """Serve a GET request."""
        f = self.send_head()
        if f:
            # Close the file even when the client disconnects (or any
            # other error occurs) while the body is being copied.
            try:
                self.copyfile(f, self.wfile)
            finally:
                f.close()

    def do_HEAD(self):
        """Serve a HEAD request."""
        f = self.send_head()
        if f:
            f.close()

    def send_head(self):
        """Common code for GET and HEAD commands.

        This sends the response code and MIME headers.

        Return value is either a file object (which has to be copied
        to the outputfile by the caller unless the command was HEAD,
        and must be closed by the caller under all circumstances), or
        None, in which case the caller has nothing further to do.

        If sending the headers fails after the file has been opened,
        the file is closed here and the exception is re-raised.

        """
        path = self.translate_path(self.path)
        f = None
        if os.path.isdir(path):
            if not self.path.endswith('/'):
                # redirect browser - doing basically what apache does
                self.send_response(301)
                self.send_header("Location", self.path + "/")
                self.end_headers()
                return None
            for index in "index.html", "index.htm":
                index = os.path.join(path, index)
                if os.path.exists(index):
                    path = index
                    break
            else:
                # No index file found: serve a generated listing.
                return self.list_directory(path)
        ctype = self.guess_type(path)
        try:
            f = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return None
        try:
            self.send_response(200)
            self.send_header("Content-type", ctype)
            fs = os.fstat(f.fileno())
            self.send_header("Content-Length", str(fs[6]))
            self.send_header("Last-Modified",
                             self.date_time_string(fs.st_mtime))
            self.end_headers()
            return f
        except:
            # Cleanup only -- the caller never receives the file object
            # on this path, so it must be closed here before re-raising.
            f.close()
            raise

    def list_directory(self, path):
        """Helper to produce a directory listing (absent index.html).

        Return value is either a file object, or None (indicating an
        error).  In either case, the headers are sent, making the
        interface the same as for send_head().

        """
        try:
            entries = os.listdir(path)
        except os.error:
            self.send_error(404, "No permission to list directory")
            return None
        entries.sort(key=lambda a: a.lower())
        r = []
        displaypath = html.escape(urllib_parse.unquote(self.path))
        enc = sys.getfilesystemencoding()
        title = 'Directory listing for %s' % displaypath
        r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
                 '"http://www.w3.org/TR/html4/strict.dtd">')
        r.append('<html>\n<head>')
        r.append('<meta http-equiv="Content-Type" '
                 'content="text/html; charset=%s">' % enc)
        r.append('<title>%s</title>\n</head>' % title)
        r.append('<body>\n<h1>%s</h1>' % title)
        r.append('<hr>\n<ul>')
        for name in entries:
            fullname = os.path.join(path, name)
            displayname = linkname = name
            # Append / for directories or @ for symbolic links
            if os.path.isdir(fullname):
                displayname = name + "/"
                linkname = name + "/"
            if os.path.islink(fullname):
                displayname = name + "@"
                # Note: a link to a directory displays with @ and links with /
            r.append('<li><a href="%s">%s</a></li>'
                     % (urllib_parse.quote(linkname),
                        html.escape(displayname)))
        r.append('</ul>\n<hr>\n</body>\n</html>\n')
        encoded = '\n'.join(r).encode(enc)
        f = io.BytesIO()
        f.write(encoded)
        f.seek(0)
        self.send_response(200)
        self.send_header("Content-type", "text/html; charset=%s" % enc)
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        return f

    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        Components that mean special things to the local file system
        (e.g. drive or directory names) are ignored.  (XXX They should
        probably be diagnosed.)

        The result is always rooted at the current working directory.

        """
        # abandon query parameters
        path = path.split('?', 1)[0]
        path = path.split('#', 1)[0]
        path = posixpath.normpath(urllib_parse.unquote(path))
        words = path.split('/')
        words = filter(None, words)
        path = os.getcwd()
        for word in words:
            # Keep only the final path component of each word and drop
            # '.' / '..' so the result cannot leave the serving root.
            drive, word = os.path.splitdrive(word)
            head, word = os.path.split(word)
            if word in (os.curdir, os.pardir):
                continue
            path = os.path.join(path, word)
        return path

    def copyfile(self, source, outputfile):
        """Copy all data between two file objects.

        The SOURCE argument is a file object open for reading
        (or anything with a read() method) and the DESTINATION
        argument is a file object open for writing (or
        anything with a write() method).

        The only reason for overriding this would be to change
        the block size or perhaps to replace newlines by CRLF
        -- note however that the default server uses this
        to copy binary data as well.

        """
        shutil.copyfileobj(source, outputfile)

    def guess_type(self, path):
        """Guess the type of a file.

        Argument is a PATH (a filename).

        Return value is a string of the form type/subtype,
        usable for a MIME Content-type header.

        The default implementation looks the file's extension
        up in the table self.extensions_map, using application/octet-stream
        as a default; however it would be permissible (if
        slow) to look inside the data to make a better guess.

        """

        base, ext = posixpath.splitext(path)
        # Try the extension as written first, then lower-cased.
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        ext = ext.lower()
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        else:
            return self.extensions_map['']

    if not mimetypes.inited:
        mimetypes.init()  # try to read system mime.types
    extensions_map = mimetypes.types_map.copy()
    extensions_map.update({
        '': 'application/octet-stream',  # Default
        '.py': 'text/plain',
        '.c': 'text/plain',
        '.h': 'text/plain',
        })
+
+
+# Utilities for CGIHTTPRequestHandler
+
+def _url_collapse_path(path):
+ """
+ Given a URL path, remove extra '/'s and '.' path elements and collapse
+ any '..' references and returns a colllapsed path.
+
+ Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
+ The utility of this function is limited to is_cgi method and helps
+ preventing some security attacks.
+
+ Returns: A tuple of (head, tail) where tail is everything after the final /
+ and head is everything before it. Head will always start with a '/' and,
+ if it contains anything else, never have a trailing '/'.
+
+ Raises: IndexError if too many '..' occur within the path.
+
+ """
+ # Similar to os.path.split(os.path.normpath(path)) but specific to URL
+ # path semantics rather than local operating system semantics.
+ path_parts = path.split('/')
+ head_parts = []
+ for part in path_parts[:-1]:
+ if part == '..':
+ head_parts.pop() # IndexError if more '..' than prior parts
+ elif part and part != '.':
+ head_parts.append( part )
+ if path_parts:
+ tail_part = path_parts.pop()
+ if tail_part:
+ if tail_part == '..':
+ head_parts.pop()
+ tail_part = ''
+ elif tail_part == '.':
+ tail_part = ''
+ else:
+ tail_part = ''
+
+ splitpath = ('/' + '/'.join(head_parts), tail_part)
+ collapsed_path = "/".join(splitpath)
+
+ return collapsed_path
+
+
+
# Cached uid of the 'nobody' user; filled in lazily by nobody_uid().
nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid.

    Returns -1 when the pwd module cannot be imported.  Otherwise the
    uid of the 'nobody' account is looked up (falling back to one more
    than the largest uid in the password database) and cached in the
    module-level ``nobody`` variable.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody')[2]
        except KeyError:
            nobody = 1 + max(entry[2] for entry in pwd.getpwall())
    return nobody
+
+
def executable(path):
    """Return whether os.access() reports PATH as executable (X_OK)."""
    required_mode = os.X_OK
    return os.access(path, required_mode)
+
+
class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target is
        answered with a 501 error.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(501, "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts.

        CGI requests are delegated to run_cgi(); everything else is
        handled by SimpleHTTPRequestHandler.send_head().
        """
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).

        """
        collapsed_path = _url_collapse_path(self.path)
        # Split at the first '/' after the leading one: head is the
        # top-level directory, tail is everything that follows it.
        dir_sep = collapsed_path.find('/', 1)
        head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
        if head in self.cgi_directories:
            self.cgi_info = head, tail
            return True
        return False


    # URL directories whose contents are treated as CGI scripts.
    cgi_directories = ['/cgi-bin', '/htbin']

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script."""
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script.

        Expects is_cgi() to have stored (dir, rest) in self.cgi_info.
        Locates the script, builds the CGI environment from the request,
        sends the "200 Script output follows" status line, and then runs
        the script: via fork/execve when os.fork exists, otherwise via
        subprocess.  Error replies (404/403) are sent when the script is
        missing, is not a plain file, or is not executable.
        """
        path = self.path
        dir, rest = self.cgi_info

        # Extend `dir` through as many leading path components as map to
        # real directories, so scripts in subdirectories are found.
        i = path.find('/', len(dir) + 1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                dir, rest = nextdir, nextrest
                i = path.find('/', len(dir) + 1)
            else:
                break

        # find an explicit query string, if present.
        i = rest.rfind('?')
        if i >= 0:
            rest, query = rest[:i], rest[i+1:]
        else:
            query = ''

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(404, "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(403, "CGI script is not a plain file (%r)" %
                            scriptname)
            return
        ispy = self.is_python(scriptname)
        # Without fork, Python scripts are run through the interpreter
        # (see below), so only other scripts need the executable check.
        if self.have_fork or not ispy:
            if not self.is_executable(scriptfile):
                self.send_error(403, "CGI script is not executable (%r)" %
                                scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib_parse.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.get("authorization")
        if authorization:
            authorization = authorization.split()
            if len(authorization) == 2:
                import base64, binascii
                env['AUTH_TYPE'] = authorization[0]
                if authorization[0].lower() == "basic":
                    # Decode "user:password" to expose the user name;
                    # undecodable credentials are silently ignored.
                    try:
                        authorization = authorization[1].encode('ascii')
                        if utils.PY3:
                            # In Py3.3, was:
                            authorization = base64.decodebytes(authorization).\
                                            decode('ascii')
                        else:
                            # Backport to Py2.7:
                            authorization = base64.decodestring(authorization).\
                                            decode('ascii')
                    except (binascii.Error, UnicodeError):
                        pass
                    else:
                        authorization = authorization.split(':')
                        if len(authorization) == 2:
                            env['REMOTE_USER'] = authorization[0]
        # XXX REMOTE_IDENT
        if self.headers.get('content-type') is None:
            env['CONTENT_TYPE'] = self.headers.get_content_type()
        else:
            env['CONTENT_TYPE'] = self.headers['content-type']
        length = self.headers.get('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.get('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        accept = []
        for line in self.headers.getallmatchingheaders('accept'):
            if line[:1] in "\t\n\r ":
                # Continuation line of a folded header.
                accept.append(line.strip())
            else:
                # Skip the 7-character "Accept:" prefix.
                accept = accept + line[7:].split(',')
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.get('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.get_all('cookie', []))
        cookie_str = ', '.join(co)
        if cookie_str:
            env['HTTP_COOKIE'] = cookie_str
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        self.send_response(200, "Script output follows")
        self.flush_headers()

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            # A query without '=' is passed as a command-line argument.
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush() # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                if sts:
                    self.log_error("CGI script exit status %#x", sts)
                return
            # Child
            try:
                # Try to switch to the 'nobody' uid; failure to do so
                # is ignored.
                try:
                    os.setuid(nobody)
                except os.error:
                    pass
                # Wire the connection to the script's stdin/stdout.
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Reached only if the child failed before or during
                # execve: report the error and exit the child.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non-Unix -- use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            if '=' not in query:
                cmdline.append(query)
            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env = env
                                 )
            # Only POST bodies with a positive Content-Length are fed
            # to the script's stdin.
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
+
+
def test(HandlerClass = BaseHTTPRequestHandler,
         ServerClass = HTTPServer, protocol="HTTP/1.0", port=8000):
    """Run a server with the given handler class until interrupted.

    The server binds to all interfaces on ``port`` (8000 by default).
    ``protocol`` is stored on HandlerClass.protocol_version before the
    server is created.  On Ctrl-C the server socket is closed and the
    process exits with status 0.

    """
    HandlerClass.protocol_version = protocol
    httpd = ServerClass(('', port), HandlerClass)

    bound = httpd.socket.getsockname()
    host, bound_port = bound[0], bound[1]
    print("Serving HTTP on", host, "port", bound_port, "...")
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        print("\nKeyboard interrupt received, exiting.")
        httpd.server_close()
        sys.exit(0)
+
if __name__ == '__main__':
    # Command line: [--cgi] [port]
    parser = argparse.ArgumentParser()
    parser.add_argument('--cgi', action='store_true',
                        help='Run as CGI Server')
    parser.add_argument('port', action='store',
                        default=8000, type=int,
                        nargs='?',
                        help='Specify alternate port [default: 8000]')
    args = parser.parse_args()
    # --cgi selects the CGI-capable handler; otherwise serve plain files.
    chosen_handler = (CGIHTTPRequestHandler if args.cgi
                      else SimpleHTTPRequestHandler)
    test(HandlerClass=chosen_handler, port=args.port)
diff --git a/contrib/python/future/future/backports/misc.py b/contrib/python/future/future/backports/misc.py
index 622427fd0e..098a0667e8 100644
--- a/contrib/python/future/future/backports/misc.py
+++ b/contrib/python/future/future/backports/misc.py
@@ -1,944 +1,944 @@
-"""
-Miscellaneous function (re)definitions from the Py3.4+ standard library
-for Python 2.6/2.7.
-
-- math.ceil (for Python 2.7)
-- collections.OrderedDict (for Python 2.6)
-- collections.Counter (for Python 2.6)
-- collections.ChainMap (for all versions prior to Python 3.3)
-- itertools.count (for Python 2.6, with step parameter)
-- subprocess.check_output (for Python 2.6)
-- reprlib.recursive_repr (for Python 2.6+)
-- functools.cmp_to_key (for Python 2.6)
-"""
-
-from __future__ import absolute_import
-
-import subprocess
-from math import ceil as oldceil
-
-from operator import itemgetter as _itemgetter, eq as _eq
-import sys
-import heapq as _heapq
-from _weakref import proxy as _proxy
-from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
-from socket import getaddrinfo, SOCK_STREAM, error, socket
-
+"""
+Miscellaneous function (re)definitions from the Py3.4+ standard library
+for Python 2.6/2.7.
+
+- math.ceil (for Python 2.7)
+- collections.OrderedDict (for Python 2.6)
+- collections.Counter (for Python 2.6)
+- collections.ChainMap (for all versions prior to Python 3.3)
+- itertools.count (for Python 2.6, with step parameter)
+- subprocess.check_output (for Python 2.6)
+- reprlib.recursive_repr (for Python 2.6+)
+- functools.cmp_to_key (for Python 2.6)
+"""
+
+from __future__ import absolute_import
+
+import subprocess
+from math import ceil as oldceil
+
+from operator import itemgetter as _itemgetter, eq as _eq
+import sys
+import heapq as _heapq
+from _weakref import proxy as _proxy
+from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
+from socket import getaddrinfo, SOCK_STREAM, error, socket
+
from future.utils import iteritems, itervalues, PY2, PY26, PY3
-
+
if PY2:
from collections import Mapping, MutableMapping
else:
from collections.abc import Mapping, MutableMapping
-
-
-def ceil(x):
- """
- Return the ceiling of x as an int.
- This is the smallest integral value >= x.
- """
- return int(oldceil(x))
-
-
-########################################################################
-### reprlib.recursive_repr decorator from Py3.4
-########################################################################
-
-from itertools import islice
-
-if PY3:
- try:
- from _thread import get_ident
- except ImportError:
- from _dummy_thread import get_ident
-else:
- try:
- from thread import get_ident
- except ImportError:
- from dummy_thread import get_ident
-
-
-def recursive_repr(fillvalue='...'):
- 'Decorator to make a repr function return fillvalue for a recursive call'
-
- def decorating_function(user_function):
- repr_running = set()
-
- def wrapper(self):
- key = id(self), get_ident()
- if key in repr_running:
- return fillvalue
- repr_running.add(key)
- try:
- result = user_function(self)
- finally:
- repr_running.discard(key)
- return result
-
- # Can't use functools.wraps() here because of bootstrap issues
- wrapper.__module__ = getattr(user_function, '__module__')
- wrapper.__doc__ = getattr(user_function, '__doc__')
- wrapper.__name__ = getattr(user_function, '__name__')
- wrapper.__annotations__ = getattr(user_function, '__annotations__', {})
- return wrapper
-
- return decorating_function
-
-
-################################################################################
-### OrderedDict
-################################################################################
-
-class _Link(object):
- __slots__ = 'prev', 'next', 'key', '__weakref__'
-
-class OrderedDict(dict):
- 'Dictionary that remembers insertion order'
- # An inherited dict maps keys to values.
- # The inherited dict provides __getitem__, __len__, __contains__, and get.
- # The remaining methods are order-aware.
- # Big-O running times for all methods are the same as regular dictionaries.
-
- # The internal self.__map dict maps keys to links in a doubly linked list.
- # The circular doubly linked list starts and ends with a sentinel element.
- # The sentinel element never gets deleted (this simplifies the algorithm).
- # The sentinel is in self.__hardroot with a weakref proxy in self.__root.
- # The prev links are weakref proxies (to prevent circular references).
- # Individual links are kept alive by the hard reference in self.__map.
- # Those hard references disappear when a key is deleted from an OrderedDict.
-
- def __init__(*args, **kwds):
- '''Initialize an ordered dictionary. The signature is the same as
- regular dictionaries, but keyword arguments are not recommended because
- their insertion order is arbitrary.
-
- '''
- if not args:
- raise TypeError("descriptor '__init__' of 'OrderedDict' object "
- "needs an argument")
- self = args[0]
- args = args[1:]
- if len(args) > 1:
- raise TypeError('expected at most 1 arguments, got %d' % len(args))
- try:
- self.__root
- except AttributeError:
- self.__hardroot = _Link()
- self.__root = root = _proxy(self.__hardroot)
- root.prev = root.next = root
- self.__map = {}
- self.__update(*args, **kwds)
-
- def __setitem__(self, key, value,
- dict_setitem=dict.__setitem__, proxy=_proxy, Link=_Link):
- 'od.__setitem__(i, y) <==> od[i]=y'
- # Setting a new item creates a new link at the end of the linked list,
- # and the inherited dictionary is updated with the new key/value pair.
- if key not in self:
- self.__map[key] = link = Link()
- root = self.__root
- last = root.prev
- link.prev, link.next, link.key = last, root, key
- last.next = link
- root.prev = proxy(link)
- dict_setitem(self, key, value)
-
- def __delitem__(self, key, dict_delitem=dict.__delitem__):
- 'od.__delitem__(y) <==> del od[y]'
- # Deleting an existing item uses self.__map to find the link which gets
- # removed by updating the links in the predecessor and successor nodes.
- dict_delitem(self, key)
- link = self.__map.pop(key)
- link_prev = link.prev
- link_next = link.next
- link_prev.next = link_next
- link_next.prev = link_prev
-
- def __iter__(self):
- 'od.__iter__() <==> iter(od)'
- # Traverse the linked list in order.
- root = self.__root
- curr = root.next
- while curr is not root:
- yield curr.key
- curr = curr.next
-
- def __reversed__(self):
- 'od.__reversed__() <==> reversed(od)'
- # Traverse the linked list in reverse order.
- root = self.__root
- curr = root.prev
- while curr is not root:
- yield curr.key
- curr = curr.prev
-
- def clear(self):
- 'od.clear() -> None. Remove all items from od.'
- root = self.__root
- root.prev = root.next = root
- self.__map.clear()
- dict.clear(self)
-
- def popitem(self, last=True):
- '''od.popitem() -> (k, v), return and remove a (key, value) pair.
- Pairs are returned in LIFO order if last is true or FIFO order if false.
-
- '''
- if not self:
- raise KeyError('dictionary is empty')
- root = self.__root
- if last:
- link = root.prev
- link_prev = link.prev
- link_prev.next = root
- root.prev = link_prev
- else:
- link = root.next
- link_next = link.next
- root.next = link_next
- link_next.prev = root
- key = link.key
- del self.__map[key]
- value = dict.pop(self, key)
- return key, value
-
- def move_to_end(self, key, last=True):
- '''Move an existing element to the end (or beginning if last==False).
-
- Raises KeyError if the element does not exist.
- When last=True, acts like a fast version of self[key]=self.pop(key).
-
- '''
- link = self.__map[key]
- link_prev = link.prev
- link_next = link.next
- link_prev.next = link_next
- link_next.prev = link_prev
- root = self.__root
- if last:
- last = root.prev
- link.prev = last
- link.next = root
- last.next = root.prev = link
- else:
- first = root.next
- link.prev = root
- link.next = first
- root.next = first.prev = link
-
- def __sizeof__(self):
- sizeof = sys.getsizeof
- n = len(self) + 1 # number of links including root
- size = sizeof(self.__dict__) # instance dictionary
- size += sizeof(self.__map) * 2 # internal dict and inherited dict
- size += sizeof(self.__hardroot) * n # link objects
- size += sizeof(self.__root) * n # proxy objects
- return size
-
- update = __update = MutableMapping.update
- keys = MutableMapping.keys
- values = MutableMapping.values
- items = MutableMapping.items
- __ne__ = MutableMapping.__ne__
-
- __marker = object()
-
- def pop(self, key, default=__marker):
- '''od.pop(k[,d]) -> v, remove specified key and return the corresponding
- value. If key is not found, d is returned if given, otherwise KeyError
- is raised.
-
- '''
- if key in self:
- result = self[key]
- del self[key]
- return result
- if default is self.__marker:
- raise KeyError(key)
- return default
-
- def setdefault(self, key, default=None):
- 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
- if key in self:
- return self[key]
- self[key] = default
- return default
-
- @recursive_repr()
- def __repr__(self):
- 'od.__repr__() <==> repr(od)'
- if not self:
- return '%s()' % (self.__class__.__name__,)
- return '%s(%r)' % (self.__class__.__name__, list(self.items()))
-
- def __reduce__(self):
- 'Return state information for pickling'
- inst_dict = vars(self).copy()
- for k in vars(OrderedDict()):
- inst_dict.pop(k, None)
- return self.__class__, (), inst_dict or None, None, iter(self.items())
-
- def copy(self):
- 'od.copy() -> a shallow copy of od'
- return self.__class__(self)
-
- @classmethod
- def fromkeys(cls, iterable, value=None):
- '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S.
- If not specified, the value defaults to None.
-
- '''
- self = cls()
- for key in iterable:
- self[key] = value
- return self
-
- def __eq__(self, other):
- '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
- while comparison to a regular mapping is order-insensitive.
-
- '''
- if isinstance(other, OrderedDict):
- return dict.__eq__(self, other) and all(map(_eq, self, other))
- return dict.__eq__(self, other)
-
-
-# {{{ http://code.activestate.com/recipes/576611/ (r11)
-
-try:
- from operator import itemgetter
- from heapq import nlargest
-except ImportError:
- pass
-
-########################################################################
-### Counter
-########################################################################
-
-def _count_elements(mapping, iterable):
- 'Tally elements from the iterable.'
- mapping_get = mapping.get
- for elem in iterable:
- mapping[elem] = mapping_get(elem, 0) + 1
-
-class Counter(dict):
- '''Dict subclass for counting hashable items. Sometimes called a bag
- or multiset. Elements are stored as dictionary keys and their counts
- are stored as dictionary values.
-
- >>> c = Counter('abcdeabcdabcaba') # count elements from a string
-
- >>> c.most_common(3) # three most common elements
- [('a', 5), ('b', 4), ('c', 3)]
- >>> sorted(c) # list all unique elements
- ['a', 'b', 'c', 'd', 'e']
- >>> ''.join(sorted(c.elements())) # list elements with repetitions
- 'aaaaabbbbcccdde'
- >>> sum(c.values()) # total of all counts
- 15
-
- >>> c['a'] # count of letter 'a'
- 5
- >>> for elem in 'shazam': # update counts from an iterable
- ... c[elem] += 1 # by adding 1 to each element's count
- >>> c['a'] # now there are seven 'a'
- 7
- >>> del c['b'] # remove all 'b'
- >>> c['b'] # now there are zero 'b'
- 0
-
- >>> d = Counter('simsalabim') # make another counter
- >>> c.update(d) # add in the second counter
- >>> c['a'] # now there are nine 'a'
- 9
-
- >>> c.clear() # empty the counter
- >>> c
- Counter()
-
- Note: If a count is set to zero or reduced to zero, it will remain
- in the counter until the entry is deleted or the counter is cleared:
-
- >>> c = Counter('aaabbc')
- >>> c['b'] -= 2 # reduce the count of 'b' by two
- >>> c.most_common() # 'b' is still in, but its count is zero
- [('a', 3), ('c', 1), ('b', 0)]
-
- '''
- # References:
- # http://en.wikipedia.org/wiki/Multiset
- # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
- # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
- # http://code.activestate.com/recipes/259174/
- # Knuth, TAOCP Vol. II section 4.6.3
-
- def __init__(*args, **kwds):
- '''Create a new, empty Counter object. And if given, count elements
- from an input iterable. Or, initialize the count from another mapping
- of elements to their counts.
-
- >>> c = Counter() # a new, empty counter
- >>> c = Counter('gallahad') # a new counter from an iterable
- >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
- >>> c = Counter(a=4, b=2) # a new counter from keyword args
-
- '''
- if not args:
- raise TypeError("descriptor '__init__' of 'Counter' object "
- "needs an argument")
- self = args[0]
- args = args[1:]
- if len(args) > 1:
- raise TypeError('expected at most 1 arguments, got %d' % len(args))
- super(Counter, self).__init__()
- self.update(*args, **kwds)
-
- def __missing__(self, key):
- 'The count of elements not in the Counter is zero.'
- # Needed so that self[missing_item] does not raise KeyError
- return 0
-
- def most_common(self, n=None):
- '''List the n most common elements and their counts from the most
- common to the least. If n is None, then list all element counts.
-
- >>> Counter('abcdeabcdabcaba').most_common(3)
- [('a', 5), ('b', 4), ('c', 3)]
-
- '''
- # Emulate Bag.sortedByCount from Smalltalk
- if n is None:
- return sorted(self.items(), key=_itemgetter(1), reverse=True)
- return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
-
- def elements(self):
- '''Iterator over elements repeating each as many times as its count.
-
- >>> c = Counter('ABCABC')
- >>> sorted(c.elements())
- ['A', 'A', 'B', 'B', 'C', 'C']
-
- # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
- >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
- >>> product = 1
- >>> for factor in prime_factors.elements(): # loop over factors
- ... product *= factor # and multiply them
- >>> product
- 1836
-
- Note, if an element's count has been set to zero or is a negative
- number, elements() will ignore it.
-
- '''
- # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
- return _chain.from_iterable(_starmap(_repeat, self.items()))
-
- # Override dict methods where necessary
-
- @classmethod
- def fromkeys(cls, iterable, v=None):
- # There is no equivalent method for counters because setting v=1
- # means that no element can have a count greater than one.
- raise NotImplementedError(
- 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
-
- def update(*args, **kwds):
- '''Like dict.update() but add counts instead of replacing them.
-
- Source can be an iterable, a dictionary, or another Counter instance.
-
- >>> c = Counter('which')
- >>> c.update('witch') # add elements from another iterable
- >>> d = Counter('watch')
- >>> c.update(d) # add elements from another counter
- >>> c['h'] # four 'h' in which, witch, and watch
- 4
-
- '''
- # The regular dict.update() operation makes no sense here because the
- # replace behavior results in the some of original untouched counts
- # being mixed-in with all of the other counts for a mismash that
- # doesn't have a straight-forward interpretation in most counting
- # contexts. Instead, we implement straight-addition. Both the inputs
- # and outputs are allowed to contain zero and negative counts.
-
- if not args:
- raise TypeError("descriptor 'update' of 'Counter' object "
- "needs an argument")
- self = args[0]
- args = args[1:]
- if len(args) > 1:
- raise TypeError('expected at most 1 arguments, got %d' % len(args))
- iterable = args[0] if args else None
- if iterable is not None:
- if isinstance(iterable, Mapping):
- if self:
- self_get = self.get
- for elem, count in iterable.items():
- self[elem] = count + self_get(elem, 0)
- else:
- super(Counter, self).update(iterable) # fast path when counter is empty
- else:
- _count_elements(self, iterable)
- if kwds:
- self.update(kwds)
-
- def subtract(*args, **kwds):
- '''Like dict.update() but subtracts counts instead of replacing them.
- Counts can be reduced below zero. Both the inputs and outputs are
- allowed to contain zero and negative counts.
-
- Source can be an iterable, a dictionary, or another Counter instance.
-
- >>> c = Counter('which')
- >>> c.subtract('witch') # subtract elements from another iterable
- >>> c.subtract(Counter('watch')) # subtract elements from another counter
- >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
- 0
- >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
- -1
-
- '''
- if not args:
- raise TypeError("descriptor 'subtract' of 'Counter' object "
- "needs an argument")
- self = args[0]
- args = args[1:]
- if len(args) > 1:
- raise TypeError('expected at most 1 arguments, got %d' % len(args))
- iterable = args[0] if args else None
- if iterable is not None:
- self_get = self.get
- if isinstance(iterable, Mapping):
- for elem, count in iterable.items():
- self[elem] = self_get(elem, 0) - count
- else:
- for elem in iterable:
- self[elem] = self_get(elem, 0) - 1
- if kwds:
- self.subtract(kwds)
-
- def copy(self):
- 'Return a shallow copy.'
- return self.__class__(self)
-
- def __reduce__(self):
- return self.__class__, (dict(self),)
-
- def __delitem__(self, elem):
- 'Like dict.__delitem__() but does not raise KeyError for missing values.'
- if elem in self:
- super(Counter, self).__delitem__(elem)
-
- def __repr__(self):
- if not self:
- return '%s()' % self.__class__.__name__
- try:
- items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
- return '%s({%s})' % (self.__class__.__name__, items)
- except TypeError:
- # handle case where values are not orderable
- return '{0}({1!r})'.format(self.__class__.__name__, dict(self))
-
- # Multiset-style mathematical operations discussed in:
- # Knuth TAOCP Volume II section 4.6.3 exercise 19
- # and at http://en.wikipedia.org/wiki/Multiset
- #
- # Outputs guaranteed to only include positive counts.
- #
- # To strip negative and zero counts, add-in an empty counter:
- # c += Counter()
-
- def __add__(self, other):
- '''Add counts from two counters.
-
- >>> Counter('abbb') + Counter('bcc')
- Counter({'b': 4, 'c': 2, 'a': 1})
-
- '''
- if not isinstance(other, Counter):
- return NotImplemented
- result = Counter()
- for elem, count in self.items():
- newcount = count + other[elem]
- if newcount > 0:
- result[elem] = newcount
- for elem, count in other.items():
- if elem not in self and count > 0:
- result[elem] = count
- return result
-
- def __sub__(self, other):
- ''' Subtract count, but keep only results with positive counts.
-
- >>> Counter('abbbc') - Counter('bccd')
- Counter({'b': 2, 'a': 1})
-
- '''
- if not isinstance(other, Counter):
- return NotImplemented
- result = Counter()
- for elem, count in self.items():
- newcount = count - other[elem]
- if newcount > 0:
- result[elem] = newcount
- for elem, count in other.items():
- if elem not in self and count < 0:
- result[elem] = 0 - count
- return result
-
- def __or__(self, other):
- '''Union is the maximum of value in either of the input counters.
-
- >>> Counter('abbb') | Counter('bcc')
- Counter({'b': 3, 'c': 2, 'a': 1})
-
- '''
- if not isinstance(other, Counter):
- return NotImplemented
- result = Counter()
- for elem, count in self.items():
- other_count = other[elem]
- newcount = other_count if count < other_count else count
- if newcount > 0:
- result[elem] = newcount
- for elem, count in other.items():
- if elem not in self and count > 0:
- result[elem] = count
- return result
-
- def __and__(self, other):
- ''' Intersection is the minimum of corresponding counts.
-
- >>> Counter('abbb') & Counter('bcc')
- Counter({'b': 1})
-
- '''
- if not isinstance(other, Counter):
- return NotImplemented
- result = Counter()
- for elem, count in self.items():
- other_count = other[elem]
- newcount = count if count < other_count else other_count
- if newcount > 0:
- result[elem] = newcount
- return result
-
- def __pos__(self):
- 'Adds an empty counter, effectively stripping negative and zero counts'
- return self + Counter()
-
- def __neg__(self):
- '''Subtracts from an empty counter. Strips positive and zero counts,
- and flips the sign on negative counts.
-
- '''
- return Counter() - self
-
- def _keep_positive(self):
- '''Internal method to strip elements with a negative or zero count'''
- nonpositive = [elem for elem, count in self.items() if not count > 0]
- for elem in nonpositive:
- del self[elem]
- return self
-
- def __iadd__(self, other):
- '''Inplace add from another counter, keeping only positive counts.
-
- >>> c = Counter('abbb')
- >>> c += Counter('bcc')
- >>> c
- Counter({'b': 4, 'c': 2, 'a': 1})
-
- '''
- for elem, count in other.items():
- self[elem] += count
- return self._keep_positive()
-
- def __isub__(self, other):
- '''Inplace subtract counter, but keep only results with positive counts.
-
- >>> c = Counter('abbbc')
- >>> c -= Counter('bccd')
- >>> c
- Counter({'b': 2, 'a': 1})
-
- '''
- for elem, count in other.items():
- self[elem] -= count
- return self._keep_positive()
-
- def __ior__(self, other):
- '''Inplace union is the maximum of value from either counter.
-
- >>> c = Counter('abbb')
- >>> c |= Counter('bcc')
- >>> c
- Counter({'b': 3, 'c': 2, 'a': 1})
-
- '''
- for elem, other_count in other.items():
- count = self[elem]
- if other_count > count:
- self[elem] = other_count
- return self._keep_positive()
-
- def __iand__(self, other):
- '''Inplace intersection is the minimum of corresponding counts.
-
- >>> c = Counter('abbb')
- >>> c &= Counter('bcc')
- >>> c
- Counter({'b': 1})
-
- '''
- for elem, count in self.items():
- other_count = other[elem]
- if other_count < count:
- self[elem] = other_count
- return self._keep_positive()
-
-
-def check_output(*popenargs, **kwargs):
- """
- For Python 2.6 compatibility: see
- http://stackoverflow.com/questions/4814970/
- """
-
- if 'stdout' in kwargs:
- raise ValueError('stdout argument not allowed, it will be overridden.')
- process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
- output, unused_err = process.communicate()
- retcode = process.poll()
- if retcode:
- cmd = kwargs.get("args")
- if cmd is None:
- cmd = popenargs[0]
- raise subprocess.CalledProcessError(retcode, cmd)
- return output
-
-
-def count(start=0, step=1):
- """
- ``itertools.count`` in Py 2.6 doesn't accept a step
- parameter. This is an enhanced version of ``itertools.count``
- for Py2.6 equivalent to ``itertools.count`` in Python 2.7+.
- """
- while True:
- yield start
- start += step
-
-
-########################################################################
-### ChainMap (helper for configparser and string.Template)
-### From the Py3.4 source code. See also:
-### https://github.com/kkxue/Py2ChainMap/blob/master/py2chainmap.py
-########################################################################
-
-class ChainMap(MutableMapping):
- ''' A ChainMap groups multiple dicts (or other mappings) together
- to create a single, updateable view.
-
- The underlying mappings are stored in a list. That list is public and can
- accessed or updated using the *maps* attribute. There is no other state.
-
- Lookups search the underlying mappings successively until a key is found.
- In contrast, writes, updates, and deletions only operate on the first
- mapping.
-
- '''
-
- def __init__(self, *maps):
- '''Initialize a ChainMap by setting *maps* to the given mappings.
- If no mappings are provided, a single empty dictionary is used.
-
- '''
- self.maps = list(maps) or [{}] # always at least one map
-
- def __missing__(self, key):
- raise KeyError(key)
-
- def __getitem__(self, key):
- for mapping in self.maps:
- try:
- return mapping[key] # can't use 'key in mapping' with defaultdict
- except KeyError:
- pass
- return self.__missing__(key) # support subclasses that define __missing__
-
- def get(self, key, default=None):
- return self[key] if key in self else default
-
- def __len__(self):
- return len(set().union(*self.maps)) # reuses stored hash values if possible
-
- def __iter__(self):
- return iter(set().union(*self.maps))
-
- def __contains__(self, key):
- return any(key in m for m in self.maps)
-
- def __bool__(self):
- return any(self.maps)
-
- # Py2 compatibility:
- __nonzero__ = __bool__
-
- @recursive_repr()
- def __repr__(self):
- return '{0.__class__.__name__}({1})'.format(
- self, ', '.join(map(repr, self.maps)))
-
- @classmethod
- def fromkeys(cls, iterable, *args):
- 'Create a ChainMap with a single dict created from the iterable.'
- return cls(dict.fromkeys(iterable, *args))
-
- def copy(self):
- 'New ChainMap or subclass with a new copy of maps[0] and refs to maps[1:]'
- return self.__class__(self.maps[0].copy(), *self.maps[1:])
-
- __copy__ = copy
-
- def new_child(self, m=None): # like Django's Context.push()
- '''
- New ChainMap with a new map followed by all previous maps. If no
- map is provided, an empty dict is used.
- '''
- if m is None:
- m = {}
- return self.__class__(m, *self.maps)
-
- @property
- def parents(self): # like Django's Context.pop()
- 'New ChainMap from maps[1:].'
- return self.__class__(*self.maps[1:])
-
- def __setitem__(self, key, value):
- self.maps[0][key] = value
-
- def __delitem__(self, key):
- try:
- del self.maps[0][key]
- except KeyError:
+
+
+def ceil(x):
+ """
+ Return the ceiling of x as an int.
+ This is the smallest integral value >= x.
+ """
+ return int(oldceil(x))
+
+
+########################################################################
+### reprlib.recursive_repr decorator from Py3.4
+########################################################################
+
+from itertools import islice
+
+if PY3:
+ try:
+ from _thread import get_ident
+ except ImportError:
+ from _dummy_thread import get_ident
+else:
+ try:
+ from thread import get_ident
+ except ImportError:
+ from dummy_thread import get_ident
+
+
+def recursive_repr(fillvalue='...'):
+ 'Decorator to make a repr function return fillvalue for a recursive call'
+
+ def decorating_function(user_function):
+ repr_running = set()
+
+ def wrapper(self):
+ key = id(self), get_ident()
+ if key in repr_running:
+ return fillvalue
+ repr_running.add(key)
+ try:
+ result = user_function(self)
+ finally:
+ repr_running.discard(key)
+ return result
+
+ # Can't use functools.wraps() here because of bootstrap issues
+ wrapper.__module__ = getattr(user_function, '__module__')
+ wrapper.__doc__ = getattr(user_function, '__doc__')
+ wrapper.__name__ = getattr(user_function, '__name__')
+ wrapper.__annotations__ = getattr(user_function, '__annotations__', {})
+ return wrapper
+
+ return decorating_function
+
+
+################################################################################
+### OrderedDict
+################################################################################
+
+class _Link(object):
+ __slots__ = 'prev', 'next', 'key', '__weakref__'
+
+class OrderedDict(dict):
+ 'Dictionary that remembers insertion order'
+ # An inherited dict maps keys to values.
+ # The inherited dict provides __getitem__, __len__, __contains__, and get.
+ # The remaining methods are order-aware.
+ # Big-O running times for all methods are the same as regular dictionaries.
+
+ # The internal self.__map dict maps keys to links in a doubly linked list.
+ # The circular doubly linked list starts and ends with a sentinel element.
+ # The sentinel element never gets deleted (this simplifies the algorithm).
+ # The sentinel is in self.__hardroot with a weakref proxy in self.__root.
+ # The prev links are weakref proxies (to prevent circular references).
+ # Individual links are kept alive by the hard reference in self.__map.
+ # Those hard references disappear when a key is deleted from an OrderedDict.
+
+ def __init__(*args, **kwds):
+ '''Initialize an ordered dictionary. The signature is the same as
+ regular dictionaries, but keyword arguments are not recommended because
+ their insertion order is arbitrary.
+
+ '''
+ if not args:
+ raise TypeError("descriptor '__init__' of 'OrderedDict' object "
+ "needs an argument")
+ self = args[0]
+ args = args[1:]
+ if len(args) > 1:
+ raise TypeError('expected at most 1 arguments, got %d' % len(args))
+ try:
+ self.__root
+ except AttributeError:
+ self.__hardroot = _Link()
+ self.__root = root = _proxy(self.__hardroot)
+ root.prev = root.next = root
+ self.__map = {}
+ self.__update(*args, **kwds)
+
+ def __setitem__(self, key, value,
+ dict_setitem=dict.__setitem__, proxy=_proxy, Link=_Link):
+ 'od.__setitem__(i, y) <==> od[i]=y'
+ # Setting a new item creates a new link at the end of the linked list,
+ # and the inherited dictionary is updated with the new key/value pair.
+ if key not in self:
+ self.__map[key] = link = Link()
+ root = self.__root
+ last = root.prev
+ link.prev, link.next, link.key = last, root, key
+ last.next = link
+ root.prev = proxy(link)
+ dict_setitem(self, key, value)
+
+ def __delitem__(self, key, dict_delitem=dict.__delitem__):
+ 'od.__delitem__(y) <==> del od[y]'
+ # Deleting an existing item uses self.__map to find the link which gets
+ # removed by updating the links in the predecessor and successor nodes.
+ dict_delitem(self, key)
+ link = self.__map.pop(key)
+ link_prev = link.prev
+ link_next = link.next
+ link_prev.next = link_next
+ link_next.prev = link_prev
+
+ def __iter__(self):
+ 'od.__iter__() <==> iter(od)'
+ # Traverse the linked list in order.
+ root = self.__root
+ curr = root.next
+ while curr is not root:
+ yield curr.key
+ curr = curr.next
+
+ def __reversed__(self):
+ 'od.__reversed__() <==> reversed(od)'
+ # Traverse the linked list in reverse order.
+ root = self.__root
+ curr = root.prev
+ while curr is not root:
+ yield curr.key
+ curr = curr.prev
+
+ def clear(self):
+ 'od.clear() -> None. Remove all items from od.'
+ root = self.__root
+ root.prev = root.next = root
+ self.__map.clear()
+ dict.clear(self)
+
+ def popitem(self, last=True):
+ '''od.popitem() -> (k, v), return and remove a (key, value) pair.
+ Pairs are returned in LIFO order if last is true or FIFO order if false.
+
+ '''
+ if not self:
+ raise KeyError('dictionary is empty')
+ root = self.__root
+ if last:
+ link = root.prev
+ link_prev = link.prev
+ link_prev.next = root
+ root.prev = link_prev
+ else:
+ link = root.next
+ link_next = link.next
+ root.next = link_next
+ link_next.prev = root
+ key = link.key
+ del self.__map[key]
+ value = dict.pop(self, key)
+ return key, value
+
+ def move_to_end(self, key, last=True):
+ '''Move an existing element to the end (or beginning if last==False).
+
+ Raises KeyError if the element does not exist.
+ When last=True, acts like a fast version of self[key]=self.pop(key).
+
+ '''
+ link = self.__map[key]
+ link_prev = link.prev
+ link_next = link.next
+ link_prev.next = link_next
+ link_next.prev = link_prev
+ root = self.__root
+ if last:
+ last = root.prev
+ link.prev = last
+ link.next = root
+ last.next = root.prev = link
+ else:
+ first = root.next
+ link.prev = root
+ link.next = first
+ root.next = first.prev = link
+
+ def __sizeof__(self):
+ sizeof = sys.getsizeof
+ n = len(self) + 1 # number of links including root
+ size = sizeof(self.__dict__) # instance dictionary
+ size += sizeof(self.__map) * 2 # internal dict and inherited dict
+ size += sizeof(self.__hardroot) * n # link objects
+ size += sizeof(self.__root) * n # proxy objects
+ return size
+
+ update = __update = MutableMapping.update
+ keys = MutableMapping.keys
+ values = MutableMapping.values
+ items = MutableMapping.items
+ __ne__ = MutableMapping.__ne__
+
+ __marker = object()
+
+ def pop(self, key, default=__marker):
+ '''od.pop(k[,d]) -> v, remove specified key and return the corresponding
+ value. If key is not found, d is returned if given, otherwise KeyError
+ is raised.
+
+ '''
+ if key in self:
+ result = self[key]
+ del self[key]
+ return result
+ if default is self.__marker:
+ raise KeyError(key)
+ return default
+
+ def setdefault(self, key, default=None):
+ 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
+ if key in self:
+ return self[key]
+ self[key] = default
+ return default
+
+ @recursive_repr()
+ def __repr__(self):
+ 'od.__repr__() <==> repr(od)'
+ if not self:
+ return '%s()' % (self.__class__.__name__,)
+ return '%s(%r)' % (self.__class__.__name__, list(self.items()))
+
+ def __reduce__(self):
+ 'Return state information for pickling'
+ inst_dict = vars(self).copy()
+ for k in vars(OrderedDict()):
+ inst_dict.pop(k, None)
+ return self.__class__, (), inst_dict or None, None, iter(self.items())
+
+ def copy(self):
+ 'od.copy() -> a shallow copy of od'
+ return self.__class__(self)
+
+ @classmethod
+ def fromkeys(cls, iterable, value=None):
+ '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S.
+ If not specified, the value defaults to None.
+
+ '''
+ self = cls()
+ for key in iterable:
+ self[key] = value
+ return self
+
+ def __eq__(self, other):
+ '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
+ while comparison to a regular mapping is order-insensitive.
+
+ '''
+ if isinstance(other, OrderedDict):
+ return dict.__eq__(self, other) and all(map(_eq, self, other))
+ return dict.__eq__(self, other)
+
+
+# {{{ http://code.activestate.com/recipes/576611/ (r11)
+
+try:
+ from operator import itemgetter
+ from heapq import nlargest
+except ImportError:
+ pass
+
+########################################################################
+### Counter
+########################################################################
+
+def _count_elements(mapping, iterable):
+ 'Tally elements from the iterable.'
+ mapping_get = mapping.get
+ for elem in iterable:
+ mapping[elem] = mapping_get(elem, 0) + 1
+
+class Counter(dict):
+ '''Dict subclass for counting hashable items. Sometimes called a bag
+ or multiset. Elements are stored as dictionary keys and their counts
+ are stored as dictionary values.
+
+ >>> c = Counter('abcdeabcdabcaba') # count elements from a string
+
+ >>> c.most_common(3) # three most common elements
+ [('a', 5), ('b', 4), ('c', 3)]
+ >>> sorted(c) # list all unique elements
+ ['a', 'b', 'c', 'd', 'e']
+ >>> ''.join(sorted(c.elements())) # list elements with repetitions
+ 'aaaaabbbbcccdde'
+ >>> sum(c.values()) # total of all counts
+ 15
+
+ >>> c['a'] # count of letter 'a'
+ 5
+ >>> for elem in 'shazam': # update counts from an iterable
+ ... c[elem] += 1 # by adding 1 to each element's count
+ >>> c['a'] # now there are seven 'a'
+ 7
+ >>> del c['b'] # remove all 'b'
+ >>> c['b'] # now there are zero 'b'
+ 0
+
+ >>> d = Counter('simsalabim') # make another counter
+ >>> c.update(d) # add in the second counter
+ >>> c['a'] # now there are nine 'a'
+ 9
+
+ >>> c.clear() # empty the counter
+ >>> c
+ Counter()
+
+ Note: If a count is set to zero or reduced to zero, it will remain
+ in the counter until the entry is deleted or the counter is cleared:
+
+ >>> c = Counter('aaabbc')
+ >>> c['b'] -= 2 # reduce the count of 'b' by two
+ >>> c.most_common() # 'b' is still in, but its count is zero
+ [('a', 3), ('c', 1), ('b', 0)]
+
+ '''
+ # References:
+ # http://en.wikipedia.org/wiki/Multiset
+ # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
+ # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
+ # http://code.activestate.com/recipes/259174/
+ # Knuth, TAOCP Vol. II section 4.6.3
+
+ def __init__(*args, **kwds):
+ '''Create a new, empty Counter object. And if given, count elements
+ from an input iterable. Or, initialize the count from another mapping
+ of elements to their counts.
+
+ >>> c = Counter() # a new, empty counter
+ >>> c = Counter('gallahad') # a new counter from an iterable
+ >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
+ >>> c = Counter(a=4, b=2) # a new counter from keyword args
+
+ '''
+ if not args:
+ raise TypeError("descriptor '__init__' of 'Counter' object "
+ "needs an argument")
+ self = args[0]
+ args = args[1:]
+ if len(args) > 1:
+ raise TypeError('expected at most 1 arguments, got %d' % len(args))
+ super(Counter, self).__init__()
+ self.update(*args, **kwds)
+
+ def __missing__(self, key):
+ 'The count of elements not in the Counter is zero.'
+ # Needed so that self[missing_item] does not raise KeyError
+ return 0
+
+ def most_common(self, n=None):
+ '''List the n most common elements and their counts from the most
+ common to the least. If n is None, then list all element counts.
+
+ >>> Counter('abcdeabcdabcaba').most_common(3)
+ [('a', 5), ('b', 4), ('c', 3)]
+
+ '''
+ # Emulate Bag.sortedByCount from Smalltalk
+ if n is None:
+ return sorted(self.items(), key=_itemgetter(1), reverse=True)
+ return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
+
+ def elements(self):
+ '''Iterator over elements repeating each as many times as its count.
+
+ >>> c = Counter('ABCABC')
+ >>> sorted(c.elements())
+ ['A', 'A', 'B', 'B', 'C', 'C']
+
+ # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
+ >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
+ >>> product = 1
+ >>> for factor in prime_factors.elements(): # loop over factors
+ ... product *= factor # and multiply them
+ >>> product
+ 1836
+
+ Note, if an element's count has been set to zero or is a negative
+ number, elements() will ignore it.
+
+ '''
+ # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
+ return _chain.from_iterable(_starmap(_repeat, self.items()))
+
+ # Override dict methods where necessary
+
+ @classmethod
+ def fromkeys(cls, iterable, v=None):
+ # There is no equivalent method for counters because setting v=1
+ # means that no element can have a count greater than one.
+ raise NotImplementedError(
+ 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
+
+ def update(*args, **kwds):
+ '''Like dict.update() but add counts instead of replacing them.
+
+ Source can be an iterable, a dictionary, or another Counter instance.
+
+ >>> c = Counter('which')
+ >>> c.update('witch') # add elements from another iterable
+ >>> d = Counter('watch')
+ >>> c.update(d) # add elements from another counter
+ >>> c['h'] # four 'h' in which, witch, and watch
+ 4
+
+ '''
+ # The regular dict.update() operation makes no sense here because the
+ # replace behavior results in the some of original untouched counts
+ # being mixed-in with all of the other counts for a mismash that
+ # doesn't have a straight-forward interpretation in most counting
+ # contexts. Instead, we implement straight-addition. Both the inputs
+ # and outputs are allowed to contain zero and negative counts.
+
+ if not args:
+ raise TypeError("descriptor 'update' of 'Counter' object "
+ "needs an argument")
+ self = args[0]
+ args = args[1:]
+ if len(args) > 1:
+ raise TypeError('expected at most 1 arguments, got %d' % len(args))
+ iterable = args[0] if args else None
+ if iterable is not None:
+ if isinstance(iterable, Mapping):
+ if self:
+ self_get = self.get
+ for elem, count in iterable.items():
+ self[elem] = count + self_get(elem, 0)
+ else:
+ super(Counter, self).update(iterable) # fast path when counter is empty
+ else:
+ _count_elements(self, iterable)
+ if kwds:
+ self.update(kwds)
+
+ def subtract(*args, **kwds):
+ '''Like dict.update() but subtracts counts instead of replacing them.
+ Counts can be reduced below zero. Both the inputs and outputs are
+ allowed to contain zero and negative counts.
+
+ Source can be an iterable, a dictionary, or another Counter instance.
+
+ >>> c = Counter('which')
+ >>> c.subtract('witch') # subtract elements from another iterable
+ >>> c.subtract(Counter('watch')) # subtract elements from another counter
+ >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
+ 0
+ >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
+ -1
+
+ '''
+ if not args:
+ raise TypeError("descriptor 'subtract' of 'Counter' object "
+ "needs an argument")
+ self = args[0]
+ args = args[1:]
+ if len(args) > 1:
+ raise TypeError('expected at most 1 arguments, got %d' % len(args))
+ iterable = args[0] if args else None
+ if iterable is not None:
+ self_get = self.get
+ if isinstance(iterable, Mapping):
+ for elem, count in iterable.items():
+ self[elem] = self_get(elem, 0) - count
+ else:
+ for elem in iterable:
+ self[elem] = self_get(elem, 0) - 1
+ if kwds:
+ self.subtract(kwds)
+
+ def copy(self):
+ 'Return a shallow copy.'
+ return self.__class__(self)
+
+ def __reduce__(self):
+ return self.__class__, (dict(self),)
+
+ def __delitem__(self, elem):
+ 'Like dict.__delitem__() but does not raise KeyError for missing values.'
+ if elem in self:
+ super(Counter, self).__delitem__(elem)
+
+ def __repr__(self):
+ if not self:
+ return '%s()' % self.__class__.__name__
+ try:
+ items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
+ return '%s({%s})' % (self.__class__.__name__, items)
+ except TypeError:
+ # handle case where values are not orderable
+ return '{0}({1!r})'.format(self.__class__.__name__, dict(self))
+
+ # Multiset-style mathematical operations discussed in:
+ # Knuth TAOCP Volume II section 4.6.3 exercise 19
+ # and at http://en.wikipedia.org/wiki/Multiset
+ #
+ # Outputs guaranteed to only include positive counts.
+ #
+ # To strip negative and zero counts, add-in an empty counter:
+ # c += Counter()
+
+ def __add__(self, other):
+ '''Add counts from two counters.
+
+ >>> Counter('abbb') + Counter('bcc')
+ Counter({'b': 4, 'c': 2, 'a': 1})
+
+ '''
+ if not isinstance(other, Counter):
+ return NotImplemented
+ result = Counter()
+ for elem, count in self.items():
+ newcount = count + other[elem]
+ if newcount > 0:
+ result[elem] = newcount
+ for elem, count in other.items():
+ if elem not in self and count > 0:
+ result[elem] = count
+ return result
+
+ def __sub__(self, other):
+ ''' Subtract count, but keep only results with positive counts.
+
+ >>> Counter('abbbc') - Counter('bccd')
+ Counter({'b': 2, 'a': 1})
+
+ '''
+ if not isinstance(other, Counter):
+ return NotImplemented
+ result = Counter()
+ for elem, count in self.items():
+ newcount = count - other[elem]
+ if newcount > 0:
+ result[elem] = newcount
+ for elem, count in other.items():
+ if elem not in self and count < 0:
+ result[elem] = 0 - count
+ return result
+
+ def __or__(self, other):
+ '''Union is the maximum of value in either of the input counters.
+
+ >>> Counter('abbb') | Counter('bcc')
+ Counter({'b': 3, 'c': 2, 'a': 1})
+
+ '''
+ if not isinstance(other, Counter):
+ return NotImplemented
+ result = Counter()
+ for elem, count in self.items():
+ other_count = other[elem]
+ newcount = other_count if count < other_count else count
+ if newcount > 0:
+ result[elem] = newcount
+ for elem, count in other.items():
+ if elem not in self and count > 0:
+ result[elem] = count
+ return result
+
+ def __and__(self, other):
+ ''' Intersection is the minimum of corresponding counts.
+
+ >>> Counter('abbb') & Counter('bcc')
+ Counter({'b': 1})
+
+ '''
+ if not isinstance(other, Counter):
+ return NotImplemented
+ result = Counter()
+ for elem, count in self.items():
+ other_count = other[elem]
+ newcount = count if count < other_count else other_count
+ if newcount > 0:
+ result[elem] = newcount
+ return result
+
+ def __pos__(self):
+ 'Adds an empty counter, effectively stripping negative and zero counts'
+ return self + Counter()
+
+ def __neg__(self):
+ '''Subtracts from an empty counter. Strips positive and zero counts,
+ and flips the sign on negative counts.
+
+ '''
+ return Counter() - self
+
+ def _keep_positive(self):
+ '''Internal method to strip elements with a negative or zero count'''
+ nonpositive = [elem for elem, count in self.items() if not count > 0]
+ for elem in nonpositive:
+ del self[elem]
+ return self
+
+ def __iadd__(self, other):
+ '''Inplace add from another counter, keeping only positive counts.
+
+ >>> c = Counter('abbb')
+ >>> c += Counter('bcc')
+ >>> c
+ Counter({'b': 4, 'c': 2, 'a': 1})
+
+ '''
+ for elem, count in other.items():
+ self[elem] += count
+ return self._keep_positive()
+
+ def __isub__(self, other):
+ '''Inplace subtract counter, but keep only results with positive counts.
+
+ >>> c = Counter('abbbc')
+ >>> c -= Counter('bccd')
+ >>> c
+ Counter({'b': 2, 'a': 1})
+
+ '''
+ for elem, count in other.items():
+ self[elem] -= count
+ return self._keep_positive()
+
+ def __ior__(self, other):
+ '''Inplace union is the maximum of value from either counter.
+
+ >>> c = Counter('abbb')
+ >>> c |= Counter('bcc')
+ >>> c
+ Counter({'b': 3, 'c': 2, 'a': 1})
+
+ '''
+ for elem, other_count in other.items():
+ count = self[elem]
+ if other_count > count:
+ self[elem] = other_count
+ return self._keep_positive()
+
+ def __iand__(self, other):
+ '''Inplace intersection is the minimum of corresponding counts.
+
+ >>> c = Counter('abbb')
+ >>> c &= Counter('bcc')
+ >>> c
+ Counter({'b': 1})
+
+ '''
+ for elem, count in self.items():
+ other_count = other[elem]
+ if other_count < count:
+ self[elem] = other_count
+ return self._keep_positive()
+
+
+def check_output(*popenargs, **kwargs):
+ """
+ For Python 2.6 compatibility: see
+ http://stackoverflow.com/questions/4814970/
+ """
+
+ if 'stdout' in kwargs:
+ raise ValueError('stdout argument not allowed, it will be overridden.')
+ process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
+ output, unused_err = process.communicate()
+ retcode = process.poll()
+ if retcode:
+ cmd = kwargs.get("args")
+ if cmd is None:
+ cmd = popenargs[0]
+ raise subprocess.CalledProcessError(retcode, cmd)
+ return output
+
+
+def count(start=0, step=1):
+ """
+ ``itertools.count`` in Py 2.6 doesn't accept a step
+ parameter. This is an enhanced version of ``itertools.count``
+ for Py2.6 equivalent to ``itertools.count`` in Python 2.7+.
+ """
+ while True:
+ yield start
+ start += step
+
+
+########################################################################
+### ChainMap (helper for configparser and string.Template)
+### From the Py3.4 source code. See also:
+### https://github.com/kkxue/Py2ChainMap/blob/master/py2chainmap.py
+########################################################################
+
+class ChainMap(MutableMapping):
+ ''' A ChainMap groups multiple dicts (or other mappings) together
+ to create a single, updateable view.
+
+ The underlying mappings are stored in a list. That list is public and can
+ accessed or updated using the *maps* attribute. There is no other state.
+
+ Lookups search the underlying mappings successively until a key is found.
+ In contrast, writes, updates, and deletions only operate on the first
+ mapping.
+
+ '''
+
+ def __init__(self, *maps):
+ '''Initialize a ChainMap by setting *maps* to the given mappings.
+ If no mappings are provided, a single empty dictionary is used.
+
+ '''
+ self.maps = list(maps) or [{}] # always at least one map
+
+ def __missing__(self, key):
+ raise KeyError(key)
+
+ def __getitem__(self, key):
+ for mapping in self.maps:
+ try:
+ return mapping[key] # can't use 'key in mapping' with defaultdict
+ except KeyError:
+ pass
+ return self.__missing__(key) # support subclasses that define __missing__
+
+ def get(self, key, default=None):
+ return self[key] if key in self else default
+
+ def __len__(self):
+ return len(set().union(*self.maps)) # reuses stored hash values if possible
+
+ def __iter__(self):
+ return iter(set().union(*self.maps))
+
+ def __contains__(self, key):
+ return any(key in m for m in self.maps)
+
+ def __bool__(self):
+ return any(self.maps)
+
+ # Py2 compatibility:
+ __nonzero__ = __bool__
+
+ @recursive_repr()
+ def __repr__(self):
+ return '{0.__class__.__name__}({1})'.format(
+ self, ', '.join(map(repr, self.maps)))
+
+ @classmethod
+ def fromkeys(cls, iterable, *args):
+ 'Create a ChainMap with a single dict created from the iterable.'
+ return cls(dict.fromkeys(iterable, *args))
+
+ def copy(self):
+ 'New ChainMap or subclass with a new copy of maps[0] and refs to maps[1:]'
+ return self.__class__(self.maps[0].copy(), *self.maps[1:])
+
+ __copy__ = copy
+
+ def new_child(self, m=None): # like Django's Context.push()
+ '''
+ New ChainMap with a new map followed by all previous maps. If no
+ map is provided, an empty dict is used.
+ '''
+ if m is None:
+ m = {}
+ return self.__class__(m, *self.maps)
+
+ @property
+ def parents(self): # like Django's Context.pop()
+ 'New ChainMap from maps[1:].'
+ return self.__class__(*self.maps[1:])
+
+ def __setitem__(self, key, value):
+ self.maps[0][key] = value
+
+ def __delitem__(self, key):
+ try:
+ del self.maps[0][key]
+ except KeyError:
raise KeyError('Key not found in the first mapping: {0!r}'.format(key))
-
- def popitem(self):
- 'Remove and return an item pair from maps[0]. Raise KeyError is maps[0] is empty.'
- try:
- return self.maps[0].popitem()
- except KeyError:
- raise KeyError('No keys found in the first mapping.')
-
- def pop(self, key, *args):
- 'Remove *key* from maps[0] and return its value. Raise KeyError if *key* not in maps[0].'
- try:
- return self.maps[0].pop(key, *args)
- except KeyError:
+
+ def popitem(self):
+ 'Remove and return an item pair from maps[0]. Raise KeyError is maps[0] is empty.'
+ try:
+ return self.maps[0].popitem()
+ except KeyError:
+ raise KeyError('No keys found in the first mapping.')
+
+ def pop(self, key, *args):
+ 'Remove *key* from maps[0] and return its value. Raise KeyError if *key* not in maps[0].'
+ try:
+ return self.maps[0].pop(key, *args)
+ except KeyError:
raise KeyError('Key not found in the first mapping: {0!r}'.format(key))
-
- def clear(self):
- 'Clear maps[0], leaving maps[1:] intact.'
- self.maps[0].clear()
-
-
-# Re-use the same sentinel as in the Python stdlib socket module:
-from socket import _GLOBAL_DEFAULT_TIMEOUT
-# Was: _GLOBAL_DEFAULT_TIMEOUT = object()
-
-
-def create_connection(address, timeout=_GLOBAL_DEFAULT_TIMEOUT,
- source_address=None):
- """Backport of 3-argument create_connection() for Py2.6.
-
- Connect to *address* and return the socket object.
-
- Convenience function. Connect to *address* (a 2-tuple ``(host,
- port)``) and return the socket object. Passing the optional
- *timeout* parameter will set the timeout on the socket instance
- before attempting to connect. If no *timeout* is supplied, the
- global default timeout setting returned by :func:`getdefaulttimeout`
- is used. If *source_address* is set it must be a tuple of (host, port)
- for the socket to bind as a source address before making the connection.
- An host of '' or port 0 tells the OS to use the default.
- """
-
- host, port = address
- err = None
- for res in getaddrinfo(host, port, 0, SOCK_STREAM):
- af, socktype, proto, canonname, sa = res
- sock = None
- try:
- sock = socket(af, socktype, proto)
- if timeout is not _GLOBAL_DEFAULT_TIMEOUT:
- sock.settimeout(timeout)
- if source_address:
- sock.bind(source_address)
- sock.connect(sa)
- return sock
-
- except error as _:
- err = _
- if sock is not None:
- sock.close()
-
- if err is not None:
- raise err
- else:
- raise error("getaddrinfo returns an empty list")
-
-# Backport from Py2.7 for Py2.6:
-def cmp_to_key(mycmp):
- """Convert a cmp= function into a key= function"""
- class K(object):
- __slots__ = ['obj']
- def __init__(self, obj, *args):
- self.obj = obj
- def __lt__(self, other):
- return mycmp(self.obj, other.obj) < 0
- def __gt__(self, other):
- return mycmp(self.obj, other.obj) > 0
- def __eq__(self, other):
- return mycmp(self.obj, other.obj) == 0
- def __le__(self, other):
- return mycmp(self.obj, other.obj) <= 0
- def __ge__(self, other):
- return mycmp(self.obj, other.obj) >= 0
- def __ne__(self, other):
- return mycmp(self.obj, other.obj) != 0
- def __hash__(self):
- raise TypeError('hash not implemented')
- return K
-
-# Back up our definitions above in case they're useful
-_OrderedDict = OrderedDict
-_Counter = Counter
-_check_output = check_output
-_count = count
-_ceil = ceil
-__count_elements = _count_elements
-_recursive_repr = recursive_repr
-_ChainMap = ChainMap
-_create_connection = create_connection
-_cmp_to_key = cmp_to_key
-
-# Overwrite the definitions above with the usual ones
-# from the standard library:
-if sys.version_info >= (2, 7):
- from collections import OrderedDict, Counter
- from itertools import count
- from functools import cmp_to_key
- try:
- from subprocess import check_output
- except ImportError:
- # Not available. This happens with Google App Engine: see issue #231
- pass
- from socket import create_connection
-
-if sys.version_info >= (3, 0):
- from math import ceil
- from collections import _count_elements
-
-if sys.version_info >= (3, 3):
- from reprlib import recursive_repr
- from collections import ChainMap
+
+ def clear(self):
+ 'Clear maps[0], leaving maps[1:] intact.'
+ self.maps[0].clear()
+
+
+# Re-use the same sentinel as in the Python stdlib socket module:
+from socket import _GLOBAL_DEFAULT_TIMEOUT
+# Was: _GLOBAL_DEFAULT_TIMEOUT = object()
+
+
+def create_connection(address, timeout=_GLOBAL_DEFAULT_TIMEOUT,
+ source_address=None):
+ """Backport of 3-argument create_connection() for Py2.6.
+
+ Connect to *address* and return the socket object.
+
+ Convenience function. Connect to *address* (a 2-tuple ``(host,
+ port)``) and return the socket object. Passing the optional
+ *timeout* parameter will set the timeout on the socket instance
+ before attempting to connect. If no *timeout* is supplied, the
+ global default timeout setting returned by :func:`getdefaulttimeout`
+ is used. If *source_address* is set it must be a tuple of (host, port)
+ for the socket to bind as a source address before making the connection.
+ An host of '' or port 0 tells the OS to use the default.
+ """
+
+ host, port = address
+ err = None
+ for res in getaddrinfo(host, port, 0, SOCK_STREAM):
+ af, socktype, proto, canonname, sa = res
+ sock = None
+ try:
+ sock = socket(af, socktype, proto)
+ if timeout is not _GLOBAL_DEFAULT_TIMEOUT:
+ sock.settimeout(timeout)
+ if source_address:
+ sock.bind(source_address)
+ sock.connect(sa)
+ return sock
+
+ except error as _:
+ err = _
+ if sock is not None:
+ sock.close()
+
+ if err is not None:
+ raise err
+ else:
+ raise error("getaddrinfo returns an empty list")
+
+# Backport from Py2.7 for Py2.6:
+def cmp_to_key(mycmp):
+ """Convert a cmp= function into a key= function"""
+ class K(object):
+ __slots__ = ['obj']
+ def __init__(self, obj, *args):
+ self.obj = obj
+ def __lt__(self, other):
+ return mycmp(self.obj, other.obj) < 0
+ def __gt__(self, other):
+ return mycmp(self.obj, other.obj) > 0
+ def __eq__(self, other):
+ return mycmp(self.obj, other.obj) == 0
+ def __le__(self, other):
+ return mycmp(self.obj, other.obj) <= 0
+ def __ge__(self, other):
+ return mycmp(self.obj, other.obj) >= 0
+ def __ne__(self, other):
+ return mycmp(self.obj, other.obj) != 0
+ def __hash__(self):
+ raise TypeError('hash not implemented')
+ return K
+
+# Back up our definitions above in case they're useful
+_OrderedDict = OrderedDict
+_Counter = Counter
+_check_output = check_output
+_count = count
+_ceil = ceil
+__count_elements = _count_elements
+_recursive_repr = recursive_repr
+_ChainMap = ChainMap
+_create_connection = create_connection
+_cmp_to_key = cmp_to_key
+
+# Overwrite the definitions above with the usual ones
+# from the standard library:
+if sys.version_info >= (2, 7):
+ from collections import OrderedDict, Counter
+ from itertools import count
+ from functools import cmp_to_key
+ try:
+ from subprocess import check_output
+ except ImportError:
+ # Not available. This happens with Google App Engine: see issue #231
+ pass
+ from socket import create_connection
+
+if sys.version_info >= (3, 0):
+ from math import ceil
+ from collections import _count_elements
+
+if sys.version_info >= (3, 3):
+ from reprlib import recursive_repr
+ from collections import ChainMap
diff --git a/contrib/python/future/future/backports/socket.py b/contrib/python/future/future/backports/socket.py
index e6757ac7d0..eea20f7fd0 100644
--- a/contrib/python/future/future/backports/socket.py
+++ b/contrib/python/future/future/backports/socket.py
@@ -1,454 +1,454 @@
-# Wrapper module for _socket, providing some additional facilities
-# implemented in Python.
-
-"""\
-This module provides socket operations and some related functions.
-On Unix, it supports IP (Internet Protocol) and Unix domain sockets.
-On other systems, it only supports IP. Functions specific for a
-socket are available as methods of the socket object.
-
-Functions:
-
-socket() -- create a new socket object
-socketpair() -- create a pair of new socket objects [*]
-fromfd() -- create a socket object from an open file descriptor [*]
-fromshare() -- create a socket object from data received from socket.share() [*]
-gethostname() -- return the current hostname
-gethostbyname() -- map a hostname to its IP number
-gethostbyaddr() -- map an IP number or hostname to DNS info
-getservbyname() -- map a service name and a protocol name to a port number
-getprotobyname() -- map a protocol name (e.g. 'tcp') to a number
-ntohs(), ntohl() -- convert 16, 32 bit int from network to host byte order
-htons(), htonl() -- convert 16, 32 bit int from host to network byte order
-inet_aton() -- convert IP addr string (123.45.67.89) to 32-bit packed format
-inet_ntoa() -- convert 32-bit packed format IP to string (123.45.67.89)
-socket.getdefaulttimeout() -- get the default timeout value
-socket.setdefaulttimeout() -- set the default timeout value
-create_connection() -- connects to an address, with an optional timeout and
- optional source address.
-
- [*] not available on all platforms!
-
-Special objects:
-
-SocketType -- type object for socket objects
-error -- exception raised for I/O errors
-has_ipv6 -- boolean value indicating if IPv6 is supported
-
-Integer constants:
-
-AF_INET, AF_UNIX -- socket domains (first argument to socket() call)
-SOCK_STREAM, SOCK_DGRAM, SOCK_RAW -- socket types (second argument)
-
-Many other constants may be defined; these may be used in calls to
-the setsockopt() and getsockopt() methods.
-"""
-
-from __future__ import unicode_literals
-from __future__ import print_function
-from __future__ import division
-from __future__ import absolute_import
-from future.builtins import super
-
-import _socket
-from _socket import *
-
-import os, sys, io
-
-try:
- import errno
-except ImportError:
- errno = None
-EBADF = getattr(errno, 'EBADF', 9)
-EAGAIN = getattr(errno, 'EAGAIN', 11)
-EWOULDBLOCK = getattr(errno, 'EWOULDBLOCK', 11)
-
-__all__ = ["getfqdn", "create_connection"]
-__all__.extend(os._get_exports_list(_socket))
-
-
-_realsocket = socket
-
-# WSA error codes
-if sys.platform.lower().startswith("win"):
- errorTab = {}
- errorTab[10004] = "The operation was interrupted."
- errorTab[10009] = "A bad file handle was passed."
- errorTab[10013] = "Permission denied."
- errorTab[10014] = "A fault occurred on the network??" # WSAEFAULT
- errorTab[10022] = "An invalid operation was attempted."
- errorTab[10035] = "The socket operation would block"
- errorTab[10036] = "A blocking operation is already in progress."
- errorTab[10048] = "The network address is in use."
- errorTab[10054] = "The connection has been reset."
- errorTab[10058] = "The network has been shut down."
- errorTab[10060] = "The operation timed out."
- errorTab[10061] = "Connection refused."
- errorTab[10063] = "The name is too long."
- errorTab[10064] = "The host is down."
- errorTab[10065] = "The host is unreachable."
- __all__.append("errorTab")
-
-
-class socket(_socket.socket):
-
- """A subclass of _socket.socket adding the makefile() method."""
-
- __slots__ = ["_io_refs", "_closed"]
-
- def __init__(self, family=AF_INET, type=SOCK_STREAM, proto=0, fileno=None):
- if fileno is None:
- _socket.socket.__init__(self, family, type, proto)
- else:
- _socket.socket.__init__(self, family, type, proto, fileno)
- self._io_refs = 0
- self._closed = False
-
- def __enter__(self):
- return self
-
- def __exit__(self, *args):
- if not self._closed:
- self.close()
-
- def __repr__(self):
- """Wrap __repr__() to reveal the real class name."""
- s = _socket.socket.__repr__(self)
- if s.startswith("<socket object"):
- s = "<%s.%s%s%s" % (self.__class__.__module__,
- self.__class__.__name__,
- getattr(self, '_closed', False) and " [closed] " or "",
- s[7:])
- return s
-
- def __getstate__(self):
- raise TypeError("Cannot serialize socket object")
-
- def dup(self):
- """dup() -> socket object
-
- Return a new socket object connected to the same system resource.
- """
- fd = dup(self.fileno())
- sock = self.__class__(self.family, self.type, self.proto, fileno=fd)
- sock.settimeout(self.gettimeout())
- return sock
-
- def accept(self):
- """accept() -> (socket object, address info)
-
- Wait for an incoming connection. Return a new socket
- representing the connection, and the address of the client.
- For IP sockets, the address info is a pair (hostaddr, port).
- """
- fd, addr = self._accept()
- sock = socket(self.family, self.type, self.proto, fileno=fd)
- # Issue #7995: if no default timeout is set and the listening
- # socket had a (non-zero) timeout, force the new socket in blocking
- # mode to override platform-specific socket flags inheritance.
- if getdefaulttimeout() is None and self.gettimeout():
- sock.setblocking(True)
- return sock, addr
-
- def makefile(self, mode="r", buffering=None, **_3to2kwargs):
- """makefile(...) -> an I/O stream connected to the socket
-
- The arguments are as for io.open() after the filename,
- except the only mode characters supported are 'r', 'w' and 'b'.
- The semantics are similar too. (XXX refactor to share code?)
- """
- if 'newline' in _3to2kwargs: newline = _3to2kwargs['newline']; del _3to2kwargs['newline']
- else: newline = None
- if 'errors' in _3to2kwargs: errors = _3to2kwargs['errors']; del _3to2kwargs['errors']
- else: errors = None
- if 'encoding' in _3to2kwargs: encoding = _3to2kwargs['encoding']; del _3to2kwargs['encoding']
- else: encoding = None
- for c in mode:
- if c not in ("r", "w", "b"):
- raise ValueError("invalid mode %r (only r, w, b allowed)")
- writing = "w" in mode
- reading = "r" in mode or not writing
- assert reading or writing
- binary = "b" in mode
- rawmode = ""
- if reading:
- rawmode += "r"
- if writing:
- rawmode += "w"
- raw = SocketIO(self, rawmode)
- self._io_refs += 1
- if buffering is None:
- buffering = -1
- if buffering < 0:
- buffering = io.DEFAULT_BUFFER_SIZE
- if buffering == 0:
- if not binary:
- raise ValueError("unbuffered streams must be binary")
- return raw
- if reading and writing:
- buffer = io.BufferedRWPair(raw, raw, buffering)
- elif reading:
- buffer = io.BufferedReader(raw, buffering)
- else:
- assert writing
- buffer = io.BufferedWriter(raw, buffering)
- if binary:
- return buffer
- text = io.TextIOWrapper(buffer, encoding, errors, newline)
- text.mode = mode
- return text
-
- def _decref_socketios(self):
- if self._io_refs > 0:
- self._io_refs -= 1
- if self._closed:
- self.close()
-
- def _real_close(self, _ss=_socket.socket):
- # This function should not reference any globals. See issue #808164.
- _ss.close(self)
-
- def close(self):
- # This function should not reference any globals. See issue #808164.
- self._closed = True
- if self._io_refs <= 0:
- self._real_close()
-
- def detach(self):
- """detach() -> file descriptor
-
- Close the socket object without closing the underlying file descriptor.
- The object cannot be used after this call, but the file descriptor
- can be reused for other purposes. The file descriptor is returned.
- """
- self._closed = True
- return super().detach()
-
-def fromfd(fd, family, type, proto=0):
- """ fromfd(fd, family, type[, proto]) -> socket object
-
- Create a socket object from a duplicate of the given file
- descriptor. The remaining arguments are the same as for socket().
- """
- nfd = dup(fd)
- return socket(family, type, proto, nfd)
-
-if hasattr(_socket.socket, "share"):
- def fromshare(info):
- """ fromshare(info) -> socket object
-
- Create a socket object from a the bytes object returned by
- socket.share(pid).
- """
- return socket(0, 0, 0, info)
-
-if hasattr(_socket, "socketpair"):
-
- def socketpair(family=None, type=SOCK_STREAM, proto=0):
- """socketpair([family[, type[, proto]]]) -> (socket object, socket object)
-
- Create a pair of socket objects from the sockets returned by the platform
- socketpair() function.
- The arguments are the same as for socket() except the default family is
- AF_UNIX if defined on the platform; otherwise, the default is AF_INET.
- """
- if family is None:
- try:
- family = AF_UNIX
- except NameError:
- family = AF_INET
- a, b = _socket.socketpair(family, type, proto)
- a = socket(family, type, proto, a.detach())
- b = socket(family, type, proto, b.detach())
- return a, b
-
-
-_blocking_errnos = set([EAGAIN, EWOULDBLOCK])
-
-class SocketIO(io.RawIOBase):
-
- """Raw I/O implementation for stream sockets.
-
- This class supports the makefile() method on sockets. It provides
- the raw I/O interface on top of a socket object.
- """
-
- # One might wonder why not let FileIO do the job instead. There are two
- # main reasons why FileIO is not adapted:
- # - it wouldn't work under Windows (where you can't used read() and
- # write() on a socket handle)
- # - it wouldn't work with socket timeouts (FileIO would ignore the
- # timeout and consider the socket non-blocking)
-
- # XXX More docs
-
- def __init__(self, sock, mode):
- if mode not in ("r", "w", "rw", "rb", "wb", "rwb"):
- raise ValueError("invalid mode: %r" % mode)
- io.RawIOBase.__init__(self)
- self._sock = sock
- if "b" not in mode:
- mode += "b"
- self._mode = mode
- self._reading = "r" in mode
- self._writing = "w" in mode
- self._timeout_occurred = False
-
- def readinto(self, b):
- """Read up to len(b) bytes into the writable buffer *b* and return
- the number of bytes read. If the socket is non-blocking and no bytes
- are available, None is returned.
-
- If *b* is non-empty, a 0 return value indicates that the connection
- was shutdown at the other end.
- """
- self._checkClosed()
- self._checkReadable()
- if self._timeout_occurred:
- raise IOError("cannot read from timed out object")
- while True:
- try:
- return self._sock.recv_into(b)
- except timeout:
- self._timeout_occurred = True
- raise
- # except InterruptedError:
- # continue
- except error as e:
- if e.args[0] in _blocking_errnos:
- return None
- raise
-
- def write(self, b):
- """Write the given bytes or bytearray object *b* to the socket
- and return the number of bytes written. This can be less than
- len(b) if not all data could be written. If the socket is
- non-blocking and no bytes could be written None is returned.
- """
- self._checkClosed()
- self._checkWritable()
- try:
- return self._sock.send(b)
- except error as e:
- # XXX what about EINTR?
- if e.args[0] in _blocking_errnos:
- return None
- raise
-
- def readable(self):
- """True if the SocketIO is open for reading.
- """
- if self.closed:
- raise ValueError("I/O operation on closed socket.")
- return self._reading
-
- def writable(self):
- """True if the SocketIO is open for writing.
- """
- if self.closed:
- raise ValueError("I/O operation on closed socket.")
- return self._writing
-
- def seekable(self):
- """True if the SocketIO is open for seeking.
- """
- if self.closed:
- raise ValueError("I/O operation on closed socket.")
- return super().seekable()
-
- def fileno(self):
- """Return the file descriptor of the underlying socket.
- """
- self._checkClosed()
- return self._sock.fileno()
-
- @property
- def name(self):
- if not self.closed:
- return self.fileno()
- else:
- return -1
-
- @property
- def mode(self):
- return self._mode
-
- def close(self):
- """Close the SocketIO object. This doesn't close the underlying
- socket, except if all references to it have disappeared.
- """
- if self.closed:
- return
- io.RawIOBase.close(self)
- self._sock._decref_socketios()
- self._sock = None
-
-
-def getfqdn(name=''):
- """Get fully qualified domain name from name.
-
- An empty argument is interpreted as meaning the local host.
-
- First the hostname returned by gethostbyaddr() is checked, then
- possibly existing aliases. In case no FQDN is available, hostname
- from gethostname() is returned.
- """
- name = name.strip()
- if not name or name == '0.0.0.0':
- name = gethostname()
- try:
- hostname, aliases, ipaddrs = gethostbyaddr(name)
- except error:
- pass
- else:
- aliases.insert(0, hostname)
- for name in aliases:
- if '.' in name:
- break
- else:
- name = hostname
- return name
-
-
-# Re-use the same sentinel as in the Python stdlib socket module:
-from socket import _GLOBAL_DEFAULT_TIMEOUT
-# Was: _GLOBAL_DEFAULT_TIMEOUT = object()
-
-
-def create_connection(address, timeout=_GLOBAL_DEFAULT_TIMEOUT,
- source_address=None):
- """Connect to *address* and return the socket object.
-
- Convenience function. Connect to *address* (a 2-tuple ``(host,
- port)``) and return the socket object. Passing the optional
- *timeout* parameter will set the timeout on the socket instance
- before attempting to connect. If no *timeout* is supplied, the
- global default timeout setting returned by :func:`getdefaulttimeout`
- is used. If *source_address* is set it must be a tuple of (host, port)
- for the socket to bind as a source address before making the connection.
- An host of '' or port 0 tells the OS to use the default.
- """
-
- host, port = address
- err = None
- for res in getaddrinfo(host, port, 0, SOCK_STREAM):
- af, socktype, proto, canonname, sa = res
- sock = None
- try:
- sock = socket(af, socktype, proto)
- if timeout is not _GLOBAL_DEFAULT_TIMEOUT:
- sock.settimeout(timeout)
- if source_address:
- sock.bind(source_address)
- sock.connect(sa)
- return sock
-
- except error as _:
- err = _
- if sock is not None:
- sock.close()
-
- if err is not None:
- raise err
- else:
- raise error("getaddrinfo returns an empty list")
+# Wrapper module for _socket, providing some additional facilities
+# implemented in Python.
+
+"""\
+This module provides socket operations and some related functions.
+On Unix, it supports IP (Internet Protocol) and Unix domain sockets.
+On other systems, it only supports IP. Functions specific for a
+socket are available as methods of the socket object.
+
+Functions:
+
+socket() -- create a new socket object
+socketpair() -- create a pair of new socket objects [*]
+fromfd() -- create a socket object from an open file descriptor [*]
+fromshare() -- create a socket object from data received from socket.share() [*]
+gethostname() -- return the current hostname
+gethostbyname() -- map a hostname to its IP number
+gethostbyaddr() -- map an IP number or hostname to DNS info
+getservbyname() -- map a service name and a protocol name to a port number
+getprotobyname() -- map a protocol name (e.g. 'tcp') to a number
+ntohs(), ntohl() -- convert 16, 32 bit int from network to host byte order
+htons(), htonl() -- convert 16, 32 bit int from host to network byte order
+inet_aton() -- convert IP addr string (123.45.67.89) to 32-bit packed format
+inet_ntoa() -- convert 32-bit packed format IP to string (123.45.67.89)
+socket.getdefaulttimeout() -- get the default timeout value
+socket.setdefaulttimeout() -- set the default timeout value
+create_connection() -- connects to an address, with an optional timeout and
+ optional source address.
+
+ [*] not available on all platforms!
+
+Special objects:
+
+SocketType -- type object for socket objects
+error -- exception raised for I/O errors
+has_ipv6 -- boolean value indicating if IPv6 is supported
+
+Integer constants:
+
+AF_INET, AF_UNIX -- socket domains (first argument to socket() call)
+SOCK_STREAM, SOCK_DGRAM, SOCK_RAW -- socket types (second argument)
+
+Many other constants may be defined; these may be used in calls to
+the setsockopt() and getsockopt() methods.
+"""
+
+from __future__ import unicode_literals
+from __future__ import print_function
+from __future__ import division
+from __future__ import absolute_import
+from future.builtins import super
+
+import _socket
+from _socket import *
+
+import os, sys, io
+
+try:
+ import errno
+except ImportError:
+ errno = None
+EBADF = getattr(errno, 'EBADF', 9)
+EAGAIN = getattr(errno, 'EAGAIN', 11)
+EWOULDBLOCK = getattr(errno, 'EWOULDBLOCK', 11)
+
+__all__ = ["getfqdn", "create_connection"]
+__all__.extend(os._get_exports_list(_socket))
+
+
+_realsocket = socket
+
+# WSA error codes
+if sys.platform.lower().startswith("win"):
+ errorTab = {}
+ errorTab[10004] = "The operation was interrupted."
+ errorTab[10009] = "A bad file handle was passed."
+ errorTab[10013] = "Permission denied."
+ errorTab[10014] = "A fault occurred on the network??" # WSAEFAULT
+ errorTab[10022] = "An invalid operation was attempted."
+ errorTab[10035] = "The socket operation would block"
+ errorTab[10036] = "A blocking operation is already in progress."
+ errorTab[10048] = "The network address is in use."
+ errorTab[10054] = "The connection has been reset."
+ errorTab[10058] = "The network has been shut down."
+ errorTab[10060] = "The operation timed out."
+ errorTab[10061] = "Connection refused."
+ errorTab[10063] = "The name is too long."
+ errorTab[10064] = "The host is down."
+ errorTab[10065] = "The host is unreachable."
+ __all__.append("errorTab")
+
+
+class socket(_socket.socket):
+
+ """A subclass of _socket.socket adding the makefile() method."""
+
+ __slots__ = ["_io_refs", "_closed"]
+
+ def __init__(self, family=AF_INET, type=SOCK_STREAM, proto=0, fileno=None):
+ if fileno is None:
+ _socket.socket.__init__(self, family, type, proto)
+ else:
+ _socket.socket.__init__(self, family, type, proto, fileno)
+ self._io_refs = 0
+ self._closed = False
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, *args):
+ if not self._closed:
+ self.close()
+
+ def __repr__(self):
+ """Wrap __repr__() to reveal the real class name."""
+ s = _socket.socket.__repr__(self)
+ if s.startswith("<socket object"):
+ s = "<%s.%s%s%s" % (self.__class__.__module__,
+ self.__class__.__name__,
+ getattr(self, '_closed', False) and " [closed] " or "",
+ s[7:])
+ return s
+
+ def __getstate__(self):
+ raise TypeError("Cannot serialize socket object")
+
+ def dup(self):
+ """dup() -> socket object
+
+ Return a new socket object connected to the same system resource.
+ """
+ fd = dup(self.fileno())
+ sock = self.__class__(self.family, self.type, self.proto, fileno=fd)
+ sock.settimeout(self.gettimeout())
+ return sock
+
+ def accept(self):
+ """accept() -> (socket object, address info)
+
+ Wait for an incoming connection. Return a new socket
+ representing the connection, and the address of the client.
+ For IP sockets, the address info is a pair (hostaddr, port).
+ """
+ fd, addr = self._accept()
+ sock = socket(self.family, self.type, self.proto, fileno=fd)
+ # Issue #7995: if no default timeout is set and the listening
+ # socket had a (non-zero) timeout, force the new socket in blocking
+ # mode to override platform-specific socket flags inheritance.
+ if getdefaulttimeout() is None and self.gettimeout():
+ sock.setblocking(True)
+ return sock, addr
+
+ def makefile(self, mode="r", buffering=None, **_3to2kwargs):
+ """makefile(...) -> an I/O stream connected to the socket
+
+ The arguments are as for io.open() after the filename,
+ except the only mode characters supported are 'r', 'w' and 'b'.
+ The semantics are similar too. (XXX refactor to share code?)
+ """
+ if 'newline' in _3to2kwargs: newline = _3to2kwargs['newline']; del _3to2kwargs['newline']
+ else: newline = None
+ if 'errors' in _3to2kwargs: errors = _3to2kwargs['errors']; del _3to2kwargs['errors']
+ else: errors = None
+ if 'encoding' in _3to2kwargs: encoding = _3to2kwargs['encoding']; del _3to2kwargs['encoding']
+ else: encoding = None
+ for c in mode:
+ if c not in ("r", "w", "b"):
+ raise ValueError("invalid mode %r (only r, w, b allowed)")
+ writing = "w" in mode
+ reading = "r" in mode or not writing
+ assert reading or writing
+ binary = "b" in mode
+ rawmode = ""
+ if reading:
+ rawmode += "r"
+ if writing:
+ rawmode += "w"
+ raw = SocketIO(self, rawmode)
+ self._io_refs += 1
+ if buffering is None:
+ buffering = -1
+ if buffering < 0:
+ buffering = io.DEFAULT_BUFFER_SIZE
+ if buffering == 0:
+ if not binary:
+ raise ValueError("unbuffered streams must be binary")
+ return raw
+ if reading and writing:
+ buffer = io.BufferedRWPair(raw, raw, buffering)
+ elif reading:
+ buffer = io.BufferedReader(raw, buffering)
+ else:
+ assert writing
+ buffer = io.BufferedWriter(raw, buffering)
+ if binary:
+ return buffer
+ text = io.TextIOWrapper(buffer, encoding, errors, newline)
+ text.mode = mode
+ return text
+
+ def _decref_socketios(self):
+ if self._io_refs > 0:
+ self._io_refs -= 1
+ if self._closed:
+ self.close()
+
+ def _real_close(self, _ss=_socket.socket):
+ # This function should not reference any globals. See issue #808164.
+ _ss.close(self)
+
+ def close(self):
+ # This function should not reference any globals. See issue #808164.
+ self._closed = True
+ if self._io_refs <= 0:
+ self._real_close()
+
+ def detach(self):
+ """detach() -> file descriptor
+
+ Close the socket object without closing the underlying file descriptor.
+ The object cannot be used after this call, but the file descriptor
+ can be reused for other purposes. The file descriptor is returned.
+ """
+ self._closed = True
+ return super().detach()
+
+def fromfd(fd, family, type, proto=0):
+ """ fromfd(fd, family, type[, proto]) -> socket object
+
+ Create a socket object from a duplicate of the given file
+ descriptor. The remaining arguments are the same as for socket().
+ """
+ nfd = dup(fd)
+ return socket(family, type, proto, nfd)
+
+if hasattr(_socket.socket, "share"):
+ def fromshare(info):
+ """ fromshare(info) -> socket object
+
+ Create a socket object from a the bytes object returned by
+ socket.share(pid).
+ """
+ return socket(0, 0, 0, info)
+
+if hasattr(_socket, "socketpair"):
+
+ def socketpair(family=None, type=SOCK_STREAM, proto=0):
+ """socketpair([family[, type[, proto]]]) -> (socket object, socket object)
+
+ Create a pair of socket objects from the sockets returned by the platform
+ socketpair() function.
+ The arguments are the same as for socket() except the default family is
+ AF_UNIX if defined on the platform; otherwise, the default is AF_INET.
+ """
+ if family is None:
+ try:
+ family = AF_UNIX
+ except NameError:
+ family = AF_INET
+ a, b = _socket.socketpair(family, type, proto)
+ a = socket(family, type, proto, a.detach())
+ b = socket(family, type, proto, b.detach())
+ return a, b
+
+
+_blocking_errnos = set([EAGAIN, EWOULDBLOCK])
+
+class SocketIO(io.RawIOBase):
+
+ """Raw I/O implementation for stream sockets.
+
+ This class supports the makefile() method on sockets. It provides
+ the raw I/O interface on top of a socket object.
+ """
+
+ # One might wonder why not let FileIO do the job instead. There are two
+ # main reasons why FileIO is not adapted:
+ # - it wouldn't work under Windows (where you can't used read() and
+ # write() on a socket handle)
+ # - it wouldn't work with socket timeouts (FileIO would ignore the
+ # timeout and consider the socket non-blocking)
+
+ # XXX More docs
+
+ def __init__(self, sock, mode):
+ if mode not in ("r", "w", "rw", "rb", "wb", "rwb"):
+ raise ValueError("invalid mode: %r" % mode)
+ io.RawIOBase.__init__(self)
+ self._sock = sock
+ if "b" not in mode:
+ mode += "b"
+ self._mode = mode
+ self._reading = "r" in mode
+ self._writing = "w" in mode
+ self._timeout_occurred = False
+
+ def readinto(self, b):
+ """Read up to len(b) bytes into the writable buffer *b* and return
+ the number of bytes read. If the socket is non-blocking and no bytes
+ are available, None is returned.
+
+ If *b* is non-empty, a 0 return value indicates that the connection
+ was shutdown at the other end.
+ """
+ self._checkClosed()
+ self._checkReadable()
+ if self._timeout_occurred:
+ raise IOError("cannot read from timed out object")
+ while True:
+ try:
+ return self._sock.recv_into(b)
+ except timeout:
+ self._timeout_occurred = True
+ raise
+ # except InterruptedError:
+ # continue
+ except error as e:
+ if e.args[0] in _blocking_errnos:
+ return None
+ raise
+
+ def write(self, b):
+ """Write the given bytes or bytearray object *b* to the socket
+ and return the number of bytes written. This can be less than
+ len(b) if not all data could be written. If the socket is
+ non-blocking and no bytes could be written None is returned.
+ """
+ self._checkClosed()
+ self._checkWritable()
+ try:
+ return self._sock.send(b)
+ except error as e:
+ # XXX what about EINTR?
+ if e.args[0] in _blocking_errnos:
+ return None
+ raise
+
+ def readable(self):
+ """True if the SocketIO is open for reading.
+ """
+ if self.closed:
+ raise ValueError("I/O operation on closed socket.")
+ return self._reading
+
+ def writable(self):
+ """True if the SocketIO is open for writing.
+ """
+ if self.closed:
+ raise ValueError("I/O operation on closed socket.")
+ return self._writing
+
+ def seekable(self):
+ """True if the SocketIO is open for seeking.
+ """
+ if self.closed:
+ raise ValueError("I/O operation on closed socket.")
+ return super().seekable()
+
+ def fileno(self):
+ """Return the file descriptor of the underlying socket.
+ """
+ self._checkClosed()
+ return self._sock.fileno()
+
+ @property
+ def name(self):
+ if not self.closed:
+ return self.fileno()
+ else:
+ return -1
+
+ @property
+ def mode(self):
+ return self._mode
+
+ def close(self):
+ """Close the SocketIO object. This doesn't close the underlying
+ socket, except if all references to it have disappeared.
+ """
+ if self.closed:
+ return
+ io.RawIOBase.close(self)
+ self._sock._decref_socketios()
+ self._sock = None
+
+
+def getfqdn(name=''):
+ """Get fully qualified domain name from name.
+
+ An empty argument is interpreted as meaning the local host.
+
+ First the hostname returned by gethostbyaddr() is checked, then
+ possibly existing aliases. In case no FQDN is available, hostname
+ from gethostname() is returned.
+ """
+ name = name.strip()
+ if not name or name == '0.0.0.0':
+ name = gethostname()
+ try:
+ hostname, aliases, ipaddrs = gethostbyaddr(name)
+ except error:
+ pass
+ else:
+ aliases.insert(0, hostname)
+ for name in aliases:
+ if '.' in name:
+ break
+ else:
+ name = hostname
+ return name
+
+
+# Re-use the same sentinel as in the Python stdlib socket module:
+from socket import _GLOBAL_DEFAULT_TIMEOUT
+# Was: _GLOBAL_DEFAULT_TIMEOUT = object()
+
+
+def create_connection(address, timeout=_GLOBAL_DEFAULT_TIMEOUT,
+ source_address=None):
+ """Connect to *address* and return the socket object.
+
+ Convenience function. Connect to *address* (a 2-tuple ``(host,
+ port)``) and return the socket object. Passing the optional
+ *timeout* parameter will set the timeout on the socket instance
+ before attempting to connect. If no *timeout* is supplied, the
+ global default timeout setting returned by :func:`getdefaulttimeout`
+ is used. If *source_address* is set it must be a tuple of (host, port)
+ for the socket to bind as a source address before making the connection.
+ An host of '' or port 0 tells the OS to use the default.
+ """
+
+ host, port = address
+ err = None
+ for res in getaddrinfo(host, port, 0, SOCK_STREAM):
+ af, socktype, proto, canonname, sa = res
+ sock = None
+ try:
+ sock = socket(af, socktype, proto)
+ if timeout is not _GLOBAL_DEFAULT_TIMEOUT:
+ sock.settimeout(timeout)
+ if source_address:
+ sock.bind(source_address)
+ sock.connect(sa)
+ return sock
+
+ except error as _:
+ err = _
+ if sock is not None:
+ sock.close()
+
+ if err is not None:
+ raise err
+ else:
+ raise error("getaddrinfo returns an empty list")
diff --git a/contrib/python/future/future/backports/socketserver.py b/contrib/python/future/future/backports/socketserver.py
index 43d60b99e5..d1e24a6dd0 100644
--- a/contrib/python/future/future/backports/socketserver.py
+++ b/contrib/python/future/future/backports/socketserver.py
@@ -1,747 +1,747 @@
-"""Generic socket server classes.
-
-This module tries to capture the various aspects of defining a server:
-
-For socket-based servers:
-
-- address family:
- - AF_INET{,6}: IP (Internet Protocol) sockets (default)
- - AF_UNIX: Unix domain sockets
- - others, e.g. AF_DECNET are conceivable (see <socket.h>
-- socket type:
- - SOCK_STREAM (reliable stream, e.g. TCP)
- - SOCK_DGRAM (datagrams, e.g. UDP)
-
-For request-based servers (including socket-based):
-
-- client address verification before further looking at the request
- (This is actually a hook for any processing that needs to look
- at the request before anything else, e.g. logging)
-- how to handle multiple requests:
- - synchronous (one request is handled at a time)
- - forking (each request is handled by a new process)
- - threading (each request is handled by a new thread)
-
-The classes in this module favor the server type that is simplest to
-write: a synchronous TCP/IP server. This is bad class design, but
-save some typing. (There's also the issue that a deep class hierarchy
-slows down method lookups.)
-
-There are five classes in an inheritance diagram, four of which represent
-synchronous servers of four types:
-
- +------------+
- | BaseServer |
- +------------+
- |
- v
- +-----------+ +------------------+
- | TCPServer |------->| UnixStreamServer |
- +-----------+ +------------------+
- |
- v
- +-----------+ +--------------------+
- | UDPServer |------->| UnixDatagramServer |
- +-----------+ +--------------------+
-
-Note that UnixDatagramServer derives from UDPServer, not from
-UnixStreamServer -- the only difference between an IP and a Unix
-stream server is the address family, which is simply repeated in both
-unix server classes.
-
-Forking and threading versions of each type of server can be created
-using the ForkingMixIn and ThreadingMixIn mix-in classes. For
-instance, a threading UDP server class is created as follows:
-
- class ThreadingUDPServer(ThreadingMixIn, UDPServer): pass
-
-The Mix-in class must come first, since it overrides a method defined
-in UDPServer! Setting the various member variables also changes
-the behavior of the underlying server mechanism.
-
-To implement a service, you must derive a class from
-BaseRequestHandler and redefine its handle() method. You can then run
-various versions of the service by combining one of the server classes
-with your request handler class.
-
-The request handler class must be different for datagram or stream
-services. This can be hidden by using the request handler
-subclasses StreamRequestHandler or DatagramRequestHandler.
-
-Of course, you still have to use your head!
-
-For instance, it makes no sense to use a forking server if the service
-contains state in memory that can be modified by requests (since the
-modifications in the child process would never reach the initial state
-kept in the parent process and passed to each child). In this case,
-you can use a threading server, but you will probably have to use
-locks to avoid two requests that come in nearly simultaneous to apply
-conflicting changes to the server state.
-
-On the other hand, if you are building e.g. an HTTP server, where all
-data is stored externally (e.g. in the file system), a synchronous
-class will essentially render the service "deaf" while one request is
-being handled -- which may be for a very long time if a client is slow
-to read all the data it has requested. Here a threading or forking
-server is appropriate.
-
-In some cases, it may be appropriate to process part of a request
-synchronously, but to finish processing in a forked child depending on
-the request data. This can be implemented by using a synchronous
-server and doing an explicit fork in the request handler class
-handle() method.
-
-Another approach to handling multiple simultaneous requests in an
-environment that supports neither threads nor fork (or where these are
-too expensive or inappropriate for the service) is to maintain an
-explicit table of partially finished requests and to use select() to
-decide which request to work on next (or whether to handle a new
-incoming request). This is particularly important for stream services
-where each client can potentially be connected for a long time (if
-threads or subprocesses cannot be used).
-
-Future work:
-- Standard classes for Sun RPC (which uses either UDP or TCP)
-- Standard mix-in classes to implement various authentication
- and encryption schemes
-- Standard framework for select-based multiplexing
-
-XXX Open problems:
-- What to do with out-of-band data?
-
-BaseServer:
-- split generic "request" functionality out into BaseServer class.
- Copyright (C) 2000 Luke Kenneth Casson Leighton <lkcl@samba.org>
-
- example: read entries from a SQL database (requires overriding
- get_request() to return a table entry from the database).
- entry is processed by a RequestHandlerClass.
-
-"""
-
-# Author of the BaseServer patch: Luke Kenneth Casson Leighton
-
-# XXX Warning!
-# There is a test suite for this module, but it cannot be run by the
-# standard regression test.
-# To run it manually, run Lib/test/test_socketserver.py.
-
-from __future__ import (absolute_import, print_function)
-
-__version__ = "0.4"
-
-
-import socket
-import select
-import sys
-import os
-import errno
-try:
- import threading
-except ImportError:
- import dummy_threading as threading
-
-__all__ = ["TCPServer","UDPServer","ForkingUDPServer","ForkingTCPServer",
- "ThreadingUDPServer","ThreadingTCPServer","BaseRequestHandler",
- "StreamRequestHandler","DatagramRequestHandler",
- "ThreadingMixIn", "ForkingMixIn"]
-if hasattr(socket, "AF_UNIX"):
- __all__.extend(["UnixStreamServer","UnixDatagramServer",
- "ThreadingUnixStreamServer",
- "ThreadingUnixDatagramServer"])
-
-def _eintr_retry(func, *args):
- """restart a system call interrupted by EINTR"""
- while True:
- try:
- return func(*args)
- except OSError as e:
- if e.errno != errno.EINTR:
- raise
-
-class BaseServer(object):
-
- """Base class for server classes.
-
- Methods for the caller:
-
- - __init__(server_address, RequestHandlerClass)
- - serve_forever(poll_interval=0.5)
- - shutdown()
- - handle_request() # if you do not use serve_forever()
- - fileno() -> int # for select()
-
- Methods that may be overridden:
-
- - server_bind()
- - server_activate()
- - get_request() -> request, client_address
- - handle_timeout()
- - verify_request(request, client_address)
- - server_close()
- - process_request(request, client_address)
- - shutdown_request(request)
- - close_request(request)
- - service_actions()
- - handle_error()
-
- Methods for derived classes:
-
- - finish_request(request, client_address)
-
- Class variables that may be overridden by derived classes or
- instances:
-
- - timeout
- - address_family
- - socket_type
- - allow_reuse_address
-
- Instance variables:
-
- - RequestHandlerClass
- - socket
-
- """
-
- timeout = None
-
- def __init__(self, server_address, RequestHandlerClass):
- """Constructor. May be extended, do not override."""
- self.server_address = server_address
- self.RequestHandlerClass = RequestHandlerClass
- self.__is_shut_down = threading.Event()
- self.__shutdown_request = False
-
- def server_activate(self):
- """Called by constructor to activate the server.
-
- May be overridden.
-
- """
- pass
-
- def serve_forever(self, poll_interval=0.5):
- """Handle one request at a time until shutdown.
-
- Polls for shutdown every poll_interval seconds. Ignores
- self.timeout. If you need to do periodic tasks, do them in
- another thread.
- """
- self.__is_shut_down.clear()
- try:
- while not self.__shutdown_request:
- # XXX: Consider using another file descriptor or
- # connecting to the socket to wake this up instead of
- # polling. Polling reduces our responsiveness to a
- # shutdown request and wastes cpu at all other times.
- r, w, e = _eintr_retry(select.select, [self], [], [],
- poll_interval)
- if self in r:
- self._handle_request_noblock()
-
- self.service_actions()
- finally:
- self.__shutdown_request = False
- self.__is_shut_down.set()
-
- def shutdown(self):
- """Stops the serve_forever loop.
-
- Blocks until the loop has finished. This must be called while
- serve_forever() is running in another thread, or it will
- deadlock.
- """
- self.__shutdown_request = True
- self.__is_shut_down.wait()
-
- def service_actions(self):
- """Called by the serve_forever() loop.
-
- May be overridden by a subclass / Mixin to implement any code that
- needs to be run during the loop.
- """
- pass
-
- # The distinction between handling, getting, processing and
- # finishing a request is fairly arbitrary. Remember:
- #
- # - handle_request() is the top-level call. It calls
- # select, get_request(), verify_request() and process_request()
- # - get_request() is different for stream or datagram sockets
- # - process_request() is the place that may fork a new process
- # or create a new thread to finish the request
- # - finish_request() instantiates the request handler class;
- # this constructor will handle the request all by itself
-
- def handle_request(self):
- """Handle one request, possibly blocking.
-
- Respects self.timeout.
- """
- # Support people who used socket.settimeout() to escape
- # handle_request before self.timeout was available.
- timeout = self.socket.gettimeout()
- if timeout is None:
- timeout = self.timeout
- elif self.timeout is not None:
- timeout = min(timeout, self.timeout)
- fd_sets = _eintr_retry(select.select, [self], [], [], timeout)
- if not fd_sets[0]:
- self.handle_timeout()
- return
- self._handle_request_noblock()
-
- def _handle_request_noblock(self):
- """Handle one request, without blocking.
-
- I assume that select.select has returned that the socket is
- readable before this function was called, so there should be
- no risk of blocking in get_request().
- """
- try:
- request, client_address = self.get_request()
- except socket.error:
- return
- if self.verify_request(request, client_address):
- try:
- self.process_request(request, client_address)
- except:
- self.handle_error(request, client_address)
- self.shutdown_request(request)
-
- def handle_timeout(self):
- """Called if no new request arrives within self.timeout.
-
- Overridden by ForkingMixIn.
- """
- pass
-
- def verify_request(self, request, client_address):
- """Verify the request. May be overridden.
-
- Return True if we should proceed with this request.
-
- """
- return True
-
- def process_request(self, request, client_address):
- """Call finish_request.
-
- Overridden by ForkingMixIn and ThreadingMixIn.
-
- """
- self.finish_request(request, client_address)
- self.shutdown_request(request)
-
- def server_close(self):
- """Called to clean-up the server.
-
- May be overridden.
-
- """
- pass
-
- def finish_request(self, request, client_address):
- """Finish one request by instantiating RequestHandlerClass."""
- self.RequestHandlerClass(request, client_address, self)
-
- def shutdown_request(self, request):
- """Called to shutdown and close an individual request."""
- self.close_request(request)
-
- def close_request(self, request):
- """Called to clean up an individual request."""
- pass
-
- def handle_error(self, request, client_address):
- """Handle an error gracefully. May be overridden.
-
- The default is to print a traceback and continue.
-
- """
- print('-'*40)
- print('Exception happened during processing of request from', end=' ')
- print(client_address)
- import traceback
- traceback.print_exc() # XXX But this goes to stderr!
- print('-'*40)
-
-
-class TCPServer(BaseServer):
-
- """Base class for various socket-based server classes.
-
- Defaults to synchronous IP stream (i.e., TCP).
-
- Methods for the caller:
-
- - __init__(server_address, RequestHandlerClass, bind_and_activate=True)
- - serve_forever(poll_interval=0.5)
- - shutdown()
- - handle_request() # if you don't use serve_forever()
- - fileno() -> int # for select()
-
- Methods that may be overridden:
-
- - server_bind()
- - server_activate()
- - get_request() -> request, client_address
- - handle_timeout()
- - verify_request(request, client_address)
- - process_request(request, client_address)
- - shutdown_request(request)
- - close_request(request)
- - handle_error()
-
- Methods for derived classes:
-
- - finish_request(request, client_address)
-
- Class variables that may be overridden by derived classes or
- instances:
-
- - timeout
- - address_family
- - socket_type
- - request_queue_size (only for stream sockets)
- - allow_reuse_address
-
- Instance variables:
-
- - server_address
- - RequestHandlerClass
- - socket
-
- """
-
- address_family = socket.AF_INET
-
- socket_type = socket.SOCK_STREAM
-
- request_queue_size = 5
-
- allow_reuse_address = False
-
- def __init__(self, server_address, RequestHandlerClass, bind_and_activate=True):
- """Constructor. May be extended, do not override."""
- BaseServer.__init__(self, server_address, RequestHandlerClass)
- self.socket = socket.socket(self.address_family,
- self.socket_type)
- if bind_and_activate:
- self.server_bind()
- self.server_activate()
-
- def server_bind(self):
- """Called by constructor to bind the socket.
-
- May be overridden.
-
- """
- if self.allow_reuse_address:
- self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
- self.socket.bind(self.server_address)
- self.server_address = self.socket.getsockname()
-
- def server_activate(self):
- """Called by constructor to activate the server.
-
- May be overridden.
-
- """
- self.socket.listen(self.request_queue_size)
-
- def server_close(self):
- """Called to clean-up the server.
-
- May be overridden.
-
- """
- self.socket.close()
-
- def fileno(self):
- """Return socket file number.
-
- Interface required by select().
-
- """
- return self.socket.fileno()
-
- def get_request(self):
- """Get the request and client address from the socket.
-
- May be overridden.
-
- """
- return self.socket.accept()
-
- def shutdown_request(self, request):
- """Called to shutdown and close an individual request."""
- try:
- #explicitly shutdown. socket.close() merely releases
- #the socket and waits for GC to perform the actual close.
- request.shutdown(socket.SHUT_WR)
- except socket.error:
- pass #some platforms may raise ENOTCONN here
- self.close_request(request)
-
- def close_request(self, request):
- """Called to clean up an individual request."""
- request.close()
-
-
-class UDPServer(TCPServer):
-
- """UDP server class."""
-
- allow_reuse_address = False
-
- socket_type = socket.SOCK_DGRAM
-
- max_packet_size = 8192
-
- def get_request(self):
- data, client_addr = self.socket.recvfrom(self.max_packet_size)
- return (data, self.socket), client_addr
-
- def server_activate(self):
- # No need to call listen() for UDP.
- pass
-
- def shutdown_request(self, request):
- # No need to shutdown anything.
- self.close_request(request)
-
- def close_request(self, request):
- # No need to close anything.
- pass
-
-class ForkingMixIn(object):
-
- """Mix-in class to handle each request in a new process."""
-
- timeout = 300
- active_children = None
- max_children = 40
-
- def collect_children(self):
- """Internal routine to wait for children that have exited."""
- if self.active_children is None: return
- while len(self.active_children) >= self.max_children:
- # XXX: This will wait for any child process, not just ones
- # spawned by this library. This could confuse other
- # libraries that expect to be able to wait for their own
- # children.
- try:
- pid, status = os.waitpid(0, 0)
- except os.error:
- pid = None
- if pid not in self.active_children: continue
- self.active_children.remove(pid)
-
- # XXX: This loop runs more system calls than it ought
- # to. There should be a way to put the active_children into a
- # process group and then use os.waitpid(-pgid) to wait for any
- # of that set, but I couldn't find a way to allocate pgids
- # that couldn't collide.
- for child in self.active_children:
- try:
- pid, status = os.waitpid(child, os.WNOHANG)
- except os.error:
- pid = None
- if not pid: continue
- try:
- self.active_children.remove(pid)
- except ValueError as e:
- raise ValueError('%s. x=%d and list=%r' % (e.message, pid,
- self.active_children))
-
- def handle_timeout(self):
- """Wait for zombies after self.timeout seconds of inactivity.
-
- May be extended, do not override.
- """
- self.collect_children()
-
- def service_actions(self):
- """Collect the zombie child processes regularly in the ForkingMixIn.
-
- service_actions is called in the BaseServer's serve_forver loop.
- """
- self.collect_children()
-
- def process_request(self, request, client_address):
- """Fork a new subprocess to process the request."""
- pid = os.fork()
- if pid:
- # Parent process
- if self.active_children is None:
- self.active_children = []
- self.active_children.append(pid)
- self.close_request(request)
- return
- else:
- # Child process.
- # This must never return, hence os._exit()!
- try:
- self.finish_request(request, client_address)
- self.shutdown_request(request)
- os._exit(0)
- except:
- try:
- self.handle_error(request, client_address)
- self.shutdown_request(request)
- finally:
- os._exit(1)
-
-
-class ThreadingMixIn(object):
- """Mix-in class to handle each request in a new thread."""
-
- # Decides how threads will act upon termination of the
- # main process
- daemon_threads = False
-
- def process_request_thread(self, request, client_address):
- """Same as in BaseServer but as a thread.
-
- In addition, exception handling is done here.
-
- """
- try:
- self.finish_request(request, client_address)
- self.shutdown_request(request)
- except:
- self.handle_error(request, client_address)
- self.shutdown_request(request)
-
- def process_request(self, request, client_address):
- """Start a new thread to process the request."""
- t = threading.Thread(target = self.process_request_thread,
- args = (request, client_address))
- t.daemon = self.daemon_threads
- t.start()
-
-
-class ForkingUDPServer(ForkingMixIn, UDPServer): pass
-class ForkingTCPServer(ForkingMixIn, TCPServer): pass
-
-class ThreadingUDPServer(ThreadingMixIn, UDPServer): pass
-class ThreadingTCPServer(ThreadingMixIn, TCPServer): pass
-
-if hasattr(socket, 'AF_UNIX'):
-
- class UnixStreamServer(TCPServer):
- address_family = socket.AF_UNIX
-
- class UnixDatagramServer(UDPServer):
- address_family = socket.AF_UNIX
-
- class ThreadingUnixStreamServer(ThreadingMixIn, UnixStreamServer): pass
-
- class ThreadingUnixDatagramServer(ThreadingMixIn, UnixDatagramServer): pass
-
-class BaseRequestHandler(object):
-
- """Base class for request handler classes.
-
- This class is instantiated for each request to be handled. The
- constructor sets the instance variables request, client_address
- and server, and then calls the handle() method. To implement a
- specific service, all you need to do is to derive a class which
- defines a handle() method.
-
- The handle() method can find the request as self.request, the
- client address as self.client_address, and the server (in case it
- needs access to per-server information) as self.server. Since a
- separate instance is created for each request, the handle() method
- can define arbitrary other instance variariables.
-
- """
-
- def __init__(self, request, client_address, server):
- self.request = request
- self.client_address = client_address
- self.server = server
- self.setup()
- try:
- self.handle()
- finally:
- self.finish()
-
- def setup(self):
- pass
-
- def handle(self):
- pass
-
- def finish(self):
- pass
-
-
-# The following two classes make it possible to use the same service
-# class for stream or datagram servers.
-# Each class sets up these instance variables:
-# - rfile: a file object from which receives the request is read
-# - wfile: a file object to which the reply is written
-# When the handle() method returns, wfile is flushed properly
-
-
-class StreamRequestHandler(BaseRequestHandler):
-
- """Define self.rfile and self.wfile for stream sockets."""
-
- # Default buffer sizes for rfile, wfile.
- # We default rfile to buffered because otherwise it could be
- # really slow for large data (a getc() call per byte); we make
- # wfile unbuffered because (a) often after a write() we want to
- # read and we need to flush the line; (b) big writes to unbuffered
- # files are typically optimized by stdio even when big reads
- # aren't.
- rbufsize = -1
- wbufsize = 0
-
- # A timeout to apply to the request socket, if not None.
- timeout = None
-
- # Disable nagle algorithm for this socket, if True.
- # Use only when wbufsize != 0, to avoid small packets.
- disable_nagle_algorithm = False
-
- def setup(self):
- self.connection = self.request
- if self.timeout is not None:
- self.connection.settimeout(self.timeout)
- if self.disable_nagle_algorithm:
- self.connection.setsockopt(socket.IPPROTO_TCP,
- socket.TCP_NODELAY, True)
- self.rfile = self.connection.makefile('rb', self.rbufsize)
- self.wfile = self.connection.makefile('wb', self.wbufsize)
-
- def finish(self):
- if not self.wfile.closed:
- try:
- self.wfile.flush()
- except socket.error:
- # An final socket error may have occurred here, such as
- # the local error ECONNABORTED.
- pass
- self.wfile.close()
- self.rfile.close()
-
-
-class DatagramRequestHandler(BaseRequestHandler):
-
- # XXX Regrettably, I cannot get this working on Linux;
- # s.recvfrom() doesn't return a meaningful client address.
-
- """Define self.rfile and self.wfile for datagram sockets."""
-
- def setup(self):
- from io import BytesIO
- self.packet, self.socket = self.request
- self.rfile = BytesIO(self.packet)
- self.wfile = BytesIO()
-
- def finish(self):
- self.socket.sendto(self.wfile.getvalue(), self.client_address)
+"""Generic socket server classes.
+
+This module tries to capture the various aspects of defining a server:
+
+For socket-based servers:
+
+- address family:
+ - AF_INET{,6}: IP (Internet Protocol) sockets (default)
+ - AF_UNIX: Unix domain sockets
+ - others, e.g. AF_DECNET are conceivable (see <socket.h>)
+- socket type:
+ - SOCK_STREAM (reliable stream, e.g. TCP)
+ - SOCK_DGRAM (datagrams, e.g. UDP)
+
+For request-based servers (including socket-based):
+
+- client address verification before further looking at the request
+ (This is actually a hook for any processing that needs to look
+ at the request before anything else, e.g. logging)
+- how to handle multiple requests:
+ - synchronous (one request is handled at a time)
+ - forking (each request is handled by a new process)
+ - threading (each request is handled by a new thread)
+
+The classes in this module favor the server type that is simplest to
+write: a synchronous TCP/IP server. This is bad class design, but
+saves some typing. (There's also the issue that a deep class hierarchy
+slows down method lookups.)
+
+There are five classes in an inheritance diagram, four of which represent
+synchronous servers of four types:
+
+ +------------+
+ | BaseServer |
+ +------------+
+ |
+ v
+ +-----------+ +------------------+
+ | TCPServer |------->| UnixStreamServer |
+ +-----------+ +------------------+
+ |
+ v
+ +-----------+ +--------------------+
+ | UDPServer |------->| UnixDatagramServer |
+ +-----------+ +--------------------+
+
+Note that UnixDatagramServer derives from UDPServer, not from
+UnixStreamServer -- the only difference between an IP and a Unix
+stream server is the address family, which is simply repeated in both
+unix server classes.
+
+Forking and threading versions of each type of server can be created
+using the ForkingMixIn and ThreadingMixIn mix-in classes. For
+instance, a threading UDP server class is created as follows:
+
+ class ThreadingUDPServer(ThreadingMixIn, UDPServer): pass
+
+The Mix-in class must come first, since it overrides a method defined
+in UDPServer! Setting the various member variables also changes
+the behavior of the underlying server mechanism.
+
+To implement a service, you must derive a class from
+BaseRequestHandler and redefine its handle() method. You can then run
+various versions of the service by combining one of the server classes
+with your request handler class.
+
+The request handler class must be different for datagram or stream
+services. This can be hidden by using the request handler
+subclasses StreamRequestHandler or DatagramRequestHandler.
+
+Of course, you still have to use your head!
+
+For instance, it makes no sense to use a forking server if the service
+contains state in memory that can be modified by requests (since the
+modifications in the child process would never reach the initial state
+kept in the parent process and passed to each child). In this case,
+you can use a threading server, but you will probably have to use
+locks to prevent two requests that arrive nearly simultaneously from
+applying conflicting changes to the server state.
+
+On the other hand, if you are building e.g. an HTTP server, where all
+data is stored externally (e.g. in the file system), a synchronous
+class will essentially render the service "deaf" while one request is
+being handled -- which may be for a very long time if a client is slow
+to read all the data it has requested. Here a threading or forking
+server is appropriate.
+
+In some cases, it may be appropriate to process part of a request
+synchronously, but to finish processing in a forked child depending on
+the request data. This can be implemented by using a synchronous
+server and doing an explicit fork in the request handler class
+handle() method.
+
+Another approach to handling multiple simultaneous requests in an
+environment that supports neither threads nor fork (or where these are
+too expensive or inappropriate for the service) is to maintain an
+explicit table of partially finished requests and to use select() to
+decide which request to work on next (or whether to handle a new
+incoming request). This is particularly important for stream services
+where each client can potentially be connected for a long time (if
+threads or subprocesses cannot be used).
+
+Future work:
+- Standard classes for Sun RPC (which uses either UDP or TCP)
+- Standard mix-in classes to implement various authentication
+ and encryption schemes
+- Standard framework for select-based multiplexing
+
+XXX Open problems:
+- What to do with out-of-band data?
+
+BaseServer:
+- split generic "request" functionality out into BaseServer class.
+ Copyright (C) 2000 Luke Kenneth Casson Leighton <lkcl@samba.org>
+
+ example: read entries from a SQL database (requires overriding
+ get_request() to return a table entry from the database).
+ entry is processed by a RequestHandlerClass.
+
+"""
+
+# Author of the BaseServer patch: Luke Kenneth Casson Leighton
+
+# XXX Warning!
+# There is a test suite for this module, but it cannot be run by the
+# standard regression test.
+# To run it manually, run Lib/test/test_socketserver.py.
+
+from __future__ import (absolute_import, print_function)
+
+__version__ = "0.4"
+
+
+import socket
+import select
+import sys
+import os
+import errno
+try:
+ import threading
+except ImportError:
+ import dummy_threading as threading
+
+__all__ = ["TCPServer","UDPServer","ForkingUDPServer","ForkingTCPServer",
+ "ThreadingUDPServer","ThreadingTCPServer","BaseRequestHandler",
+ "StreamRequestHandler","DatagramRequestHandler",
+ "ThreadingMixIn", "ForkingMixIn"]
+if hasattr(socket, "AF_UNIX"):
+ __all__.extend(["UnixStreamServer","UnixDatagramServer",
+ "ThreadingUnixStreamServer",
+ "ThreadingUnixDatagramServer"])
+
+def _eintr_retry(func, *args):
+ """restart a system call interrupted by EINTR"""
+ while True:
+ try:
+ return func(*args)
+ except OSError as e:
+ if e.errno != errno.EINTR:
+ raise
+
+class BaseServer(object):
+
+ """Base class for server classes.
+
+ Methods for the caller:
+
+ - __init__(server_address, RequestHandlerClass)
+ - serve_forever(poll_interval=0.5)
+ - shutdown()
+ - handle_request() # if you do not use serve_forever()
+ - fileno() -> int # for select()
+
+ Methods that may be overridden:
+
+ - server_bind()
+ - server_activate()
+ - get_request() -> request, client_address
+ - handle_timeout()
+ - verify_request(request, client_address)
+ - server_close()
+ - process_request(request, client_address)
+ - shutdown_request(request)
+ - close_request(request)
+ - service_actions()
+ - handle_error()
+
+ Methods for derived classes:
+
+ - finish_request(request, client_address)
+
+ Class variables that may be overridden by derived classes or
+ instances:
+
+ - timeout
+ - address_family
+ - socket_type
+ - allow_reuse_address
+
+ Instance variables:
+
+ - RequestHandlerClass
+ - socket
+
+ """
+
+ timeout = None
+
+ def __init__(self, server_address, RequestHandlerClass):
+ """Constructor. May be extended, do not override."""
+ self.server_address = server_address
+ self.RequestHandlerClass = RequestHandlerClass
+ self.__is_shut_down = threading.Event()
+ self.__shutdown_request = False
+
+ def server_activate(self):
+ """Called by constructor to activate the server.
+
+ May be overridden.
+
+ """
+ pass
+
+ def serve_forever(self, poll_interval=0.5):
+ """Handle one request at a time until shutdown.
+
+ Polls for shutdown every poll_interval seconds. Ignores
+ self.timeout. If you need to do periodic tasks, do them in
+ another thread.
+ """
+ self.__is_shut_down.clear()
+ try:
+ while not self.__shutdown_request:
+ # XXX: Consider using another file descriptor or
+ # connecting to the socket to wake this up instead of
+ # polling. Polling reduces our responsiveness to a
+ # shutdown request and wastes cpu at all other times.
+ r, w, e = _eintr_retry(select.select, [self], [], [],
+ poll_interval)
+ if self in r:
+ self._handle_request_noblock()
+
+ self.service_actions()
+ finally:
+ self.__shutdown_request = False
+ self.__is_shut_down.set()
+
+ def shutdown(self):
+ """Stops the serve_forever loop.
+
+ Blocks until the loop has finished. This must be called while
+ serve_forever() is running in another thread, or it will
+ deadlock.
+ """
+ self.__shutdown_request = True
+ self.__is_shut_down.wait()
+
+ def service_actions(self):
+ """Called by the serve_forever() loop.
+
+ May be overridden by a subclass / Mixin to implement any code that
+ needs to be run during the loop.
+ """
+ pass
+
+ # The distinction between handling, getting, processing and
+ # finishing a request is fairly arbitrary. Remember:
+ #
+ # - handle_request() is the top-level call. It calls
+ # select, get_request(), verify_request() and process_request()
+ # - get_request() is different for stream or datagram sockets
+ # - process_request() is the place that may fork a new process
+ # or create a new thread to finish the request
+ # - finish_request() instantiates the request handler class;
+ # this constructor will handle the request all by itself
+
+ def handle_request(self):
+ """Handle one request, possibly blocking.
+
+ Respects self.timeout.
+ """
+ # Support people who used socket.settimeout() to escape
+ # handle_request before self.timeout was available.
+ timeout = self.socket.gettimeout()
+ if timeout is None:
+ timeout = self.timeout
+ elif self.timeout is not None:
+ timeout = min(timeout, self.timeout)
+ fd_sets = _eintr_retry(select.select, [self], [], [], timeout)
+ if not fd_sets[0]:
+ self.handle_timeout()
+ return
+ self._handle_request_noblock()
+
+ def _handle_request_noblock(self):
+ """Handle one request, without blocking.
+
+ I assume that select.select has returned that the socket is
+ readable before this function was called, so there should be
+ no risk of blocking in get_request().
+ """
+ try:
+ request, client_address = self.get_request()
+ except socket.error:
+ return
+ if self.verify_request(request, client_address):
+ try:
+ self.process_request(request, client_address)
+ except:
+ self.handle_error(request, client_address)
+ self.shutdown_request(request)
+
+ def handle_timeout(self):
+ """Called if no new request arrives within self.timeout.
+
+ Overridden by ForkingMixIn.
+ """
+ pass
+
+ def verify_request(self, request, client_address):
+ """Verify the request. May be overridden.
+
+ Return True if we should proceed with this request.
+
+ """
+ return True
+
+ def process_request(self, request, client_address):
+ """Call finish_request.
+
+ Overridden by ForkingMixIn and ThreadingMixIn.
+
+ """
+ self.finish_request(request, client_address)
+ self.shutdown_request(request)
+
+ def server_close(self):
+ """Called to clean-up the server.
+
+ May be overridden.
+
+ """
+ pass
+
+ def finish_request(self, request, client_address):
+ """Finish one request by instantiating RequestHandlerClass."""
+ self.RequestHandlerClass(request, client_address, self)
+
+ def shutdown_request(self, request):
+ """Called to shutdown and close an individual request."""
+ self.close_request(request)
+
+ def close_request(self, request):
+ """Called to clean up an individual request."""
+ pass
+
+ def handle_error(self, request, client_address):
+ """Handle an error gracefully. May be overridden.
+
+ The default is to print a traceback and continue.
+
+ """
+ print('-'*40)
+ print('Exception happened during processing of request from', end=' ')
+ print(client_address)
+ import traceback
+ traceback.print_exc() # XXX But this goes to stderr!
+ print('-'*40)
+
+
+class TCPServer(BaseServer):
+
+ """Base class for various socket-based server classes.
+
+ Defaults to synchronous IP stream (i.e., TCP).
+
+ Methods for the caller:
+
+ - __init__(server_address, RequestHandlerClass, bind_and_activate=True)
+ - serve_forever(poll_interval=0.5)
+ - shutdown()
+ - handle_request() # if you don't use serve_forever()
+ - fileno() -> int # for select()
+
+ Methods that may be overridden:
+
+ - server_bind()
+ - server_activate()
+ - get_request() -> request, client_address
+ - handle_timeout()
+ - verify_request(request, client_address)
+ - process_request(request, client_address)
+ - shutdown_request(request)
+ - close_request(request)
+ - handle_error()
+
+ Methods for derived classes:
+
+ - finish_request(request, client_address)
+
+ Class variables that may be overridden by derived classes or
+ instances:
+
+ - timeout
+ - address_family
+ - socket_type
+ - request_queue_size (only for stream sockets)
+ - allow_reuse_address
+
+ Instance variables:
+
+ - server_address
+ - RequestHandlerClass
+ - socket
+
+ """
+
+ address_family = socket.AF_INET
+
+ socket_type = socket.SOCK_STREAM
+
+ request_queue_size = 5
+
+ allow_reuse_address = False
+
+ def __init__(self, server_address, RequestHandlerClass, bind_and_activate=True):
+ """Constructor. May be extended, do not override."""
+ BaseServer.__init__(self, server_address, RequestHandlerClass)
+ self.socket = socket.socket(self.address_family,
+ self.socket_type)
+ if bind_and_activate:
+ self.server_bind()
+ self.server_activate()
+
+ def server_bind(self):
+ """Called by constructor to bind the socket.
+
+ May be overridden.
+
+ """
+ if self.allow_reuse_address:
+ self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+ self.socket.bind(self.server_address)
+ self.server_address = self.socket.getsockname()
+
+ def server_activate(self):
+ """Called by constructor to activate the server.
+
+ May be overridden.
+
+ """
+ self.socket.listen(self.request_queue_size)
+
+ def server_close(self):
+ """Called to clean-up the server.
+
+ May be overridden.
+
+ """
+ self.socket.close()
+
+ def fileno(self):
+ """Return socket file number.
+
+ Interface required by select().
+
+ """
+ return self.socket.fileno()
+
+ def get_request(self):
+ """Get the request and client address from the socket.
+
+ May be overridden.
+
+ """
+ return self.socket.accept()
+
+ def shutdown_request(self, request):
+ """Called to shutdown and close an individual request."""
+ try:
+ #explicitly shutdown. socket.close() merely releases
+ #the socket and waits for GC to perform the actual close.
+ request.shutdown(socket.SHUT_WR)
+ except socket.error:
+ pass #some platforms may raise ENOTCONN here
+ self.close_request(request)
+
+ def close_request(self, request):
+ """Called to clean up an individual request."""
+ request.close()
+
+
+class UDPServer(TCPServer):
+
+ """UDP server class."""
+
+ allow_reuse_address = False
+
+ socket_type = socket.SOCK_DGRAM
+
+ max_packet_size = 8192
+
+ def get_request(self):
+ data, client_addr = self.socket.recvfrom(self.max_packet_size)
+ return (data, self.socket), client_addr
+
+ def server_activate(self):
+ # No need to call listen() for UDP.
+ pass
+
+ def shutdown_request(self, request):
+ # No need to shutdown anything.
+ self.close_request(request)
+
+ def close_request(self, request):
+ # No need to close anything.
+ pass
+
+class ForkingMixIn(object):
+
+ """Mix-in class to handle each request in a new process."""
+
+ timeout = 300
+ active_children = None
+ max_children = 40
+
+ def collect_children(self):
+ """Internal routine to wait for children that have exited."""
+ if self.active_children is None: return
+ while len(self.active_children) >= self.max_children:
+ # XXX: This will wait for any child process, not just ones
+ # spawned by this library. This could confuse other
+ # libraries that expect to be able to wait for their own
+ # children.
+ try:
+ pid, status = os.waitpid(0, 0)
+ except os.error:
+ pid = None
+ if pid not in self.active_children: continue
+ self.active_children.remove(pid)
+
+ # XXX: This loop runs more system calls than it ought
+ # to. There should be a way to put the active_children into a
+ # process group and then use os.waitpid(-pgid) to wait for any
+ # of that set, but I couldn't find a way to allocate pgids
+ # that couldn't collide.
+ for child in self.active_children:
+ try:
+ pid, status = os.waitpid(child, os.WNOHANG)
+ except os.error:
+ pid = None
+ if not pid: continue
+ try:
+ self.active_children.remove(pid)
+ except ValueError as e:
+ raise ValueError('%s. x=%d and list=%r' % (e.message, pid,
+ self.active_children))
+
+ def handle_timeout(self):
+ """Wait for zombies after self.timeout seconds of inactivity.
+
+ May be extended, do not override.
+ """
+ self.collect_children()
+
+ def service_actions(self):
+ """Collect the zombie child processes regularly in the ForkingMixIn.
+
+ service_actions is called in the BaseServer's serve_forever loop.
+ """
+ self.collect_children()
+
+ def process_request(self, request, client_address):
+ """Fork a new subprocess to process the request."""
+ pid = os.fork()
+ if pid:
+ # Parent process
+ if self.active_children is None:
+ self.active_children = []
+ self.active_children.append(pid)
+ self.close_request(request)
+ return
+ else:
+ # Child process.
+ # This must never return, hence os._exit()!
+ try:
+ self.finish_request(request, client_address)
+ self.shutdown_request(request)
+ os._exit(0)
+ except:
+ try:
+ self.handle_error(request, client_address)
+ self.shutdown_request(request)
+ finally:
+ os._exit(1)
+
+
+class ThreadingMixIn(object):
+ """Mix-in class to handle each request in a new thread."""
+
+ # Decides how threads will act upon termination of the
+ # main process
+ daemon_threads = False
+
+ def process_request_thread(self, request, client_address):
+ """Same as in BaseServer but as a thread.
+
+ In addition, exception handling is done here.
+
+ """
+ try:
+ self.finish_request(request, client_address)
+ self.shutdown_request(request)
+ except:
+ self.handle_error(request, client_address)
+ self.shutdown_request(request)
+
+ def process_request(self, request, client_address):
+ """Start a new thread to process the request."""
+ t = threading.Thread(target = self.process_request_thread,
+ args = (request, client_address))
+ t.daemon = self.daemon_threads
+ t.start()
+
+
+class ForkingUDPServer(ForkingMixIn, UDPServer): pass
+class ForkingTCPServer(ForkingMixIn, TCPServer): pass
+
+class ThreadingUDPServer(ThreadingMixIn, UDPServer): pass
+class ThreadingTCPServer(ThreadingMixIn, TCPServer): pass
+
+if hasattr(socket, 'AF_UNIX'):
+
+ class UnixStreamServer(TCPServer):
+ address_family = socket.AF_UNIX
+
+ class UnixDatagramServer(UDPServer):
+ address_family = socket.AF_UNIX
+
+ class ThreadingUnixStreamServer(ThreadingMixIn, UnixStreamServer): pass
+
+ class ThreadingUnixDatagramServer(ThreadingMixIn, UnixDatagramServer): pass
+
+class BaseRequestHandler(object):
+
+ """Base class for request handler classes.
+
+ This class is instantiated for each request to be handled. The
+ constructor sets the instance variables request, client_address
+ and server, and then calls the handle() method. To implement a
+ specific service, all you need to do is to derive a class which
+ defines a handle() method.
+
+ The handle() method can find the request as self.request, the
+ client address as self.client_address, and the server (in case it
+ needs access to per-server information) as self.server. Since a
+ separate instance is created for each request, the handle() method
+ can define arbitrary other instance variables.
+
+ """
+
+ def __init__(self, request, client_address, server):
+ self.request = request
+ self.client_address = client_address
+ self.server = server
+ self.setup()
+ try:
+ self.handle()
+ finally:
+ self.finish()
+
+ def setup(self):
+ pass
+
+ def handle(self):
+ pass
+
+ def finish(self):
+ pass
+
+
+# The following two classes make it possible to use the same service
+# class for stream or datagram servers.
+# Each class sets up these instance variables:
+# - rfile: a file object from which the request is read
+# - wfile: a file object to which the reply is written
+# When the handle() method returns, wfile is flushed properly
+
+
+class StreamRequestHandler(BaseRequestHandler):
+
+ """Define self.rfile and self.wfile for stream sockets."""
+
+ # Default buffer sizes for rfile, wfile.
+ # We default rfile to buffered because otherwise it could be
+ # really slow for large data (a getc() call per byte); we make
+ # wfile unbuffered because (a) often after a write() we want to
+ # read and we need to flush the line; (b) big writes to unbuffered
+ # files are typically optimized by stdio even when big reads
+ # aren't.
+ rbufsize = -1
+ wbufsize = 0
+
+ # A timeout to apply to the request socket, if not None.
+ timeout = None
+
+ # Disable nagle algorithm for this socket, if True.
+ # Use only when wbufsize != 0, to avoid small packets.
+ disable_nagle_algorithm = False
+
+ def setup(self):
+ self.connection = self.request
+ if self.timeout is not None:
+ self.connection.settimeout(self.timeout)
+ if self.disable_nagle_algorithm:
+ self.connection.setsockopt(socket.IPPROTO_TCP,
+ socket.TCP_NODELAY, True)
+ self.rfile = self.connection.makefile('rb', self.rbufsize)
+ self.wfile = self.connection.makefile('wb', self.wbufsize)
+
+ def finish(self):
+ if not self.wfile.closed:
+ try:
+ self.wfile.flush()
+ except socket.error:
+ # A final socket error may have occurred here, such as
+ # the local error ECONNABORTED.
+ pass
+ self.wfile.close()
+ self.rfile.close()
+
+
+class DatagramRequestHandler(BaseRequestHandler):
+
+ # XXX Regrettably, I cannot get this working on Linux;
+ # s.recvfrom() doesn't return a meaningful client address.
+
+ """Define self.rfile and self.wfile for datagram sockets."""
+
+ def setup(self):
+ from io import BytesIO
+ self.packet, self.socket = self.request
+ self.rfile = BytesIO(self.packet)
+ self.wfile = BytesIO()
+
+ def finish(self):
+ self.socket.sendto(self.wfile.getvalue(), self.client_address)
diff --git a/contrib/python/future/future/backports/total_ordering.py b/contrib/python/future/future/backports/total_ordering.py
index 6bb94306e9..760f06d6c3 100644
--- a/contrib/python/future/future/backports/total_ordering.py
+++ b/contrib/python/future/future/backports/total_ordering.py
@@ -1,38 +1,38 @@
-"""
-For Python < 2.7.2. total_ordering in versions prior to 2.7.2 is buggy.
-See http://bugs.python.org/issue10042 for details. For these versions use
-code borrowed from Python 2.7.3.
-
-From django.utils.
-"""
-
-import sys
-if sys.version_info >= (2, 7, 2):
- from functools import total_ordering
-else:
- def total_ordering(cls):
- """Class decorator that fills in missing ordering methods"""
- convert = {
- '__lt__': [('__gt__', lambda self, other: not (self < other or self == other)),
- ('__le__', lambda self, other: self < other or self == other),
- ('__ge__', lambda self, other: not self < other)],
- '__le__': [('__ge__', lambda self, other: not self <= other or self == other),
- ('__lt__', lambda self, other: self <= other and not self == other),
- ('__gt__', lambda self, other: not self <= other)],
- '__gt__': [('__lt__', lambda self, other: not (self > other or self == other)),
- ('__ge__', lambda self, other: self > other or self == other),
- ('__le__', lambda self, other: not self > other)],
- '__ge__': [('__le__', lambda self, other: (not self >= other) or self == other),
- ('__gt__', lambda self, other: self >= other and not self == other),
- ('__lt__', lambda self, other: not self >= other)]
- }
- roots = set(dir(cls)) & set(convert)
- if not roots:
- raise ValueError('must define at least one ordering operation: < > <= >=')
- root = max(roots) # prefer __lt__ to __le__ to __gt__ to __ge__
- for opname, opfunc in convert[root]:
- if opname not in roots:
- opfunc.__name__ = opname
- opfunc.__doc__ = getattr(int, opname).__doc__
- setattr(cls, opname, opfunc)
- return cls
+"""
+For Python < 2.7.2. total_ordering in versions prior to 2.7.2 is buggy.
+See http://bugs.python.org/issue10042 for details. For these versions use
+code borrowed from Python 2.7.3.
+
+From django.utils.
+"""
+
+import sys
+if sys.version_info >= (2, 7, 2):
+ from functools import total_ordering
+else:
+ def total_ordering(cls):
+ """Class decorator that fills in missing ordering methods"""
+ convert = {
+ '__lt__': [('__gt__', lambda self, other: not (self < other or self == other)),
+ ('__le__', lambda self, other: self < other or self == other),
+ ('__ge__', lambda self, other: not self < other)],
+ '__le__': [('__ge__', lambda self, other: not self <= other or self == other),
+ ('__lt__', lambda self, other: self <= other and not self == other),
+ ('__gt__', lambda self, other: not self <= other)],
+ '__gt__': [('__lt__', lambda self, other: not (self > other or self == other)),
+ ('__ge__', lambda self, other: self > other or self == other),
+ ('__le__', lambda self, other: not self > other)],
+ '__ge__': [('__le__', lambda self, other: (not self >= other) or self == other),
+ ('__gt__', lambda self, other: self >= other and not self == other),
+ ('__lt__', lambda self, other: not self >= other)]
+ }
+ roots = set(dir(cls)) & set(convert)
+ if not roots:
+ raise ValueError('must define at least one ordering operation: < > <= >=')
+ root = max(roots) # prefer __lt__ to __le__ to __gt__ to __ge__
+ for opname, opfunc in convert[root]:
+ if opname not in roots:
+ opfunc.__name__ = opname
+ opfunc.__doc__ = getattr(int, opname).__doc__
+ setattr(cls, opname, opfunc)
+ return cls
diff --git a/contrib/python/future/future/backports/urllib/error.py b/contrib/python/future/future/backports/urllib/error.py
index 7945fcc4ca..a473e4453d 100644
--- a/contrib/python/future/future/backports/urllib/error.py
+++ b/contrib/python/future/future/backports/urllib/error.py
@@ -1,75 +1,75 @@
-"""Exception classes raised by urllib.
-
-The base exception class is URLError, which inherits from IOError. It
-doesn't define any behavior of its own, but is the base class for all
-exceptions defined in this package.
-
-HTTPError is an exception class that is also a valid HTTP response
-instance. It behaves this way because HTTP protocol errors are valid
-responses, with a status code, headers, and a body. In some contexts,
-an application may want to handle an exception like a regular
-response.
-"""
-from __future__ import absolute_import, division, unicode_literals
-from future import standard_library
-
-from future.backports.urllib import response as urllib_response
-
-
-__all__ = ['URLError', 'HTTPError', 'ContentTooShortError']
-
-
-# do these error classes make sense?
-# make sure all of the IOError stuff is overridden. we just want to be
-# subtypes.
-
-class URLError(IOError):
- # URLError is a sub-type of IOError, but it doesn't share any of
- # the implementation. need to override __init__ and __str__.
- # It sets self.args for compatibility with other EnvironmentError
- # subclasses, but args doesn't have the typical format with errno in
- # slot 0 and strerror in slot 1. This may be better than nothing.
- def __init__(self, reason, filename=None):
- self.args = reason,
- self.reason = reason
- if filename is not None:
- self.filename = filename
-
- def __str__(self):
- return '<urlopen error %s>' % self.reason
-
-class HTTPError(URLError, urllib_response.addinfourl):
- """Raised when HTTP error occurs, but also acts like non-error return"""
- __super_init = urllib_response.addinfourl.__init__
-
- def __init__(self, url, code, msg, hdrs, fp):
- self.code = code
- self.msg = msg
- self.hdrs = hdrs
- self.fp = fp
- self.filename = url
- # The addinfourl classes depend on fp being a valid file
- # object. In some cases, the HTTPError may not have a valid
- # file object. If this happens, the simplest workaround is to
- # not initialize the base classes.
- if fp is not None:
- self.__super_init(fp, hdrs, url, code)
-
- def __str__(self):
- return 'HTTP Error %s: %s' % (self.code, self.msg)
-
- # since URLError specifies a .reason attribute, HTTPError should also
- # provide this attribute. See issue13211 for discussion.
- @property
- def reason(self):
- return self.msg
-
- def info(self):
- return self.hdrs
-
-
-# exception raised when downloaded size does not match content-length
-class ContentTooShortError(URLError):
- def __init__(self, message, content):
- URLError.__init__(self, message)
- self.content = content
+"""Exception classes raised by urllib.
+
+The base exception class is URLError, which inherits from IOError. It
+doesn't define any behavior of its own, but is the base class for all
+exceptions defined in this package.
+
+HTTPError is an exception class that is also a valid HTTP response
+instance. It behaves this way because HTTP protocol errors are valid
+responses, with a status code, headers, and a body. In some contexts,
+an application may want to handle an exception like a regular
+response.
+"""
+from __future__ import absolute_import, division, unicode_literals
+from future import standard_library
+
+from future.backports.urllib import response as urllib_response
+
+
+__all__ = ['URLError', 'HTTPError', 'ContentTooShortError']
+
+
+# do these error classes make sense?
+# make sure all of the IOError stuff is overridden. we just want to be
+# subtypes.
+
+class URLError(IOError):
+ # URLError is a sub-type of IOError, but it doesn't share any of
+ # the implementation. need to override __init__ and __str__.
+ # It sets self.args for compatibility with other EnvironmentError
+ # subclasses, but args doesn't have the typical format with errno in
+ # slot 0 and strerror in slot 1. This may be better than nothing.
+ def __init__(self, reason, filename=None):
+ self.args = reason,
+ self.reason = reason
+ if filename is not None:
+ self.filename = filename
+
+ def __str__(self):
+ return '<urlopen error %s>' % self.reason
+
+class HTTPError(URLError, urllib_response.addinfourl):
+ """Raised when HTTP error occurs, but also acts like non-error return"""
+ __super_init = urllib_response.addinfourl.__init__
+
+ def __init__(self, url, code, msg, hdrs, fp):
+ self.code = code
+ self.msg = msg
+ self.hdrs = hdrs
+ self.fp = fp
+ self.filename = url
+ # The addinfourl classes depend on fp being a valid file
+ # object. In some cases, the HTTPError may not have a valid
+ # file object. If this happens, the simplest workaround is to
+ # not initialize the base classes.
+ if fp is not None:
+ self.__super_init(fp, hdrs, url, code)
+
+ def __str__(self):
+ return 'HTTP Error %s: %s' % (self.code, self.msg)
+
+ # since URLError specifies a .reason attribute, HTTPError should also
+ # provide this attribute. See issue13211 for discussion.
+ @property
+ def reason(self):
+ return self.msg
+
+ def info(self):
+ return self.hdrs
+
+
+# exception raised when downloaded size does not match content-length
+class ContentTooShortError(URLError):
+ def __init__(self, message, content):
+ URLError.__init__(self, message)
+ self.content = content
diff --git a/contrib/python/future/future/backports/urllib/parse.py b/contrib/python/future/future/backports/urllib/parse.py
index 367c3ca927..04e52d4925 100644
--- a/contrib/python/future/future/backports/urllib/parse.py
+++ b/contrib/python/future/future/backports/urllib/parse.py
@@ -1,991 +1,991 @@
-"""
-Ported using Python-Future from the Python 3.3 standard library.
-
-Parse (absolute and relative) URLs.
-
-urlparse module is based upon the following RFC specifications.
-
-RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding
-and L. Masinter, January 2005.
-
-RFC 2732 : "Format for Literal IPv6 Addresses in URL's by R.Hinden, B.Carpenter
-and L.Masinter, December 1999.
-
-RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T.
-Berners-Lee, R. Fielding, and L. Masinter, August 1998.
-
-RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zawinski, July 1998.
-
-RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June
-1995.
-
-RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M.
-McCahill, December 1994
-
-RFC 3986 is considered the current standard and any future changes to
-urlparse module should conform with it. The urlparse module is
-currently not entirely compliant with this RFC due to defacto
-scenarios for parsing, and for backward compatibility purposes, some
-parsing quirks from older RFCs are retained. The testcases in
-test_urlparse.py provides a good indicator of parsing behavior.
-"""
-from __future__ import absolute_import, division, unicode_literals
-from future.builtins import bytes, chr, dict, int, range, str
-from future.utils import raise_with_traceback
-
-import re
-import sys
-import collections
-
-__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
- "urlsplit", "urlunsplit", "urlencode", "parse_qs",
- "parse_qsl", "quote", "quote_plus", "quote_from_bytes",
- "unquote", "unquote_plus", "unquote_to_bytes"]
-
-# A classification of schemes ('' means apply by default)
-uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap',
- 'wais', 'file', 'https', 'shttp', 'mms',
- 'prospero', 'rtsp', 'rtspu', '', 'sftp',
- 'svn', 'svn+ssh']
-uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet',
- 'imap', 'wais', 'file', 'mms', 'https', 'shttp',
- 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',
- 'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh']
-uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap',
- 'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
- 'mms', '', 'sftp', 'tel']
-
-# These are not actually used anymore, but should stay for backwards
-# compatibility. (They are undocumented, but have a public-looking name.)
-non_hierarchical = ['gopher', 'hdl', 'mailto', 'news',
- 'telnet', 'wais', 'imap', 'snews', 'sip', 'sips']
-uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms',
- 'gopher', 'rtsp', 'rtspu', 'sip', 'sips', '']
-uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news',
- 'nntp', 'wais', 'https', 'shttp', 'snews',
- 'file', 'prospero', '']
-
-# Characters valid in scheme names
-scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
- 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
- '0123456789'
- '+-.')
-
-# XXX: Consider replacing with functools.lru_cache
-MAX_CACHE_SIZE = 20
-_parse_cache = {}
-
-def clear_cache():
- """Clear the parse cache and the quoters cache."""
- _parse_cache.clear()
- _safe_quoters.clear()
-
-
-# Helpers for bytes handling
-# For 3.2, we deliberately require applications that
-# handle improperly quoted URLs to do their own
-# decoding and encoding. If valid use cases are
-# presented, we may relax this by using latin-1
-# decoding internally for 3.3
-_implicit_encoding = 'ascii'
-_implicit_errors = 'strict'
-
-def _noop(obj):
- return obj
-
-def _encode_result(obj, encoding=_implicit_encoding,
- errors=_implicit_errors):
- return obj.encode(encoding, errors)
-
-def _decode_args(args, encoding=_implicit_encoding,
- errors=_implicit_errors):
- return tuple(x.decode(encoding, errors) if x else '' for x in args)
-
-def _coerce_args(*args):
- # Invokes decode if necessary to create str args
- # and returns the coerced inputs along with
- # an appropriate result coercion function
- # - noop for str inputs
- # - encoding function otherwise
- str_input = isinstance(args[0], str)
- for arg in args[1:]:
- # We special-case the empty string to support the
- # "scheme=''" default argument to some functions
- if arg and isinstance(arg, str) != str_input:
- raise TypeError("Cannot mix str and non-str arguments")
- if str_input:
- return args + (_noop,)
- return _decode_args(args) + (_encode_result,)
-
-# Result objects are more helpful than simple tuples
-class _ResultMixinStr(object):
- """Standard approach to encoding parsed results from str to bytes"""
- __slots__ = ()
-
- def encode(self, encoding='ascii', errors='strict'):
- return self._encoded_counterpart(*(x.encode(encoding, errors) for x in self))
-
-
-class _ResultMixinBytes(object):
- """Standard approach to decoding parsed results from bytes to str"""
- __slots__ = ()
-
- def decode(self, encoding='ascii', errors='strict'):
- return self._decoded_counterpart(*(x.decode(encoding, errors) for x in self))
-
-
-class _NetlocResultMixinBase(object):
- """Shared methods for the parsed result objects containing a netloc element"""
- __slots__ = ()
-
- @property
- def username(self):
- return self._userinfo[0]
-
- @property
- def password(self):
- return self._userinfo[1]
-
- @property
- def hostname(self):
- hostname = self._hostinfo[0]
- if not hostname:
- hostname = None
- elif hostname is not None:
- hostname = hostname.lower()
- return hostname
-
- @property
- def port(self):
- port = self._hostinfo[1]
- if port is not None:
- port = int(port, 10)
- # Return None on an illegal port
- if not ( 0 <= port <= 65535):
- return None
- return port
-
-
-class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr):
- __slots__ = ()
-
- @property
- def _userinfo(self):
- netloc = self.netloc
- userinfo, have_info, hostinfo = netloc.rpartition('@')
- if have_info:
- username, have_password, password = userinfo.partition(':')
- if not have_password:
- password = None
- else:
- username = password = None
- return username, password
-
- @property
- def _hostinfo(self):
- netloc = self.netloc
- _, _, hostinfo = netloc.rpartition('@')
- _, have_open_br, bracketed = hostinfo.partition('[')
- if have_open_br:
- hostname, _, port = bracketed.partition(']')
- _, have_port, port = port.partition(':')
- else:
- hostname, have_port, port = hostinfo.partition(':')
- if not have_port:
- port = None
- return hostname, port
-
-
-class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes):
- __slots__ = ()
-
- @property
- def _userinfo(self):
- netloc = self.netloc
- userinfo, have_info, hostinfo = netloc.rpartition(b'@')
- if have_info:
- username, have_password, password = userinfo.partition(b':')
- if not have_password:
- password = None
- else:
- username = password = None
- return username, password
-
- @property
- def _hostinfo(self):
- netloc = self.netloc
- _, _, hostinfo = netloc.rpartition(b'@')
- _, have_open_br, bracketed = hostinfo.partition(b'[')
- if have_open_br:
- hostname, _, port = bracketed.partition(b']')
- _, have_port, port = port.partition(b':')
- else:
- hostname, have_port, port = hostinfo.partition(b':')
- if not have_port:
- port = None
- return hostname, port
-
-
-from collections import namedtuple
-
-_DefragResultBase = namedtuple('DefragResult', 'url fragment')
-_SplitResultBase = namedtuple('SplitResult', 'scheme netloc path query fragment')
-_ParseResultBase = namedtuple('ParseResult', 'scheme netloc path params query fragment')
-
-# For backwards compatibility, alias _NetlocResultMixinStr
-# ResultBase is no longer part of the documented API, but it is
-# retained since deprecating it isn't worth the hassle
-ResultBase = _NetlocResultMixinStr
-
-# Structured result objects for string data
-class DefragResult(_DefragResultBase, _ResultMixinStr):
- __slots__ = ()
- def geturl(self):
- if self.fragment:
- return self.url + '#' + self.fragment
- else:
- return self.url
-
-class SplitResult(_SplitResultBase, _NetlocResultMixinStr):
- __slots__ = ()
- def geturl(self):
- return urlunsplit(self)
-
-class ParseResult(_ParseResultBase, _NetlocResultMixinStr):
- __slots__ = ()
- def geturl(self):
- return urlunparse(self)
-
-# Structured result objects for bytes data
-class DefragResultBytes(_DefragResultBase, _ResultMixinBytes):
- __slots__ = ()
- def geturl(self):
- if self.fragment:
- return self.url + b'#' + self.fragment
- else:
- return self.url
-
-class SplitResultBytes(_SplitResultBase, _NetlocResultMixinBytes):
- __slots__ = ()
- def geturl(self):
- return urlunsplit(self)
-
-class ParseResultBytes(_ParseResultBase, _NetlocResultMixinBytes):
- __slots__ = ()
- def geturl(self):
- return urlunparse(self)
-
-# Set up the encode/decode result pairs
-def _fix_result_transcoding():
- _result_pairs = (
- (DefragResult, DefragResultBytes),
- (SplitResult, SplitResultBytes),
- (ParseResult, ParseResultBytes),
- )
- for _decoded, _encoded in _result_pairs:
- _decoded._encoded_counterpart = _encoded
- _encoded._decoded_counterpart = _decoded
-
-_fix_result_transcoding()
-del _fix_result_transcoding
-
-def urlparse(url, scheme='', allow_fragments=True):
- """Parse a URL into 6 components:
- <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
- Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
- Note that we don't break the components up in smaller bits
- (e.g. netloc is a single string) and we don't expand % escapes."""
- url, scheme, _coerce_result = _coerce_args(url, scheme)
- splitresult = urlsplit(url, scheme, allow_fragments)
- scheme, netloc, url, query, fragment = splitresult
- if scheme in uses_params and ';' in url:
- url, params = _splitparams(url)
- else:
- params = ''
- result = ParseResult(scheme, netloc, url, params, query, fragment)
- return _coerce_result(result)
-
-def _splitparams(url):
- if '/' in url:
- i = url.find(';', url.rfind('/'))
- if i < 0:
- return url, ''
- else:
- i = url.find(';')
- return url[:i], url[i+1:]
-
-def _splitnetloc(url, start=0):
- delim = len(url) # position of end of domain part of url, default is end
- for c in '/?#': # look for delimiters; the order is NOT important
- wdelim = url.find(c, start) # find first of this delim
- if wdelim >= 0: # if found
- delim = min(delim, wdelim) # use earliest delim position
- return url[start:delim], url[delim:] # return (domain, rest)
-
-def urlsplit(url, scheme='', allow_fragments=True):
- """Parse a URL into 5 components:
- <scheme>://<netloc>/<path>?<query>#<fragment>
- Return a 5-tuple: (scheme, netloc, path, query, fragment).
- Note that we don't break the components up in smaller bits
- (e.g. netloc is a single string) and we don't expand % escapes."""
- url, scheme, _coerce_result = _coerce_args(url, scheme)
- allow_fragments = bool(allow_fragments)
- key = url, scheme, allow_fragments, type(url), type(scheme)
- cached = _parse_cache.get(key, None)
- if cached:
- return _coerce_result(cached)
- if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
- clear_cache()
- netloc = query = fragment = ''
- i = url.find(':')
- if i > 0:
- if url[:i] == 'http': # optimize the common case
- scheme = url[:i].lower()
- url = url[i+1:]
- if url[:2] == '//':
- netloc, url = _splitnetloc(url, 2)
- if (('[' in netloc and ']' not in netloc) or
- (']' in netloc and '[' not in netloc)):
- raise ValueError("Invalid IPv6 URL")
- if allow_fragments and '#' in url:
- url, fragment = url.split('#', 1)
- if '?' in url:
- url, query = url.split('?', 1)
- v = SplitResult(scheme, netloc, url, query, fragment)
- _parse_cache[key] = v
- return _coerce_result(v)
- for c in url[:i]:
- if c not in scheme_chars:
- break
- else:
- # make sure "url" is not actually a port number (in which case
- # "scheme" is really part of the path)
- rest = url[i+1:]
- if not rest or any(c not in '0123456789' for c in rest):
- # not a port number
- scheme, url = url[:i].lower(), rest
-
- if url[:2] == '//':
- netloc, url = _splitnetloc(url, 2)
- if (('[' in netloc and ']' not in netloc) or
- (']' in netloc and '[' not in netloc)):
- raise ValueError("Invalid IPv6 URL")
- if allow_fragments and '#' in url:
- url, fragment = url.split('#', 1)
- if '?' in url:
- url, query = url.split('?', 1)
- v = SplitResult(scheme, netloc, url, query, fragment)
- _parse_cache[key] = v
- return _coerce_result(v)
-
-def urlunparse(components):
- """Put a parsed URL back together again. This may result in a
- slightly different, but equivalent URL, if the URL that was parsed
- originally had redundant delimiters, e.g. a ? with an empty query
- (the draft states that these are equivalent)."""
- scheme, netloc, url, params, query, fragment, _coerce_result = (
- _coerce_args(*components))
- if params:
- url = "%s;%s" % (url, params)
- return _coerce_result(urlunsplit((scheme, netloc, url, query, fragment)))
-
-def urlunsplit(components):
- """Combine the elements of a tuple as returned by urlsplit() into a
- complete URL as a string. The data argument can be any five-item iterable.
- This may result in a slightly different, but equivalent URL, if the URL that
- was parsed originally had unnecessary delimiters (for example, a ? with an
- empty query; the RFC states that these are equivalent)."""
- scheme, netloc, url, query, fragment, _coerce_result = (
- _coerce_args(*components))
- if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
- if url and url[:1] != '/': url = '/' + url
- url = '//' + (netloc or '') + url
- if scheme:
- url = scheme + ':' + url
- if query:
- url = url + '?' + query
- if fragment:
- url = url + '#' + fragment
- return _coerce_result(url)
-
-def urljoin(base, url, allow_fragments=True):
- """Join a base URL and a possibly relative URL to form an absolute
- interpretation of the latter."""
- if not base:
- return url
- if not url:
- return base
- base, url, _coerce_result = _coerce_args(base, url)
- bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
- urlparse(base, '', allow_fragments)
- scheme, netloc, path, params, query, fragment = \
- urlparse(url, bscheme, allow_fragments)
- if scheme != bscheme or scheme not in uses_relative:
- return _coerce_result(url)
- if scheme in uses_netloc:
- if netloc:
- return _coerce_result(urlunparse((scheme, netloc, path,
- params, query, fragment)))
- netloc = bnetloc
- if path[:1] == '/':
- return _coerce_result(urlunparse((scheme, netloc, path,
- params, query, fragment)))
- if not path and not params:
- path = bpath
- params = bparams
- if not query:
- query = bquery
- return _coerce_result(urlunparse((scheme, netloc, path,
- params, query, fragment)))
- segments = bpath.split('/')[:-1] + path.split('/')
- # XXX The stuff below is bogus in various ways...
- if segments[-1] == '.':
- segments[-1] = ''
- while '.' in segments:
- segments.remove('.')
- while 1:
- i = 1
- n = len(segments) - 1
- while i < n:
- if (segments[i] == '..'
- and segments[i-1] not in ('', '..')):
- del segments[i-1:i+1]
- break
- i = i+1
- else:
- break
- if segments == ['', '..']:
- segments[-1] = ''
- elif len(segments) >= 2 and segments[-1] == '..':
- segments[-2:] = ['']
- return _coerce_result(urlunparse((scheme, netloc, '/'.join(segments),
- params, query, fragment)))
-
-def urldefrag(url):
- """Removes any existing fragment from URL.
-
- Returns a tuple of the defragmented URL and the fragment. If
- the URL contained no fragments, the second element is the
- empty string.
- """
- url, _coerce_result = _coerce_args(url)
- if '#' in url:
- s, n, p, a, q, frag = urlparse(url)
- defrag = urlunparse((s, n, p, a, q, ''))
- else:
- frag = ''
- defrag = url
- return _coerce_result(DefragResult(defrag, frag))
-
-_hexdig = '0123456789ABCDEFabcdef'
-_hextobyte = dict(((a + b).encode(), bytes([int(a + b, 16)]))
- for a in _hexdig for b in _hexdig)
-
-def unquote_to_bytes(string):
- """unquote_to_bytes('abc%20def') -> b'abc def'."""
- # Note: strings are encoded as UTF-8. This is only an issue if it contains
- # unescaped non-ASCII characters, which URIs should not.
- if not string:
- # Is it a string-like object?
- string.split
- return bytes(b'')
- if isinstance(string, str):
- string = string.encode('utf-8')
- ### For Python-Future:
- # It is already a byte-string object, but force it to be newbytes here on
- # Py2:
- string = bytes(string)
- ###
- bits = string.split(b'%')
- if len(bits) == 1:
- return string
- res = [bits[0]]
- append = res.append
- for item in bits[1:]:
- try:
- append(_hextobyte[item[:2]])
- append(item[2:])
- except KeyError:
- append(b'%')
- append(item)
- return bytes(b'').join(res)
-
-_asciire = re.compile('([\x00-\x7f]+)')
-
-def unquote(string, encoding='utf-8', errors='replace'):
- """Replace %xx escapes by their single-character equivalent. The optional
- encoding and errors parameters specify how to decode percent-encoded
- sequences into Unicode characters, as accepted by the bytes.decode()
- method.
- By default, percent-encoded sequences are decoded with UTF-8, and invalid
- sequences are replaced by a placeholder character.
-
- unquote('abc%20def') -> 'abc def'.
- """
- if '%' not in string:
- string.split
- return string
- if encoding is None:
- encoding = 'utf-8'
- if errors is None:
- errors = 'replace'
- bits = _asciire.split(string)
- res = [bits[0]]
- append = res.append
- for i in range(1, len(bits), 2):
- append(unquote_to_bytes(bits[i]).decode(encoding, errors))
- append(bits[i + 1])
- return ''.join(res)
-
-def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
- encoding='utf-8', errors='replace'):
- """Parse a query given as a string argument.
-
- Arguments:
-
- qs: percent-encoded query string to be parsed
-
- keep_blank_values: flag indicating whether blank values in
- percent-encoded queries should be treated as blank strings.
- A true value indicates that blanks should be retained as
- blank strings. The default false value indicates that
- blank values are to be ignored and treated as if they were
- not included.
-
- strict_parsing: flag indicating what to do with parsing errors.
- If false (the default), errors are silently ignored.
- If true, errors raise a ValueError exception.
-
- encoding and errors: specify how to decode percent-encoded sequences
- into Unicode characters, as accepted by the bytes.decode() method.
- """
- parsed_result = {}
- pairs = parse_qsl(qs, keep_blank_values, strict_parsing,
- encoding=encoding, errors=errors)
- for name, value in pairs:
- if name in parsed_result:
- parsed_result[name].append(value)
- else:
- parsed_result[name] = [value]
- return parsed_result
-
-def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
- encoding='utf-8', errors='replace'):
- """Parse a query given as a string argument.
-
- Arguments:
-
- qs: percent-encoded query string to be parsed
-
- keep_blank_values: flag indicating whether blank values in
- percent-encoded queries should be treated as blank strings. A
- true value indicates that blanks should be retained as blank
- strings. The default false value indicates that blank values
- are to be ignored and treated as if they were not included.
-
- strict_parsing: flag indicating what to do with parsing errors. If
- false (the default), errors are silently ignored. If true,
- errors raise a ValueError exception.
-
- encoding and errors: specify how to decode percent-encoded sequences
- into Unicode characters, as accepted by the bytes.decode() method.
-
- Returns a list, as G-d intended.
- """
- qs, _coerce_result = _coerce_args(qs)
- pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
- r = []
- for name_value in pairs:
- if not name_value and not strict_parsing:
- continue
- nv = name_value.split('=', 1)
- if len(nv) != 2:
- if strict_parsing:
- raise ValueError("bad query field: %r" % (name_value,))
- # Handle case of a control-name with no equal sign
- if keep_blank_values:
- nv.append('')
- else:
- continue
- if len(nv[1]) or keep_blank_values:
- name = nv[0].replace('+', ' ')
- name = unquote(name, encoding=encoding, errors=errors)
- name = _coerce_result(name)
- value = nv[1].replace('+', ' ')
- value = unquote(value, encoding=encoding, errors=errors)
- value = _coerce_result(value)
- r.append((name, value))
- return r
-
-def unquote_plus(string, encoding='utf-8', errors='replace'):
- """Like unquote(), but also replace plus signs by spaces, as required for
- unquoting HTML form values.
-
- unquote_plus('%7e/abc+def') -> '~/abc def'
- """
- string = string.replace('+', ' ')
- return unquote(string, encoding, errors)
-
-_ALWAYS_SAFE = frozenset(bytes(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
- b'abcdefghijklmnopqrstuvwxyz'
- b'0123456789'
- b'_.-'))
-_ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE)
-_safe_quoters = {}
-
-class Quoter(collections.defaultdict):
- """A mapping from bytes (in range(0,256)) to strings.
-
- String values are percent-encoded byte values, unless the key < 128, and
- in the "safe" set (either the specified safe set, or default set).
- """
- # Keeps a cache internally, using defaultdict, for efficiency (lookups
- # of cached keys don't call Python code at all).
- def __init__(self, safe):
- """safe: bytes object."""
- self.safe = _ALWAYS_SAFE.union(bytes(safe))
-
- def __repr__(self):
- # Without this, will just display as a defaultdict
- return "<Quoter %r>" % dict(self)
-
- def __missing__(self, b):
- # Handle a cache miss. Store quoted string in cache and return.
- res = chr(b) if b in self.safe else '%{0:02X}'.format(b)
- self[b] = res
- return res
-
-def quote(string, safe='/', encoding=None, errors=None):
- """quote('abc def') -> 'abc%20def'
-
- Each part of a URL, e.g. the path info, the query, etc., has a
- different set of reserved characters that must be quoted.
-
- RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
- the following reserved characters.
-
- reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
- "$" | ","
-
- Each of these characters is reserved in some component of a URL,
- but not necessarily in all of them.
-
- By default, the quote function is intended for quoting the path
- section of a URL. Thus, it will not encode '/'. This character
- is reserved, but in typical usage the quote function is being
- called on a path where the existing slash characters are used as
- reserved characters.
-
- string and safe may be either str or bytes objects. encoding must
- not be specified if string is a str.
-
- The optional encoding and errors parameters specify how to deal with
- non-ASCII characters, as accepted by the str.encode method.
- By default, encoding='utf-8' (characters are encoded with UTF-8), and
- errors='strict' (unsupported characters raise a UnicodeEncodeError).
- """
- if isinstance(string, str):
- if not string:
- return string
- if encoding is None:
- encoding = 'utf-8'
- if errors is None:
- errors = 'strict'
- string = string.encode(encoding, errors)
- else:
- if encoding is not None:
- raise TypeError("quote() doesn't support 'encoding' for bytes")
- if errors is not None:
- raise TypeError("quote() doesn't support 'errors' for bytes")
- return quote_from_bytes(string, safe)
-
-def quote_plus(string, safe='', encoding=None, errors=None):
- """Like quote(), but also replace ' ' with '+', as required for quoting
- HTML form values. Plus signs in the original string are escaped unless
- they are included in safe. It also does not have safe default to '/'.
- """
- # Check if ' ' in string, where string may either be a str or bytes. If
- # there are no spaces, the regular quote will produce the right answer.
- if ((isinstance(string, str) and ' ' not in string) or
- (isinstance(string, bytes) and b' ' not in string)):
- return quote(string, safe, encoding, errors)
- if isinstance(safe, str):
- space = str(' ')
- else:
- space = bytes(b' ')
- string = quote(string, safe + space, encoding, errors)
- return string.replace(' ', '+')
-
-def quote_from_bytes(bs, safe='/'):
- """Like quote(), but accepts a bytes object rather than a str, and does
- not perform string-to-bytes encoding. It always returns an ASCII string.
- quote_from_bytes(b'abc def\x3f') -> 'abc%20def%3f'
- """
- if not isinstance(bs, (bytes, bytearray)):
- raise TypeError("quote_from_bytes() expected bytes")
- if not bs:
- return str('')
- ### For Python-Future:
- bs = bytes(bs)
+"""
+Ported using Python-Future from the Python 3.3 standard library.
+
+Parse (absolute and relative) URLs.
+
+urlparse module is based upon the following RFC specifications.
+
+RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding
+and L. Masinter, January 2005.
+
+RFC 2732 : "Format for Literal IPv6 Addresses in URL's by R.Hinden, B.Carpenter
+and L.Masinter, December 1999.
+
+RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T.
+Berners-Lee, R. Fielding, and L. Masinter, August 1998.
+
+RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zawinski, July 1998.
+
+RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June
+1995.
+
+RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M.
+McCahill, December 1994
+
+RFC 3986 is considered the current standard and any future changes to
+urlparse module should conform with it. The urlparse module is
+currently not entirely compliant with this RFC due to defacto
+scenarios for parsing, and for backward compatibility purposes, some
+parsing quirks from older RFCs are retained. The testcases in
+test_urlparse.py provides a good indicator of parsing behavior.
+"""
+from __future__ import absolute_import, division, unicode_literals
+from future.builtins import bytes, chr, dict, int, range, str
+from future.utils import raise_with_traceback
+
+import re
+import sys
+import collections
+
+# Names exported by "from <this module> import *".
+__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
+ "urlsplit", "urlunsplit", "urlencode", "parse_qs",
+ "parse_qsl", "quote", "quote_plus", "quote_from_bytes",
+ "unquote", "unquote_plus", "unquote_to_bytes"]
+
+# A classification of schemes ('' means apply by default)
+# uses_relative: urljoin() only resolves a reference against the base when the
+# scheme is listed here; otherwise the reference is returned unchanged.
+uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap',
+ 'wais', 'file', 'https', 'shttp', 'mms',
+ 'prospero', 'rtsp', 'rtspu', '', 'sftp',
+ 'svn', 'svn+ssh']
+# uses_netloc: consulted by urlunsplit() and urljoin() to decide whether a
+# '//netloc' part applies to the scheme.
+uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet',
+ 'imap', 'wais', 'file', 'mms', 'https', 'shttp',
+ 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',
+ 'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh']
+# uses_params: urlparse() only splits ';params' off the path for these schemes.
+uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap',
+ 'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
+ 'mms', '', 'sftp', 'tel']
+
+# These are not actually used anymore, but should stay for backwards
+# compatibility. (They are undocumented, but have a public-looking name.)
+non_hierarchical = ['gopher', 'hdl', 'mailto', 'news',
+ 'telnet', 'wais', 'imap', 'snews', 'sip', 'sips']
+uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms',
+ 'gopher', 'rtsp', 'rtspu', 'sip', 'sips', '']
+uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news',
+ 'nntp', 'wais', 'https', 'shttp', 'snews',
+ 'file', 'prospero', '']
+
+# Characters valid in scheme names
+# (urlsplit() checks every character before the first ':' against this string).
+scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
+ 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+ '0123456789'
+ '+-.')
+
+# XXX: Consider replacing with functools.lru_cache
+# urlsplit() calls clear_cache() once _parse_cache holds this many entries.
+MAX_CACHE_SIZE = 20
+# Maps (url, scheme, allow_fragments, type(url), type(scheme)) to the
+# SplitResult that urlsplit() computed for it.
+_parse_cache = {}
+
+def clear_cache():
+    """Empty both module-level caches: parsed URLs and safe-character quoters."""
+    for cache in (_parse_cache, _safe_quoters):
+        cache.clear()
+
+
+# Helpers for bytes handling
+# For 3.2, we deliberately require applications that
+# handle improperly quoted URLs to do their own
+# decoding and encoding. If valid use cases are
+# presented, we may relax this by using latin-1
+# decoding internally for 3.3
+# Codec and error handler applied whenever bytes arguments are coerced
+# to str (and results coerced back) without an explicit choice.
+_implicit_encoding = 'ascii'
+_implicit_errors = 'strict'
+
+def _noop(obj):
+    """Identity coercion: hand the result back untouched (str-input case)."""
+    return obj
+
+def _encode_result(obj, encoding=_implicit_encoding,
+                   errors=_implicit_errors):
+    """Return obj.encode(encoding, errors); used to turn str results into bytes."""
+    encoded = obj.encode(encoding, errors)
+    return encoded
+
+def _decode_args(args, encoding=_implicit_encoding,
+                 errors=_implicit_errors):
+    """Decode each truthy item of args to str; falsy items become ''.
+
+    Returns a tuple with one entry per item of args.
+    """
+    decoded = []
+    for item in args:
+        decoded.append(item.decode(encoding, errors) if item else '')
+    return tuple(decoded)
+
+def _coerce_args(*args):
+    """Normalise the arguments to str and pick the matching result coercion.
+
+    Returns the (possibly decoded) arguments followed by one extra item:
+    _noop when the first argument is a str, _encode_result otherwise.
+    Raises TypeError when a later non-empty argument disagrees with the
+    first one about being a str.
+    """
+    first_is_str = isinstance(args[0], str)
+    # Empty values are exempt so that defaults such as scheme='' work for
+    # both str and bytes callers.
+    mixed = any(item and isinstance(item, str) != first_is_str
+                for item in args[1:])
+    if mixed:
+        raise TypeError("Cannot mix str and non-str arguments")
+    if not first_is_str:
+        return _decode_args(args) + (_encode_result,)
+    return args + (_noop,)
+
+# Result objects are more helpful than simple tuples
+class _ResultMixinStr(object):
+    """Mixin giving a str result tuple an encode() that yields its bytes twin."""
+    __slots__ = ()
+
+    def encode(self, encoding='ascii', errors='strict'):
+        """Encode every field and rebuild the result as _encoded_counterpart."""
+        encoded_fields = [field.encode(encoding, errors) for field in self]
+        return self._encoded_counterpart(*encoded_fields)
+
+
+class _ResultMixinBytes(object):
+    """Mixin giving a bytes result tuple a decode() that yields its str twin."""
+    __slots__ = ()
+
+    def decode(self, encoding='ascii', errors='strict'):
+        """Decode every field and rebuild the result as _decoded_counterpart."""
+        decoded_fields = [field.decode(encoding, errors) for field in self]
+        return self._decoded_counterpart(*decoded_fields)
+
+
+class _NetlocResultMixinBase(object):
+    """Accessors shared by result tuples that carry a netloc field.
+
+    Subclasses supply _userinfo -> (username, password) and
+    _hostinfo -> (hostname, port); the properties here only post-process them.
+    """
+    __slots__ = ()
+
+    @property
+    def username(self):
+        """First item of _userinfo."""
+        return self._userinfo[0]
+
+    @property
+    def password(self):
+        """Second item of _userinfo."""
+        return self._userinfo[1]
+
+    @property
+    def hostname(self):
+        """Lower-cased host from _hostinfo, or None when it is empty."""
+        host = self._hostinfo[0]
+        return host.lower() if host else None
+
+    @property
+    def port(self):
+        """Port from _hostinfo as an int.
+
+        None when no port was given or when the number lies outside
+        0..65535; a non-numeric port makes int() raise ValueError.
+        """
+        raw = self._hostinfo[1]
+        if raw is None:
+            return None
+        number = int(raw, 10)
+        return number if 0 <= number <= 65535 else None
+
+
+class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr):
+    """str flavour of the netloc helpers: splits self.netloc on '@', ':', '[' and ']'."""
+    __slots__ = ()
+
+    @property
+    def _userinfo(self):
+        """Return (username, password) taken from before the last '@'.
+
+        Both are None without an '@'; password is None without a ':'.
+        """
+        credentials, at_sign, _ = self.netloc.rpartition('@')
+        if not at_sign:
+            return None, None
+        user, colon, secret = credentials.partition(':')
+        return user, (secret if colon else None)
+
+    @property
+    def _hostinfo(self):
+        """Return (hostname, port) taken from after the last '@'.
+
+        A bracketed host ('[...]') is returned without its brackets; port is
+        None when no ':' introduces one.
+        """
+        host_part = self.netloc.rpartition('@')[2]
+        _, bracket, inside = host_part.partition('[')
+        if bracket:
+            host, _, trailer = inside.partition(']')
+            _, colon, port = trailer.partition(':')
+        else:
+            host, colon, port = host_part.partition(':')
+        return host, (port if colon else None)
+
+
+class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes):
+    """bytes flavour of the netloc helpers: splits self.netloc on b'@', b':', b'[' and b']'."""
+    __slots__ = ()
+
+    @property
+    def _userinfo(self):
+        """Return (username, password) taken from before the last b'@'.
+
+        Both are None without a b'@'; password is None without a b':'.
+        """
+        credentials, at_sign, _ = self.netloc.rpartition(b'@')
+        if not at_sign:
+            return None, None
+        user, colon, secret = credentials.partition(b':')
+        return user, (secret if colon else None)
+
+    @property
+    def _hostinfo(self):
+        """Return (hostname, port) taken from after the last b'@'.
+
+        A bracketed host (b'[...]') is returned without its brackets; port is
+        None when no b':' introduces one.
+        """
+        host_part = self.netloc.rpartition(b'@')[2]
+        _, bracket, inside = host_part.partition(b'[')
+        if bracket:
+            host, _, trailer = inside.partition(b']')
+            _, colon, port = trailer.partition(b':')
+        else:
+            host, colon, port = host_part.partition(b':')
+        return host, (port if colon else None)
+
+
+from collections import namedtuple
+
+# Plain tuple layouts; each one is the base of both a str result class and
+# a bytes result class defined further down in this module.
+_DefragResultBase = namedtuple('DefragResult', 'url fragment')
+_SplitResultBase = namedtuple('SplitResult', 'scheme netloc path query fragment')
+_ParseResultBase = namedtuple('ParseResult', 'scheme netloc path params query fragment')
+
+# For backwards compatibility, alias _NetlocResultMixinStr
+# ResultBase is no longer part of the documented API, but it is
+# retained since deprecating it isn't worth the hassle
+ResultBase = _NetlocResultMixinStr
+
+# Structured result objects for string data
+class DefragResult(_DefragResultBase, _ResultMixinStr):
+    """(url, fragment) pair holding str values."""
+    __slots__ = ()
+
+    def geturl(self):
+        """Return url, with '#' + fragment appended when the fragment is non-empty."""
+        if not self.fragment:
+            return self.url
+        return self.url + '#' + self.fragment
+
+class SplitResult(_SplitResultBase, _NetlocResultMixinStr):
+    """(scheme, netloc, path, query, fragment) tuple holding str values."""
+    __slots__ = ()
+
+    def geturl(self):
+        """Reassemble the URL by passing this tuple to urlunsplit()."""
+        return urlunsplit(self)
+
+class ParseResult(_ParseResultBase, _NetlocResultMixinStr):
+    """(scheme, netloc, path, params, query, fragment) tuple holding str values."""
+    __slots__ = ()
+
+    def geturl(self):
+        """Reassemble the URL by passing this tuple to urlunparse()."""
+        return urlunparse(self)
+
+# Structured result objects for bytes data
+class DefragResultBytes(_DefragResultBase, _ResultMixinBytes):
+    """(url, fragment) pair holding bytes values."""
+    __slots__ = ()
+
+    def geturl(self):
+        """Return url, with b'#' + fragment appended when the fragment is non-empty."""
+        if not self.fragment:
+            return self.url
+        return self.url + b'#' + self.fragment
+
+class SplitResultBytes(_SplitResultBase, _NetlocResultMixinBytes):
+    """(scheme, netloc, path, query, fragment) tuple holding bytes values."""
+    __slots__ = ()
+
+    def geturl(self):
+        """Reassemble the URL by passing this tuple to urlunsplit()."""
+        return urlunsplit(self)
+
+class ParseResultBytes(_ParseResultBase, _NetlocResultMixinBytes):
+    """(scheme, netloc, path, params, query, fragment) tuple holding bytes values."""
+    __slots__ = ()
+
+    def geturl(self):
+        """Reassemble the URL by passing this tuple to urlunparse()."""
+        return urlunparse(self)
+
+# Set up the encode/decode result pairs
+def _fix_result_transcoding():
+    """Cross-link every str result class with its bytes twin.
+
+    Sets _encoded_counterpart on the str class and _decoded_counterpart on
+    the bytes class, which encode()/decode() of the mixins rely on.
+    """
+    str_classes = (DefragResult, SplitResult, ParseResult)
+    bytes_classes = (DefragResultBytes, SplitResultBytes, ParseResultBytes)
+    for text_cls, raw_cls in zip(str_classes, bytes_classes):
+        text_cls._encoded_counterpart = raw_cls
+        raw_cls._decoded_counterpart = text_cls
+
+# Run once at import time, then drop the helper from the module namespace.
+_fix_result_transcoding()
+del _fix_result_transcoding
+
+def urlparse(url, scheme='', allow_fragments=True):
+    """Split a URL into six components.
+
+    General form: <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
+    Returns a ParseResult (scheme, netloc, path, params, query, fragment),
+    encoded back to bytes when the input was bytes. The netloc stays a
+    single string and % escapes are not expanded.
+    """
+    url, scheme, _coerce_result = _coerce_args(url, scheme)
+    scheme, netloc, path, query, fragment = urlsplit(url, scheme,
+                                                     allow_fragments)
+    params = ''
+    # Only schemes listed in uses_params get ';params' split off the path.
+    if scheme in uses_params and ';' in path:
+        path, params = _splitparams(path)
+    return _coerce_result(
+        ParseResult(scheme, netloc, path, params, query, fragment))
+
+def _splitparams(url):
+    """Split url at the first ';' of its last path segment.
+
+    Returns (path, params). When url contains '/' but no ';' follows the
+    last '/', the result is (url, ''). Callers are expected to pass a url
+    that contains ';'.
+    """
+    last_slash = url.rfind('/')
+    if last_slash < 0:
+        # No '/' at all: cut at the very first ';'.
+        cut = url.find(';')
+    else:
+        cut = url.find(';', last_slash)
+        if cut < 0:
+            return url, ''
+    return url[:cut], url[cut + 1:]
+
+def _splitnetloc(url, start=0):
+    """Return (netloc, rest), where netloc runs from start to the first of '/', '?' or '#'.
+
+    Without any delimiter at or after start, netloc extends to the end of
+    url and rest is empty.
+    """
+    positions = (url.find(mark, start) for mark in '/?#')
+    found = [pos for pos in positions if pos >= 0]
+    end = min(found + [len(url)])  # earliest delimiter, else end of string
+    return url[start:end], url[end:]
+
+def urlsplit(url, scheme='', allow_fragments=True):
+    """Split a URL into five components.
+
+    General form: <scheme>://<netloc>/<path>?<query>#<fragment>
+    Returns a SplitResult (scheme, netloc, path, query, fragment), encoded
+    back to bytes when the input was bytes. The netloc stays a single
+    string and % escapes are not expanded. Results are remembered in
+    _parse_cache.
+
+    Raises ValueError when the netloc contains '[' without ']' or vice versa.
+    """
+    url, scheme, _coerce_result = _coerce_args(url, scheme)
+    allow_fragments = bool(allow_fragments)
+    key = url, scheme, allow_fragments, type(url), type(scheme)
+    hit = _parse_cache.get(key, None)
+    if hit:
+        return _coerce_result(hit)
+    if len(_parse_cache) >= MAX_CACHE_SIZE:  # avoid runaway growth
+        clear_cache()
+
+    netloc = query = fragment = ''
+    colon = url.find(':')
+    if colon > 0:
+        prefix = url[:colon]
+        if prefix == 'http':
+            # Common case: taken as the scheme without the checks below.
+            scheme, url = prefix.lower(), url[colon + 1:]
+        elif all(ch in scheme_chars for ch in prefix):
+            # An all-digit remainder means "prefix:rest" looks like
+            # host:port, so the prefix stays part of the path.
+            rest = url[colon + 1:]
+            if not rest or any(ch not in '0123456789' for ch in rest):
+                scheme, url = prefix.lower(), rest
+
+    if url[:2] == '//':
+        netloc, url = _splitnetloc(url, 2)
+        has_open = '[' in netloc
+        has_close = ']' in netloc
+        if has_open != has_close:
+            raise ValueError("Invalid IPv6 URL")
+    if allow_fragments and '#' in url:
+        url, fragment = url.split('#', 1)
+    if '?' in url:
+        url, query = url.split('?', 1)
+    parsed = SplitResult(scheme, netloc, url, query, fragment)
+    _parse_cache[key] = parsed
+    return _coerce_result(parsed)
+
+def urlunparse(components):
+    """Rebuild a URL from a six-item (scheme, netloc, path, params, query, fragment).
+
+    The result can differ slightly from the URL originally parsed while
+    remaining equivalent, e.g. when that URL had a '?' with an empty query.
+    """
+    coerced = _coerce_args(*components)
+    scheme, netloc, path, params, query, fragment = coerced[:-1]
+    _coerce_result = coerced[-1]
+    # Fold the params back into the path, then let urlunsplit() do the rest.
+    path_with_params = "%s;%s" % (path, params) if params else path
+    joined = urlunsplit((scheme, netloc, path_with_params, query, fragment))
+    return _coerce_result(joined)
+
+def urlunsplit(components):
+    """Rebuild a URL from a five-item (scheme, netloc, path, query, fragment).
+
+    Any five-item iterable is accepted. The result can differ slightly from
+    the URL originally parsed while remaining equivalent, e.g. when that URL
+    had a '?' with an empty query.
+    """
+    coerced = _coerce_args(*components)
+    scheme, netloc, url, query, fragment = coerced[:-1]
+    _coerce_result = coerced[-1]
+
+    # A '//' authority marker is written when there is a netloc, or when the
+    # scheme is one of uses_netloc and the path does not already start with '//'.
+    wants_authority = netloc or (scheme and scheme in uses_netloc
+                                 and url[:2] != '//')
+    if wants_authority:
+        if url and url[:1] != '/':
+            url = '/' + url
+        url = '//' + (netloc or '') + url
+    if scheme:
+        url = scheme + ':' + url
+    if query:
+        url = url + '?' + query
+    if fragment:
+        url = url + '#' + fragment
+    return _coerce_result(url)
+
+def urljoin(base, url, allow_fragments=True):
+ """Join a base URL and a possibly relative URL to form an absolute
+ interpretation of the latter."""
+ # With only one non-empty argument there is nothing to combine.
+ if not base:
+ return url
+ if not url:
+ return base
+ base, url, _coerce_result = _coerce_args(base, url)
+ bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
+ urlparse(base, '', allow_fragments)
+ # The base scheme serves as the default scheme of the reference.
+ scheme, netloc, path, params, query, fragment = \
+ urlparse(url, bscheme, allow_fragments)
+ # A different scheme, or one outside uses_relative, means the reference
+ # is returned as given.
+ if scheme != bscheme or scheme not in uses_relative:
+ return _coerce_result(url)
+ if scheme in uses_netloc:
+ # A reference with its own netloc needs nothing from the base;
+ # otherwise it inherits the base netloc.
+ if netloc:
+ return _coerce_result(urlunparse((scheme, netloc, path,
+ params, query, fragment)))
+ netloc = bnetloc
+ # A path starting with '/' replaces the base path entirely.
+ if path[:1] == '/':
+ return _coerce_result(urlunparse((scheme, netloc, path,
+ params, query, fragment)))
+ # Empty path and params: reuse those of the base, and the base query
+ # too unless the reference brings its own.
+ if not path and not params:
+ path = bpath
+ params = bparams
+ if not query:
+ query = bquery
+ return _coerce_result(urlunparse((scheme, netloc, path,
+ params, query, fragment)))
+ # Relative path: drop the last segment of the base path and append the
+ # segments of the reference.
+ segments = bpath.split('/')[:-1] + path.split('/')
+ # XXX The stuff below is bogus in various ways...
+ # A trailing '.' becomes an empty segment, so the joined path ends in '/'.
+ if segments[-1] == '.':
+ segments[-1] = ''
+ # Drop every remaining '.' segment.
+ while '.' in segments:
+ segments.remove('.')
+ # Repeatedly delete the first "<name>/.." pair (name neither '' nor '..')
+ # found before the last segment, restarting the scan after each deletion.
+ while 1:
+ i = 1
+ n = len(segments) - 1
+ while i < n:
+ if (segments[i] == '..'
+ and segments[i-1] not in ('', '..')):
+ del segments[i-1:i+1]
+ break
+ i = i+1
+ else:
+ break
+ # Deal with a '..' left in the final position.
+ if segments == ['', '..']:
+ segments[-1] = ''
+ elif len(segments) >= 2 and segments[-1] == '..':
+ segments[-2:] = ['']
+ return _coerce_result(urlunparse((scheme, netloc, '/'.join(segments),
+ params, query, fragment)))
+
+def urldefrag(url):
+    """Strip the fragment, if any, from url.
+
+    Returns a DefragResult (defragmented URL, fragment), encoded back to
+    bytes when the input was bytes. The fragment is the empty string when
+    url contains no '#'.
+    """
+    url, _coerce_result = _coerce_args(url)
+    if '#' not in url:
+        return _coerce_result(DefragResult(url, ''))
+    parts = urlparse(url)
+    fragment = parts[5]
+    # Re-assemble everything except the fragment.
+    stripped = urlunparse(tuple(parts[:5]) + ('',))
+    return _coerce_result(DefragResult(stripped, fragment))
+
+# Every accepted hexadecimal digit, upper and lower case.
+_hexdig = '0123456789ABCDEFabcdef'
+# Lookup table used by unquote_to_bytes(): each two-digit hex pair (as
+# encoded bytes, any mix of cases) maps to the single byte it denotes.
+_hextobyte = dict(((a + b).encode(), bytes([int(a + b, 16)]))
+ for a in _hexdig for b in _hexdig)
+
+def unquote_to_bytes(string):
+ """unquote_to_bytes('abc%20def') -> b'abc def'."""
+ # Note: strings are encoded as UTF-8. This is only an issue if it contains
+ # unescaped non-ASCII characters, which URIs should not.
+ if not string:
+ # Is it a string-like object?
+ # (the bare attribute access raises AttributeError for objects without split)
+ string.split
+ return bytes(b'')
+ if isinstance(string, str):
+ string = string.encode('utf-8')
+ ### For Python-Future:
+ # It is already a byte-string object, but force it to be newbytes here on
+ # Py2:
+ string = bytes(string)
###
- if isinstance(safe, str):
- # Normalize 'safe' by converting to bytes and removing non-ASCII chars
- safe = str(safe).encode('ascii', 'ignore')
- else:
- ### For Python-Future:
- safe = bytes(safe)
+ # Every piece after the first was preceded by a '%' in the input.
+ bits = string.split(b'%')
+ # No '%' at all: nothing to decode.
+ if len(bits) == 1:
+ return string
+ res = [bits[0]]
+ append = res.append
+ for item in bits[1:]:
+ # The first two bytes must form a hex pair known to _hextobyte;
+ # otherwise the '%' and the piece are kept literally.
+ try:
+ append(_hextobyte[item[:2]])
+ append(item[2:])
+ except KeyError:
+ append(b'%')
+ append(item)
+ return bytes(b'').join(res)
+
+# Splits a string into alternating non-ASCII / ASCII runs (ASCII runs captured).
+_asciire = re.compile('([\x00-\x7f]+)')
+
+def unquote(string, encoding='utf-8', errors='replace'):
+    """Replace %xx escapes in string by the characters they encode.
+
+    encoding and errors are handed to bytes.decode() for the percent-decoded
+    sequences; None selects the defaults 'utf-8' and 'replace', so invalid
+    sequences turn into a placeholder character.
+
+    unquote('abc%20def') -> 'abc def'.
+    """
+    if '%' not in string:
+        # Bare attribute access: fails for objects that are not string-like.
+        string.split
+        return string
+    encoding = 'utf-8' if encoding is None else encoding
+    errors = 'replace' if errors is None else errors
+    pieces = _asciire.split(string)
+    # pieces alternates: [non-ASCII, ASCII run, non-ASCII, ASCII run, ...];
+    # only the ASCII runs can contain escapes.
+    out = [pieces[0]]
+    for ascii_run, tail in zip(pieces[1::2], pieces[2::2]):
+        out.append(unquote_to_bytes(ascii_run).decode(encoding, errors))
+        out.append(tail)
+    return ''.join(out)
+
+def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
+             encoding='utf-8', errors='replace'):
+    """Parse a query string into a dict mapping each name to a list of values.
+
+    Arguments:
+
+    qs: percent-encoded query string to be parsed
+
+    keep_blank_values: when true, blank values in the query are kept as
+        empty strings; when false (the default) they are dropped as if
+        they had not been included.
+
+    strict_parsing: when true, parsing errors raise ValueError; when
+        false (the default) they are silently ignored.
+
+    encoding and errors: how percent-encoded sequences are decoded into
+        Unicode characters, as accepted by the bytes.decode() method.
+    """
+    grouped = {}
+    for name, value in parse_qsl(qs, keep_blank_values, strict_parsing,
+                                 encoding=encoding, errors=errors):
+        # Values of a repeated name accumulate in order of appearance.
+        grouped.setdefault(name, []).append(value)
+    return grouped
+
+def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
+              encoding='utf-8', errors='replace'):
+    """Parse a query string into a list of (name, value) pairs.
+
+    Arguments:
+
+    qs: percent-encoded query string to be parsed; fields are separated
+        by '&' or ';'
+
+    keep_blank_values: when true, blank values in the query are kept as
+        empty strings; when false (the default) they are dropped as if
+        they had not been included.
+
+    strict_parsing: when true, a field without '=' raises ValueError;
+        when false (the default) such errors are silently ignored.
+
+    encoding and errors: how percent-encoded sequences are decoded into
+        Unicode characters, as accepted by the bytes.decode() method.
+
+    Returns a list, in the order the fields appear in qs.
+    """
+    qs, _coerce_result = _coerce_args(qs)
+
+    def _decode_component(text):
+        # '+' stands for a space; percent escapes are resolved afterwards.
+        text = unquote(text.replace('+', ' '),
+                       encoding=encoding, errors=errors)
+        return _coerce_result(text)
+
+    r = []
+    for chunk in qs.split('&'):
+        for name_value in chunk.split(';'):
+            if not name_value and not strict_parsing:
+                continue
+            nv = name_value.split('=', 1)
+            if len(nv) != 2:
+                if strict_parsing:
+                    raise ValueError("bad query field: %r" % (name_value,))
+                # Handle case of a control-name with no equal sign
+                if not keep_blank_values:
+                    continue
+                nv.append('')
+            if keep_blank_values or len(nv[1]):
+                r.append((_decode_component(nv[0]),
+                          _decode_component(nv[1])))
+    return r
+
+def unquote_plus(string, encoding='utf-8', errors='replace'):
+    """Like unquote(), but first turn every '+' into a space, as needed for
+    unquoting HTML form values.
+
+    unquote_plus('%7e/abc+def') -> '~/abc def'
+    """
+    return unquote(string.replace('+', ' '), encoding, errors)
+
+# Byte values that are never percent-escaped: ASCII letters, digits and '_.-'.
+# Quoter adds the caller's extra safe bytes to this set.
+_ALWAYS_SAFE = frozenset(bytes(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+ b'abcdefghijklmnopqrstuvwxyz'
+ b'0123456789'
+ b'_.-'))
+# The same values as a bytes object; quote_from_bytes() passes it to rstrip().
+_ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE)
+# Cache of Quoter(safe).__getitem__ callables, keyed by the safe bytes;
+# emptied by clear_cache().
+_safe_quoters = {}
+
+class Quoter(collections.defaultdict):
+    """Mapping from byte values (ints in range(0, 256)) to their quoted strings.
+
+    A byte found in the safe set maps to its own character; every other
+    byte maps to its '%XX' escape.
+    """
+    # The defaultdict machinery doubles as a cache: only the first lookup of
+    # a key runs __missing__, later lookups never reach Python code.
+    def __init__(self, safe):
+        """safe: bytes object whose values are safe in addition to _ALWAYS_SAFE."""
+        self.safe = _ALWAYS_SAFE.union(bytes(safe))
+
+    def __repr__(self):
+        # Without this, will just display as a defaultdict
+        return "<Quoter %r>" % (dict(self),)
+
+    def __missing__(self, b):
+        """Compute, store and return the quoted form of byte value b."""
+        if b in self.safe:
+            quoted = chr(b)
+        else:
+            quoted = '%{0:02X}'.format(b)
+        self[b] = quoted
+        return quoted
+
+def quote(string, safe='/', encoding=None, errors=None):
+    """quote('abc def') -> 'abc%20def'
+
+    Each part of a URL (path, query, ...) has its own set of reserved
+    characters that must be quoted. RFC 2396 "Uniform Resource Identifiers
+    (URI): Generic Syntax" lists these reserved characters:
+
+    reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
+               "$" | ","
+
+    Each is reserved in some component of a URL, but not in all of them.
+    The default safe='/' suits the path section of a URL, where existing
+    slashes already act as separators and must not be encoded.
+
+    string and safe may each be str or bytes. encoding and errors say how
+    a str is turned into bytes (as accepted by str.encode) and default to
+    'utf-8' and 'strict'. Passing either of them together with a bytes
+    string raises TypeError.
+    """
+    if not isinstance(string, str):
+        if encoding is not None:
+            raise TypeError("quote() doesn't support 'encoding' for bytes")
+        if errors is not None:
+            raise TypeError("quote() doesn't support 'errors' for bytes")
+        return quote_from_bytes(string, safe)
+    if not string:
+        # An empty str is handed back as is.
+        return string
+    raw = string.encode('utf-8' if encoding is None else encoding,
+                        'strict' if errors is None else errors)
+    return quote_from_bytes(raw, safe)
+
+def quote_plus(string, safe='', encoding=None, errors=None):
+    """Like quote(), but also turn ' ' into '+', as required for quoting
+    HTML form values. Plus signs already in the string are escaped unless
+    listed in safe. Unlike quote(), safe does not default to '/'.
+    """
+    # string may be str or bytes; without any space, plain quote() already
+    # gives the right answer.
+    if isinstance(string, str):
+        spaceless = ' ' not in string
+    elif isinstance(string, bytes):
+        spaceless = b' ' not in string
+    else:
+        spaceless = False
+    if spaceless:
+        return quote(string, safe, encoding, errors)
+    # Keep spaces unescaped during quoting, then swap them for '+'.
+    space = str(' ') if isinstance(safe, str) else bytes(b' ')
+    quoted = quote(string, safe + space, encoding, errors)
+    return quoted.replace(' ', '+')
+
+def quote_from_bytes(bs, safe='/'):
+ """Like quote(), but accepts a bytes object rather than a str, and does
+ not perform string-to-bytes encoding. It always returns an ASCII string.
+ quote_from_bytes(b'abc def\x3f') -> 'abc%20def%3f'
+ """
+ if not isinstance(bs, (bytes, bytearray)):
+ raise TypeError("quote_from_bytes() expected bytes")
+ if not bs:
+ return str('')
+ ### For Python-Future:
+ bs = bytes(bs)
+ ###
+ if isinstance(safe, str):
+ # Normalize 'safe' by converting to bytes and removing non-ASCII chars
+ safe = str(safe).encode('ascii', 'ignore')
+ else:
+ ### For Python-Future:
+ safe = bytes(safe)
###
- safe = bytes([c for c in safe if c < 128])
- if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe):
- return bs.decode()
- try:
- quoter = _safe_quoters[safe]
- except KeyError:
- _safe_quoters[safe] = quoter = Quoter(safe).__getitem__
- return str('').join([quoter(char) for char in bs])
-
-def urlencode(query, doseq=False, safe='', encoding=None, errors=None):
- """Encode a sequence of two-element tuples or dictionary into a URL query string.
-
- If any values in the query arg are sequences and doseq is true, each
- sequence element is converted to a separate parameter.
-
- If the query arg is a sequence of two-element tuples, the order of the
- parameters in the output will match the order of parameters in the
- input.
-
- The query arg may be either a string or a bytes type. When query arg is a
- string, the safe, encoding and error parameters are sent the quote_plus for
- encoding.
- """
-
- if hasattr(query, "items"):
- query = query.items()
- else:
- # It's a bother at times that strings and string-like objects are
- # sequences.
- try:
- # non-sequence items should not work with len()
- # non-empty strings will fail this
- if len(query) and not isinstance(query[0], tuple):
- raise TypeError
- # Zero-length sequences of all types will get here and succeed,
- # but that's a minor nit. Since the original implementation
- # allowed empty dicts that type of behavior probably should be
- # preserved for consistency
- except TypeError:
- ty, va, tb = sys.exc_info()
- raise_with_traceback(TypeError("not a valid non-string sequence "
- "or mapping object"), tb)
-
- l = []
- if not doseq:
- for k, v in query:
- if isinstance(k, bytes):
- k = quote_plus(k, safe)
- else:
- k = quote_plus(str(k), safe, encoding, errors)
-
- if isinstance(v, bytes):
- v = quote_plus(v, safe)
- else:
- v = quote_plus(str(v), safe, encoding, errors)
- l.append(k + '=' + v)
- else:
- for k, v in query:
- if isinstance(k, bytes):
- k = quote_plus(k, safe)
- else:
- k = quote_plus(str(k), safe, encoding, errors)
-
- if isinstance(v, bytes):
- v = quote_plus(v, safe)
- l.append(k + '=' + v)
- elif isinstance(v, str):
- v = quote_plus(v, safe, encoding, errors)
- l.append(k + '=' + v)
- else:
- try:
- # Is this a sufficient test for sequence-ness?
- x = len(v)
- except TypeError:
- # not a sequence
- v = quote_plus(str(v), safe, encoding, errors)
- l.append(k + '=' + v)
- else:
- # loop over the sequence
- for elt in v:
- if isinstance(elt, bytes):
- elt = quote_plus(elt, safe)
- else:
- elt = quote_plus(str(elt), safe, encoding, errors)
- l.append(k + '=' + elt)
- return str('&').join(l)
-
-# Utilities to parse URLs (most of these return None for missing parts):
-# unwrap('<URL:type://host/path>') --> 'type://host/path'
-# splittype('type:opaquestring') --> 'type', 'opaquestring'
-# splithost('//host[:port]/path') --> 'host[:port]', '/path'
-# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
-# splitpasswd('user:passwd') -> 'user', 'passwd'
-# splitport('host:port') --> 'host', 'port'
-# splitquery('/path?query') --> '/path', 'query'
-# splittag('/path#tag') --> '/path', 'tag'
-# splitattr('/path;attr1=value1;attr2=value2;...') ->
-# '/path', ['attr1=value1', 'attr2=value2', ...]
-# splitvalue('attr=value') --> 'attr', 'value'
-# urllib.parse.unquote('abc%20def') -> 'abc def'
-# quote('abc def') -> 'abc%20def')
-
-def to_bytes(url):
- """to_bytes(u"URL") --> 'URL'."""
- # Most URL schemes require ASCII. If that changes, the conversion
- # can be relaxed.
- # XXX get rid of to_bytes()
- if isinstance(url, str):
- try:
- url = url.encode("ASCII").decode()
- except UnicodeError:
- raise UnicodeError("URL " + repr(url) +
- " contains non-ASCII characters")
- return url
-
-def unwrap(url):
- """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
- url = str(url).strip()
- if url[:1] == '<' and url[-1:] == '>':
- url = url[1:-1].strip()
- if url[:4] == 'URL:': url = url[4:].strip()
- return url
-
-_typeprog = None
-def splittype(url):
- """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
- global _typeprog
- if _typeprog is None:
- import re
- _typeprog = re.compile('^([^/:]+):')
-
- match = _typeprog.match(url)
- if match:
- scheme = match.group(1)
- return scheme.lower(), url[len(scheme) + 1:]
- return None, url
-
-_hostprog = None
-def splithost(url):
- """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
- global _hostprog
- if _hostprog is None:
- import re
- _hostprog = re.compile('^//([^/?]*)(.*)$')
-
- match = _hostprog.match(url)
- if match:
- host_port = match.group(1)
- path = match.group(2)
- if path and not path.startswith('/'):
- path = '/' + path
- return host_port, path
- return None, url
-
-_userprog = None
-def splituser(host):
- """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
- global _userprog
- if _userprog is None:
- import re
- _userprog = re.compile('^(.*)@(.*)$')
-
- match = _userprog.match(host)
- if match: return match.group(1, 2)
- return None, host
-
-_passwdprog = None
-def splitpasswd(user):
- """splitpasswd('user:passwd') -> 'user', 'passwd'."""
- global _passwdprog
- if _passwdprog is None:
- import re
- _passwdprog = re.compile('^([^:]*):(.*)$',re.S)
-
- match = _passwdprog.match(user)
- if match: return match.group(1, 2)
- return user, None
-
-# splittag('/path#tag') --> '/path', 'tag'
-_portprog = None
-def splitport(host):
- """splitport('host:port') --> 'host', 'port'."""
- global _portprog
- if _portprog is None:
- import re
- _portprog = re.compile('^(.*):([0-9]+)$')
-
- match = _portprog.match(host)
- if match: return match.group(1, 2)
- return host, None
-
-_nportprog = None
-def splitnport(host, defport=-1):
- """Split host and port, returning numeric port.
- Return given default port if no ':' found; defaults to -1.
- Return numerical port if a valid number are found after ':'.
- Return None if ':' but not a valid number."""
- global _nportprog
- if _nportprog is None:
- import re
- _nportprog = re.compile('^(.*):(.*)$')
-
- match = _nportprog.match(host)
- if match:
- host, port = match.group(1, 2)
- try:
- if not port: raise ValueError("no digits")
- nport = int(port)
- except ValueError:
- nport = None
- return host, nport
- return host, defport
-
-_queryprog = None
-def splitquery(url):
- """splitquery('/path?query') --> '/path', 'query'."""
- global _queryprog
- if _queryprog is None:
- import re
- _queryprog = re.compile('^(.*)\?([^?]*)$')
-
- match = _queryprog.match(url)
- if match: return match.group(1, 2)
- return url, None
-
-_tagprog = None
-def splittag(url):
- """splittag('/path#tag') --> '/path', 'tag'."""
- global _tagprog
- if _tagprog is None:
- import re
- _tagprog = re.compile('^(.*)#([^#]*)$')
-
- match = _tagprog.match(url)
- if match: return match.group(1, 2)
- return url, None
-
-def splitattr(url):
- """splitattr('/path;attr1=value1;attr2=value2;...') ->
- '/path', ['attr1=value1', 'attr2=value2', ...]."""
- words = url.split(';')
- return words[0], words[1:]
-
-_valueprog = None
-def splitvalue(attr):
- """splitvalue('attr=value') --> 'attr', 'value'."""
- global _valueprog
- if _valueprog is None:
- import re
- _valueprog = re.compile('^([^=]*)=(.*)$')
-
- match = _valueprog.match(attr)
- if match: return match.group(1, 2)
- return attr, None
+ safe = bytes([c for c in safe if c < 128])
+ if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe):
+ return bs.decode()
+ try:
+ quoter = _safe_quoters[safe]
+ except KeyError:
+ _safe_quoters[safe] = quoter = Quoter(safe).__getitem__
+ return str('').join([quoter(char) for char in bs])
+
+def urlencode(query, doseq=False, safe='', encoding=None, errors=None):
+    """Encode a mapping or a sequence of two-element tuples into a URL query string.
+
+    Arguments:
+
+    query: an object with an items() method, or a sequence of (key, value)
+        tuples; for a sequence the output keeps the input order.
+
+    doseq: when true, a value that is a sequence (anything with len() other
+        than str or bytes) produces one 'key=element' parameter per element.
+
+    safe, encoding, errors: passed on to quote_plus(). bytes keys, values
+        and elements are quoted with safe only; everything else is turned
+        into a str first and quoted with safe, encoding and errors.
+
+    Returns the parameters joined with '&'.
+    Raises TypeError when query is neither a mapping nor a sequence of tuples.
+    """
+
+    def _quote_item(item):
+        # Single place for the bytes-vs-everything-else decision that every
+        # key, value and sequence element goes through.
+        if isinstance(item, bytes):
+            return quote_plus(item, safe)
+        return quote_plus(str(item), safe, encoding, errors)
+
+    if hasattr(query, "items"):
+        query = query.items()
+    else:
+        # It's a bother at times that strings and string-like objects are
+        # sequences.
+        try:
+            # non-sequence items should not work with len()
+            # non-empty strings will fail this
+            if len(query) and not isinstance(query[0], tuple):
+                raise TypeError
+            # Zero-length sequences of all types will get here and succeed,
+            # but that's a minor nit. Since the original implementation
+            # allowed empty dicts that type of behavior probably should be
+            # preserved for consistency
+        except TypeError:
+            tb = sys.exc_info()[2]
+            raise_with_traceback(TypeError("not a valid non-string sequence "
+                                           "or mapping object"), tb)
+
+    l = []
+    for k, v in query:
+        k = _quote_item(k)
+        if not doseq or isinstance(v, (bytes, str)):
+            l.append(k + '=' + _quote_item(v))
+            continue
+        try:
+            # Is this a sufficient test for sequence-ness?
+            len(v)
+        except TypeError:
+            # not a sequence
+            l.append(k + '=' + _quote_item(v))
+        else:
+            # loop over the sequence
+            for elt in v:
+                l.append(k + '=' + _quote_item(elt))
+    return str('&').join(l)
+
+# Utilities to parse URLs (most of these return None for missing parts):
+# unwrap('<URL:type://host/path>') --> 'type://host/path'
+# splittype('type:opaquestring') --> 'type', 'opaquestring'
+# splithost('//host[:port]/path') --> 'host[:port]', '/path'
+# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
+# splitpasswd('user:passwd') -> 'user', 'passwd'
+# splitport('host:port') --> 'host', 'port'
+# splitquery('/path?query') --> '/path', 'query'
+# splittag('/path#tag') --> '/path', 'tag'
+# splitattr('/path;attr1=value1;attr2=value2;...') ->
+# '/path', ['attr1=value1', 'attr2=value2', ...]
+# splitvalue('attr=value') --> 'attr', 'value'
+# urllib.parse.unquote('abc%20def') -> 'abc def'
+ # quote('abc def') -> 'abc%20def'
+
def to_bytes(url):
    """to_bytes(u"URL") --> 'URL'.

    A ``str`` argument is encoded to ASCII and decoded again; if it holds
    non-ASCII characters a UnicodeError naming the URL is raised.  Any
    argument that is not a ``str`` is returned untouched.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed.
    # XXX get rid of to_bytes()
    if not isinstance(url, str):
        return url
    try:
        return url.encode("ASCII").decode()
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
+
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    The argument is converted with str() and stripped of surrounding
    whitespace before any wrapper is removed.
    """
    text = str(url).strip()
    # Peel off one pair of enclosing angle brackets, if both are present.
    if text.startswith('<') and text.endswith('>'):
        text = text[1:-1].strip()
    # A leading 'URL:' marker is dropped whether or not brackets were found.
    if text.startswith('URL:'):
        text = text[4:].strip()
    return text
+
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The scheme is returned lower-cased.  When the URL does not start with
    a run of characters other than '/' and ':' followed by ':', the result
    is (None, url).
    """
    global _typeprog
    if _typeprog is None:
        # Compiled on first use and cached in the module-level global.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if not found:
        return None, url
    # The match starts at index 0 and ends just past the ':', so end() is
    # exactly where the remainder of the URL begins.
    return found.group(1).lower(), url[found.end():]
+
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    Returns (None, url) when the URL does not begin with '//'.  A non-empty
    remainder that does not start with '/' (e.g. one beginning with '?')
    gets a '/' prepended.
    """
    global _hostprog
    if _hostprog is None:
        # Compiled on first use and cached in the module-level global.
        import re
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    found = _hostprog.match(url)
    if found is None:
        return None, url
    host_port, path = found.group(1, 2)
    if path and path[0] != '/':
        path = '/' + path
    return host_port, path
+
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    The split happens at the last '@' (the leading group is greedy).
    Returns (None, host) when there is no match.
    """
    global _userprog
    if _userprog is None:
        # Compiled on first use and cached in the module-level global.
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if not found:
        return None, host
    return found.group(1, 2)
+
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'; the pattern is compiled with re.S
    so the password part may span newlines.  Returns (user, None) when
    there is no ':'.
    """
    global _passwdprog
    if _passwdprog is None:
        # Compiled on first use and cached in the module-level global.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$',re.S)

    found = _passwdprog.match(user)
    if not found:
        return user, None
    return found.group(1, 2)
+
+ # splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string and is only recognised when the text
    after the last ':' consists solely of the digits 0-9.  Otherwise the
    result is (host, None).
    """
    global _portprog
    if _portprog is None:
        # Compiled on first use and cached in the module-level global.
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if not found:
        return host, None
    return found.group(1, 2)
+
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        # Compiled on first use and cached in the module-level global.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if found is None:
        return host, defport
    host, port = found.group(1, 2)
    # An empty or non-numeric port is reported as None, not as defport.
    nport = None
    if port:
        try:
            nport = int(port)
        except ValueError:
            pass
    return host, nport
+
+_queryprog = None
+def splitquery(url):
+ """splitquery('/path?query') --> '/path', 'query'."""
+ global _queryprog
+ if _queryprog is None:
+ import re
+ _queryprog = re.compile('^(.*)\?([^?]*)$')
+
+ match = _queryprog.match(url)
+ if match: return match.group(1, 2)
+ return url, None
+
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#', since the tag group may not
    contain '#'.  Returns (url, None) when there is no '#'.
    """
    global _tagprog
    if _tagprog is None:
        # Compiled on first use and cached in the module-level global.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if not found:
        return url, None
    return found.group(1, 2)
+
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when the URL contains no ';'.
    """
    pieces = url.split(';')
    # split() always yields at least one element: the part before any ';'.
    path = pieces.pop(0)
    return path, pieces
+
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='.  Returns (attr, None) when there
    is no match.
    """
    global _valueprog
    if _valueprog is None:
        # Compiled on first use and cached in the module-level global.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if not found:
        return attr, None
    return found.group(1, 2)
diff --git a/contrib/python/future/future/backports/urllib/request.py b/contrib/python/future/future/backports/urllib/request.py
index e270b390aa..baee5401aa 100644
--- a/contrib/python/future/future/backports/urllib/request.py
+++ b/contrib/python/future/future/backports/urllib/request.py
@@ -1,118 +1,118 @@
-"""
-Ported using Python-Future from the Python 3.3 standard library.
-
-An extensible library for opening URLs using a variety of protocols
-
-The simplest way to use this module is to call the urlopen function,
-which accepts a string containing a URL or a Request object (described
-below). It opens the URL and returns the results as file-like
-object; the returned object has some extra methods described below.
-
-The OpenerDirector manages a collection of Handler objects that do
-all the actual work. Each Handler implements a particular protocol or
-option. The OpenerDirector is a composite object that invokes the
-Handlers needed to open the requested URL. For example, the
-HTTPHandler performs HTTP GET and POST requests and deals with
-non-error returns. The HTTPRedirectHandler automatically deals with
-HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
-deals with digest authentication.
-
-urlopen(url, data=None) -- Basic usage is the same as original
-urllib. pass the url and optionally data to post to an HTTP URL, and
-get a file-like object back. One difference is that you can also pass
-a Request instance instead of URL. Raises a URLError (subclass of
-IOError); for HTTP errors, raises an HTTPError, which can also be
-treated as a valid response.
-
-build_opener -- Function that creates a new OpenerDirector instance.
-Will install the default handlers. Accepts one or more Handlers as
-arguments, either instances or Handler classes that it will
-instantiate. If one of the argument is a subclass of the default
-handler, the argument will be installed instead of the default.
-
-install_opener -- Installs a new opener as the default opener.
-
-objects of interest:
-
-OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages
-the Handler classes, while dealing with requests and responses.
-
-Request -- An object that encapsulates the state of a request. The
-state can be as simple as the URL. It can also include extra HTTP
-headers, e.g. a User-Agent.
-
-BaseHandler --
-
-internals:
-BaseHandler and parent
-_call_chain conventions
-
-Example usage:
-
-import urllib.request
-
-# set up authentication info
-authinfo = urllib.request.HTTPBasicAuthHandler()
-authinfo.add_password(realm='PDQ Application',
- uri='https://mahler:8092/site-updates.py',
- user='klem',
- passwd='geheim$parole')
-
-proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})
-
-# build a new opener that adds authentication and caching FTP handlers
-opener = urllib.request.build_opener(proxy_support, authinfo,
- urllib.request.CacheFTPHandler)
-
-# install it
-urllib.request.install_opener(opener)
-
-f = urllib.request.urlopen('http://www.python.org/')
-"""
-
-# XXX issues:
-# If an authentication error handler that tries to perform
-# authentication for some reason but fails, how should the error be
-# signalled? The client needs to know the HTTP error code. But if
-# the handler knows that the problem was, e.g., that it didn't know
-# that hash algo that requested in the challenge, it would be good to
-# pass that information along to the client, too.
-# ftp errors aren't handled cleanly
-# check digest against correct (i.e. non-apache) implementation
-
-# Possible extensions:
-# complex proxies XXX not sure what exactly was meant by this
-# abstract factory for opener
-
-from __future__ import absolute_import, division, print_function, unicode_literals
-from future.builtins import bytes, dict, filter, input, int, map, open, str
-from future.utils import PY2, PY3, raise_with_traceback
-
-import base64
-import bisect
-import hashlib
-import array
-
-from future.backports import email
-from future.backports.http import client as http_client
-from .error import URLError, HTTPError, ContentTooShortError
-from .parse import (
- urlparse, urlsplit, urljoin, unwrap, quote, unquote,
- splittype, splithost, splitport, splituser, splitpasswd,
- splitattr, splitquery, splitvalue, splittag, to_bytes, urlunparse)
-from .response import addinfourl, addclosehook
-
-import io
-import os
-import posixpath
-import re
-import socket
-import sys
-import time
-import tempfile
-import contextlib
-import warnings
-
+"""
+Ported using Python-Future from the Python 3.3 standard library.
+
+An extensible library for opening URLs using a variety of protocols
+
+The simplest way to use this module is to call the urlopen function,
+which accepts a string containing a URL or a Request object (described
+below). It opens the URL and returns the results as file-like
+object; the returned object has some extra methods described below.
+
+The OpenerDirector manages a collection of Handler objects that do
+all the actual work. Each Handler implements a particular protocol or
+option. The OpenerDirector is a composite object that invokes the
+Handlers needed to open the requested URL. For example, the
+HTTPHandler performs HTTP GET and POST requests and deals with
+non-error returns. The HTTPRedirectHandler automatically deals with
+HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
+deals with digest authentication.
+
+urlopen(url, data=None) -- Basic usage is the same as original
+urllib. pass the url and optionally data to post to an HTTP URL, and
+get a file-like object back. One difference is that you can also pass
+a Request instance instead of URL. Raises a URLError (subclass of
+IOError); for HTTP errors, raises an HTTPError, which can also be
+treated as a valid response.
+
+build_opener -- Function that creates a new OpenerDirector instance.
+Will install the default handlers. Accepts one or more Handlers as
+arguments, either instances or Handler classes that it will
+instantiate. If one of the arguments is a subclass of the default
+handler, the argument will be installed instead of the default.
+
+install_opener -- Installs a new opener as the default opener.
+
+objects of interest:
+
+OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages
+the Handler classes, while dealing with requests and responses.
+
+Request -- An object that encapsulates the state of a request. The
+state can be as simple as the URL. It can also include extra HTTP
+headers, e.g. a User-Agent.
+
+BaseHandler --
+
+internals:
+BaseHandler and parent
+_call_chain conventions
+
+Example usage:
+
+import urllib.request
+
+# set up authentication info
+authinfo = urllib.request.HTTPBasicAuthHandler()
+authinfo.add_password(realm='PDQ Application',
+ uri='https://mahler:8092/site-updates.py',
+ user='klem',
+ passwd='geheim$parole')
+
+proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})
+
+# build a new opener that adds authentication and caching FTP handlers
+opener = urllib.request.build_opener(proxy_support, authinfo,
+ urllib.request.CacheFTPHandler)
+
+# install it
+urllib.request.install_opener(opener)
+
+f = urllib.request.urlopen('http://www.python.org/')
+"""
+
+# XXX issues:
+# If an authentication error handler that tries to perform
+# authentication for some reason but fails, how should the error be
+# signalled? The client needs to know the HTTP error code. But if
+# the handler knows that the problem was, e.g., that it didn't know
+# the hash algo that was requested in the challenge, it would be good to
+# pass that information along to the client, too.
+# ftp errors aren't handled cleanly
+# check digest against correct (i.e. non-apache) implementation
+
+# Possible extensions:
+# complex proxies XXX not sure what exactly was meant by this
+# abstract factory for opener
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+from future.builtins import bytes, dict, filter, input, int, map, open, str
+from future.utils import PY2, PY3, raise_with_traceback
+
+import base64
+import bisect
+import hashlib
+import array
+
+from future.backports import email
+from future.backports.http import client as http_client
+from .error import URLError, HTTPError, ContentTooShortError
+from .parse import (
+ urlparse, urlsplit, urljoin, unwrap, quote, unquote,
+ splittype, splithost, splitport, splituser, splitpasswd,
+ splitattr, splitquery, splitvalue, splittag, to_bytes, urlunparse)
+from .response import addinfourl, addclosehook
+
+import io
+import os
+import posixpath
+import re
+import socket
+import sys
+import time
+import tempfile
+import contextlib
+import warnings
+
from future.utils import PY2
if PY2:
@@ -120,2528 +120,2528 @@ if PY2:
else:
from collections.abc import Iterable
-# check for SSL
-try:
- import ssl
- # Not available in the SSL module in Py2:
- from ssl import SSLContext
-except ImportError:
- _have_ssl = False
-else:
- _have_ssl = True
-
-__all__ = [
- # Classes
- 'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler',
- 'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler',
- 'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
- 'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler',
- 'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler',
- 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler',
- 'UnknownHandler', 'HTTPErrorProcessor',
- # Functions
- 'urlopen', 'install_opener', 'build_opener',
- 'pathname2url', 'url2pathname', 'getproxies',
- # Legacy interface
- 'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
-]
-
-# used in User-Agent header sent
-__version__ = sys.version[:3]
-
-_opener = None
-def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, **_3to2kwargs):
- if 'cadefault' in _3to2kwargs: cadefault = _3to2kwargs['cadefault']; del _3to2kwargs['cadefault']
- else: cadefault = False
- if 'capath' in _3to2kwargs: capath = _3to2kwargs['capath']; del _3to2kwargs['capath']
- else: capath = None
- if 'cafile' in _3to2kwargs: cafile = _3to2kwargs['cafile']; del _3to2kwargs['cafile']
- else: cafile = None
- global _opener
- if cafile or capath or cadefault:
- if not _have_ssl:
- raise ValueError('SSL support not available')
- context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
- context.options |= ssl.OP_NO_SSLv2
- context.verify_mode = ssl.CERT_REQUIRED
- if cafile or capath:
- context.load_verify_locations(cafile, capath)
- else:
- context.set_default_verify_paths()
- https_handler = HTTPSHandler(context=context, check_hostname=True)
- opener = build_opener(https_handler)
- elif _opener is None:
- _opener = opener = build_opener()
- else:
- opener = _opener
- return opener.open(url, data, timeout)
-
-def install_opener(opener):
- global _opener
- _opener = opener
-
-_url_tempfiles = []
-def urlretrieve(url, filename=None, reporthook=None, data=None):
- """
- Retrieve a URL into a temporary location on disk.
-
- Requires a URL argument. If a filename is passed, it is used as
- the temporary file location. The reporthook argument should be
- a callable that accepts a block number, a read size, and the
- total file size of the URL target. The data argument should be
- valid URL encoded data.
-
- If a filename is passed and the URL points to a local resource,
- the result is a copy from local file to new file.
-
- Returns a tuple containing the path to the newly created
- data file as well as the resulting HTTPMessage object.
- """
- url_type, path = splittype(url)
-
- with contextlib.closing(urlopen(url, data)) as fp:
- headers = fp.info()
-
- # Just return the local path and the "headers" for file://
- # URLs. No sense in performing a copy unless requested.
- if url_type == "file" and not filename:
- return os.path.normpath(path), headers
-
- # Handle temporary file setup.
- if filename:
- tfp = open(filename, 'wb')
- else:
- tfp = tempfile.NamedTemporaryFile(delete=False)
- filename = tfp.name
- _url_tempfiles.append(filename)
-
- with tfp:
- result = filename, headers
- bs = 1024*8
- size = -1
- read = 0
- blocknum = 0
- if "content-length" in headers:
- size = int(headers["Content-Length"])
-
- if reporthook:
- reporthook(blocknum, bs, size)
-
- while True:
- block = fp.read(bs)
- if not block:
- break
- read += len(block)
- tfp.write(block)
- blocknum += 1
- if reporthook:
- reporthook(blocknum, bs, size)
-
- if size >= 0 and read < size:
- raise ContentTooShortError(
- "retrieval incomplete: got only %i out of %i bytes"
- % (read, size), result)
-
- return result
-
-def urlcleanup():
- for temp_file in _url_tempfiles:
- try:
- os.unlink(temp_file)
- except EnvironmentError:
- pass
-
- del _url_tempfiles[:]
- global _opener
- if _opener:
- _opener = None
-
-if PY3:
- _cut_port_re = re.compile(r":\d+$", re.ASCII)
-else:
- _cut_port_re = re.compile(r":\d+$")
-
-def request_host(request):
-
- """Return request-host, as defined by RFC 2965.
-
- Variation from RFC: returned value is lowercased, for convenient
- comparison.
-
- """
- url = request.full_url
- host = urlparse(url)[1]
- if host == "":
- host = request.get_header("Host", "")
-
- # remove port, if present
- host = _cut_port_re.sub("", host, 1)
- return host.lower()
-
-class Request(object):
-
- def __init__(self, url, data=None, headers={},
- origin_req_host=None, unverifiable=False,
- method=None):
- # unwrap('<URL:type://host/path>') --> 'type://host/path'
- self.full_url = unwrap(url)
- self.full_url, self.fragment = splittag(self.full_url)
- self.data = data
- self.headers = {}
- self._tunnel_host = None
- for key, value in headers.items():
- self.add_header(key, value)
- self.unredirected_hdrs = {}
- if origin_req_host is None:
- origin_req_host = request_host(self)
- self.origin_req_host = origin_req_host
- self.unverifiable = unverifiable
- self.method = method
- self._parse()
-
- def _parse(self):
- self.type, rest = splittype(self.full_url)
- if self.type is None:
- raise ValueError("unknown url type: %r" % self.full_url)
- self.host, self.selector = splithost(rest)
- if self.host:
- self.host = unquote(self.host)
-
- def get_method(self):
- """Return a string indicating the HTTP request method."""
- if self.method is not None:
- return self.method
- elif self.data is not None:
- return "POST"
- else:
- return "GET"
-
- def get_full_url(self):
- if self.fragment:
- return '%s#%s' % (self.full_url, self.fragment)
- else:
- return self.full_url
-
- # Begin deprecated methods
-
- def add_data(self, data):
- msg = "Request.add_data method is deprecated."
- warnings.warn(msg, DeprecationWarning, stacklevel=1)
- self.data = data
-
- def has_data(self):
- msg = "Request.has_data method is deprecated."
- warnings.warn(msg, DeprecationWarning, stacklevel=1)
- return self.data is not None
-
- def get_data(self):
- msg = "Request.get_data method is deprecated."
- warnings.warn(msg, DeprecationWarning, stacklevel=1)
- return self.data
-
- def get_type(self):
- msg = "Request.get_type method is deprecated."
- warnings.warn(msg, DeprecationWarning, stacklevel=1)
- return self.type
-
- def get_host(self):
- msg = "Request.get_host method is deprecated."
- warnings.warn(msg, DeprecationWarning, stacklevel=1)
- return self.host
-
- def get_selector(self):
- msg = "Request.get_selector method is deprecated."
- warnings.warn(msg, DeprecationWarning, stacklevel=1)
- return self.selector
-
- def is_unverifiable(self):
- msg = "Request.is_unverifiable method is deprecated."
- warnings.warn(msg, DeprecationWarning, stacklevel=1)
- return self.unverifiable
-
- def get_origin_req_host(self):
- msg = "Request.get_origin_req_host method is deprecated."
- warnings.warn(msg, DeprecationWarning, stacklevel=1)
- return self.origin_req_host
-
- # End deprecated methods
-
- def set_proxy(self, host, type):
- if self.type == 'https' and not self._tunnel_host:
- self._tunnel_host = self.host
- else:
- self.type= type
- self.selector = self.full_url
- self.host = host
-
- def has_proxy(self):
- return self.selector == self.full_url
-
- def add_header(self, key, val):
- # useful for something like authentication
- self.headers[key.capitalize()] = val
-
- def add_unredirected_header(self, key, val):
- # will not be added to a redirected request
- self.unredirected_hdrs[key.capitalize()] = val
-
- def has_header(self, header_name):
- return (header_name in self.headers or
- header_name in self.unredirected_hdrs)
-
- def get_header(self, header_name, default=None):
- return self.headers.get(
- header_name,
- self.unredirected_hdrs.get(header_name, default))
-
- def header_items(self):
- hdrs = self.unredirected_hdrs.copy()
- hdrs.update(self.headers)
- return list(hdrs.items())
-
-class OpenerDirector(object):
- def __init__(self):
- client_version = "Python-urllib/%s" % __version__
- self.addheaders = [('User-agent', client_version)]
- # self.handlers is retained only for backward compatibility
- self.handlers = []
- # manage the individual handlers
- self.handle_open = {}
- self.handle_error = {}
- self.process_response = {}
- self.process_request = {}
-
- def add_handler(self, handler):
- if not hasattr(handler, "add_parent"):
- raise TypeError("expected BaseHandler instance, got %r" %
- type(handler))
-
- added = False
- for meth in dir(handler):
- if meth in ["redirect_request", "do_open", "proxy_open"]:
- # oops, coincidental match
- continue
-
- i = meth.find("_")
- protocol = meth[:i]
- condition = meth[i+1:]
-
- if condition.startswith("error"):
- j = condition.find("_") + i + 1
- kind = meth[j+1:]
- try:
- kind = int(kind)
- except ValueError:
- pass
- lookup = self.handle_error.get(protocol, {})
- self.handle_error[protocol] = lookup
- elif condition == "open":
- kind = protocol
- lookup = self.handle_open
- elif condition == "response":
- kind = protocol
- lookup = self.process_response
- elif condition == "request":
- kind = protocol
- lookup = self.process_request
- else:
- continue
-
- handlers = lookup.setdefault(kind, [])
- if handlers:
- bisect.insort(handlers, handler)
- else:
- handlers.append(handler)
- added = True
-
- if added:
- bisect.insort(self.handlers, handler)
- handler.add_parent(self)
-
- def close(self):
- # Only exists for backwards compatibility.
- pass
-
- def _call_chain(self, chain, kind, meth_name, *args):
- # Handlers raise an exception if no one else should try to handle
- # the request, or return None if they can't but another handler
- # could. Otherwise, they return the response.
- handlers = chain.get(kind, ())
- for handler in handlers:
- func = getattr(handler, meth_name)
- result = func(*args)
- if result is not None:
- return result
-
- def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
- """
- Accept a URL or a Request object
-
- Python-Future: if the URL is passed as a byte-string, decode it first.
- """
- if isinstance(fullurl, bytes):
- fullurl = fullurl.decode()
- if isinstance(fullurl, str):
- req = Request(fullurl, data)
- else:
- req = fullurl
- if data is not None:
- req.data = data
-
- req.timeout = timeout
- protocol = req.type
-
- # pre-process request
- meth_name = protocol+"_request"
- for processor in self.process_request.get(protocol, []):
- meth = getattr(processor, meth_name)
- req = meth(req)
-
- response = self._open(req, data)
-
- # post-process response
- meth_name = protocol+"_response"
- for processor in self.process_response.get(protocol, []):
- meth = getattr(processor, meth_name)
- response = meth(req, response)
-
- return response
-
- def _open(self, req, data=None):
- result = self._call_chain(self.handle_open, 'default',
- 'default_open', req)
- if result:
- return result
-
- protocol = req.type
- result = self._call_chain(self.handle_open, protocol, protocol +
- '_open', req)
- if result:
- return result
-
- return self._call_chain(self.handle_open, 'unknown',
- 'unknown_open', req)
-
- def error(self, proto, *args):
- if proto in ('http', 'https'):
- # XXX http[s] protocols are special-cased
- dict = self.handle_error['http'] # https is not different than http
- proto = args[2] # YUCK!
- meth_name = 'http_error_%s' % proto
- http_err = 1
- orig_args = args
- else:
- dict = self.handle_error
- meth_name = proto + '_error'
- http_err = 0
- args = (dict, proto, meth_name) + args
- result = self._call_chain(*args)
- if result:
- return result
-
- if http_err:
- args = (dict, 'default', 'http_error_default') + orig_args
- return self._call_chain(*args)
-
-# XXX probably also want an abstract factory that knows when it makes
-# sense to skip a superclass in favor of a subclass and when it might
-# make sense to include both
-
-def build_opener(*handlers):
- """Create an opener object from a list of handlers.
-
- The opener will use several default handlers, including support
- for HTTP, FTP and when applicable HTTPS.
-
- If any of the handlers passed as arguments are subclasses of the
- default handlers, the default handlers will not be used.
- """
- def isclass(obj):
- return isinstance(obj, type) or hasattr(obj, "__bases__")
-
- opener = OpenerDirector()
- default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
- HTTPDefaultErrorHandler, HTTPRedirectHandler,
- FTPHandler, FileHandler, HTTPErrorProcessor]
- if hasattr(http_client, "HTTPSConnection"):
- default_classes.append(HTTPSHandler)
- skip = set()
- for klass in default_classes:
- for check in handlers:
- if isclass(check):
- if issubclass(check, klass):
- skip.add(klass)
- elif isinstance(check, klass):
- skip.add(klass)
- for klass in skip:
- default_classes.remove(klass)
-
- for klass in default_classes:
- opener.add_handler(klass())
-
- for h in handlers:
- if isclass(h):
- h = h()
- opener.add_handler(h)
- return opener
-
-class BaseHandler(object):
- handler_order = 500
-
- def add_parent(self, parent):
- self.parent = parent
-
- def close(self):
- # Only exists for backwards compatibility
- pass
-
- def __lt__(self, other):
- if not hasattr(other, "handler_order"):
- # Try to preserve the old behavior of having custom classes
- # inserted after default ones (works only for custom user
- # classes which are not aware of handler_order).
- return True
- return self.handler_order < other.handler_order
-
-
-class HTTPErrorProcessor(BaseHandler):
- """Process HTTP error responses."""
- handler_order = 1000 # after all other processing
-
- def http_response(self, request, response):
- code, msg, hdrs = response.code, response.msg, response.info()
-
- # According to RFC 2616, "2xx" code indicates that the client's
- # request was successfully received, understood, and accepted.
- if not (200 <= code < 300):
- response = self.parent.error(
- 'http', request, response, code, msg, hdrs)
-
- return response
-
- https_response = http_response
-
-class HTTPDefaultErrorHandler(BaseHandler):
- def http_error_default(self, req, fp, code, msg, hdrs):
- raise HTTPError(req.full_url, code, msg, hdrs, fp)
-
-class HTTPRedirectHandler(BaseHandler):
- # maximum number of redirections to any single URL
- # this is needed because of the state that cookies introduce
- max_repeats = 4
- # maximum total number of redirections (regardless of URL) before
- # assuming we're in a loop
- max_redirections = 10
-
- def redirect_request(self, req, fp, code, msg, headers, newurl):
- """Return a Request or None in response to a redirect.
-
- This is called by the http_error_30x methods when a
- redirection response is received. If a redirection should
- take place, return a new Request to allow http_error_30x to
- perform the redirect. Otherwise, raise HTTPError if no-one
- else should try to handle this url. Return None if you can't
- but another Handler might.
- """
- m = req.get_method()
- if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
- or code in (301, 302, 303) and m == "POST")):
- raise HTTPError(req.full_url, code, msg, headers, fp)
-
- # Strictly (according to RFC 2616), 301 or 302 in response to
- # a POST MUST NOT cause a redirection without confirmation
- # from the user (of urllib.request, in this case). In practice,
- # essentially all clients do redirect in this case, so we do
- # the same.
- # be conciliant with URIs containing a space
- newurl = newurl.replace(' ', '%20')
- CONTENT_HEADERS = ("content-length", "content-type")
- newheaders = dict((k, v) for k, v in req.headers.items()
- if k.lower() not in CONTENT_HEADERS)
- return Request(newurl,
- headers=newheaders,
- origin_req_host=req.origin_req_host,
- unverifiable=True)
-
- # Implementation note: To avoid the server sending us into an
- # infinite loop, the request object needs to track what URLs we
- # have already seen. Do this by adding a handler-specific
- # attribute to the Request object.
- def http_error_302(self, req, fp, code, msg, headers):
- # Some servers (incorrectly) return multiple Location headers
- # (so probably same goes for URI). Use first header.
- if "location" in headers:
- newurl = headers["location"]
- elif "uri" in headers:
- newurl = headers["uri"]
- else:
- return
-
- # fix a possible malformed URL
- urlparts = urlparse(newurl)
-
- # For security reasons we don't allow redirection to anything other
- # than http, https or ftp.
-
- if urlparts.scheme not in ('http', 'https', 'ftp', ''):
- raise HTTPError(
- newurl, code,
- "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
- headers, fp)
-
- if not urlparts.path:
- urlparts = list(urlparts)
- urlparts[2] = "/"
- newurl = urlunparse(urlparts)
-
- newurl = urljoin(req.full_url, newurl)
-
- # XXX Probably want to forget about the state of the current
- # request, although that might interact poorly with other
- # handlers that also use handler-specific request attributes
- new = self.redirect_request(req, fp, code, msg, headers, newurl)
- if new is None:
- return
-
- # loop detection
- # .redirect_dict has a key url if url was previously visited.
- if hasattr(req, 'redirect_dict'):
- visited = new.redirect_dict = req.redirect_dict
- if (visited.get(newurl, 0) >= self.max_repeats or
- len(visited) >= self.max_redirections):
- raise HTTPError(req.full_url, code,
- self.inf_msg + msg, headers, fp)
- else:
- visited = new.redirect_dict = req.redirect_dict = {}
- visited[newurl] = visited.get(newurl, 0) + 1
-
- # Don't close the fp until we are sure that we won't use it
- # with HTTPError.
- fp.read()
- fp.close()
-
- return self.parent.open(new, timeout=req.timeout)
-
- http_error_301 = http_error_303 = http_error_307 = http_error_302
-
- inf_msg = "The HTTP server returned a redirect error that would " \
- "lead to an infinite loop.\n" \
- "The last 30x error message was:\n"
-
-
-def _parse_proxy(proxy):
- """Return (scheme, user, password, host/port) given a URL or an authority.
-
- If a URL is supplied, it must have an authority (host:port) component.
- According to RFC 3986, having an authority component means the URL must
- have two slashes after the scheme:
-
- >>> _parse_proxy('file:/ftp.example.com/')
- Traceback (most recent call last):
- ValueError: proxy URL with no authority: 'file:/ftp.example.com/'
-
- The first three items of the returned tuple may be None.
-
- Examples of authority parsing:
-
- >>> _parse_proxy('proxy.example.com')
- (None, None, None, 'proxy.example.com')
- >>> _parse_proxy('proxy.example.com:3128')
- (None, None, None, 'proxy.example.com:3128')
-
- The authority component may optionally include userinfo (assumed to be
- username:password):
-
- >>> _parse_proxy('joe:password@proxy.example.com')
- (None, 'joe', 'password', 'proxy.example.com')
- >>> _parse_proxy('joe:password@proxy.example.com:3128')
- (None, 'joe', 'password', 'proxy.example.com:3128')
-
- Same examples, but with URLs instead:
-
- >>> _parse_proxy('http://proxy.example.com/')
- ('http', None, None, 'proxy.example.com')
- >>> _parse_proxy('http://proxy.example.com:3128/')
- ('http', None, None, 'proxy.example.com:3128')
- >>> _parse_proxy('http://joe:password@proxy.example.com/')
- ('http', 'joe', 'password', 'proxy.example.com')
- >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
- ('http', 'joe', 'password', 'proxy.example.com:3128')
-
- Everything after the authority is ignored:
-
- >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
- ('ftp', 'joe', 'password', 'proxy.example.com')
-
- Test for no trailing '/' case:
-
- >>> _parse_proxy('http://joe:password@proxy.example.com')
- ('http', 'joe', 'password', 'proxy.example.com')
-
- """
- scheme, r_scheme = splittype(proxy)
- if not r_scheme.startswith("/"):
- # authority
- scheme = None
- authority = proxy
- else:
- # URL
- if not r_scheme.startswith("//"):
- raise ValueError("proxy URL with no authority: %r" % proxy)
- # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
- # and 3.3.), path is empty or starts with '/'
- end = r_scheme.find("/", 2)
- if end == -1:
- end = None
- authority = r_scheme[2:end]
- userinfo, hostport = splituser(authority)
- if userinfo is not None:
- user, password = splitpasswd(userinfo)
- else:
- user = password = None
- return scheme, user, password, hostport
-
class ProxyHandler(BaseHandler):
    """Redirect requests through the proxies configured per URL scheme.

    For every ``scheme -> proxy URL`` entry in *proxies* the instance gets a
    ``<scheme>_open`` attribute that forwards to :meth:`proxy_open`.
    """

    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        """Install one ``<scheme>_open`` hook per entry of *proxies*.

        When *proxies* is None the mapping returned by getproxies() is used.
        """
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        for scheme, proxy_url in proxies.items():
            # Default arguments freeze the current loop values; a plain
            # closure would see only the last scheme/URL pair.
            setattr(self, '%s_open' % scheme,
                    lambda r, proxy=proxy_url, type=scheme,
                    meth=self.proxy_open:
                        meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        """Point *req* at *proxy*; reopen it if the proxy scheme differs.

        Returns None when the host is bypassed or when the regular handler
        chain can carry on with the rewritten request.
        """
        request_scheme = req.type
        proxy_scheme, user, password, hostport = _parse_proxy(proxy)
        if proxy_scheme is None:
            proxy_scheme = request_scheme

        if req.host and proxy_bypass(req.host):
            return None

        if user and password:
            credentials = '%s:%s' % (unquote(user), unquote(password))
            token = base64.b64encode(credentials.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + token)
        req.set_proxy(unquote(hostport), proxy_scheme)

        if request_scheme == proxy_scheme or request_scheme == 'https':
            # let other handlers take care of it
            return None
        # need to start over, because the other handlers don't
        # grok the proxy's URL type
        # e.g. if we have a constructor arg proxies like so:
        # {'http': 'ftp://proxy.example.com'}, we may end up turning
        # a request for http://acme.example.com/a into one for
        # ftp://proxy.example.com/a
        return self.parent.open(req, timeout=req.timeout)
-
class HTTPPasswordMgr(object):
    """Store (user, password) pairs keyed by realm and URI prefix.

    ``self.passwd`` maps ``realm -> {tuple_of_reduced_uris: (user, passwd)}``
    where a reduced URI is the ``(authority, path)`` pair produced by
    :meth:`reduce_uri`.
    """

    def __init__(self):
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register *user*/*passwd* for *realm* at *uri*.

        *uri* may be a single URI string or a sequence of URIs.
        """
        if isinstance(uri, str):
            uri = [uri]
        if realm not in self.passwd:
            self.passwd[realm] = {}
        # Store each URI both with and without the scheme's default port so
        # that "http://host/" and "http://host:80/" find the same entry.
        for default_port in True, False:
            reduced_uri = tuple(
                [self.reduce_uri(u, default_port) for u in uri])
            self.passwd[realm][reduced_uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return the (user, password) stored for *authuri* in *realm*.

        Returns ``(None, None)`` when no registered URI is a prefix of
        *authuri*.
        """
        domains = self.passwd.get(realm, {})
        for default_port in True, False:
            reduced_authuri = self.reduce_uri(authuri, default_port)
            for uris, authinfo in domains.items():
                for uri in uris:
                    if self.is_suburi(uri, reduced_authuri):
                        return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urlsplit(uri)
        if parts[1]:
            # URI
            scheme = parts[0]
            authority = parts[1]
            path = parts[2] or '/'
        else:
            # host or host:port
            scheme = None
            authority = uri
            path = '/'
        host, port = splitport(authority)
        if default_port and port is None and scheme is not None:
            dport = {"http": 80,
                     "https": 443,
                     }.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.  The comparison works on
        whole path segments: ``/foo`` covers ``/foo`` and ``/foo/bar`` but
        not ``/foobar``.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        # A character-wise common prefix would let credentials for "/foo"
        # leak to "/foobar"; require a segment boundary instead.
        prefix = base[1]
        if prefix[-1:] != '/':
            prefix += '/'
        return test[1].startswith(prefix)
-
-
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to the realm ``None``.

    Credentials registered under realm None act as a catch-all when the
    requested realm has no entry for the URI.
    """

    def find_user_password(self, realm, authuri):
        """Look up *authuri* in *realm* first, then in the None realm."""
        lookup = HTTPPasswordMgr.find_user_password
        user, password = lookup(self, realm, authuri)
        if user is None:
            return lookup(self, None, authuri)
        return user, password
-
-
class AbstractBasicAuthHandler(object):
    """Shared logic for HTTP Basic authentication (server and proxy).

    Subclasses supply ``auth_header`` (the request header to set) and, via
    BaseHandler, ``self.parent`` (the opener used to retry the request).
    """

    # Matches one "<scheme> realm=<value>" challenge at the start of the
    # header or right after a comma.  Anchoring on (?:^|,) instead of the
    # former "(?:.*,)*" prefix avoids catastrophic backtracking on headers
    # with many commas (CVE-2020-8492) and makes search() return the first
    # challenge that carries a realm rather than the last one.

    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    rx = re.compile('(?:^|,)'      # start of the header or a comma
                    '[ \t]*'       # optional whitespace
                    '([^ \t,]+)'   # scheme such as "Basic"
                    '[ \t]+'       # mandatory whitespace
                    # realm=xxx, realm='xxx' or realm="xxx"
                    'realm=(["\']?)([^"\']*)\\2',
                    re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        """Use *password_mgr* for lookups (a fresh HTTPPasswordMgr if None)."""
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password
        self.retried = 0

    def reset_retry_count(self):
        """Forget earlier attempts so the next challenge starts from zero."""
        self.retried = 0

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Answer the challenge found in the *authreq* header of *headers*.

        *host* may be an authority (without userinfo) or a URL with an
        authority.  Returns the response of the retried request, or None
        when there is no usable challenge or no credentials are known.

        Raises HTTPError once more than five retries were made and
        ValueError when the header starts with a scheme other than Basic.
        """
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)

        if self.retried > 5:
            # retry sending the username:password 5 times before failing.
            raise HTTPError(req.get_full_url(), 401, "basic auth failed",
                            headers, None)
        else:
            self.retried += 1

        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() != 'basic':
                raise ValueError("AbstractBasicAuthHandler does not"
                                 " support the following scheme: '%s'" %
                                 scheme)
            else:
                mo = AbstractBasicAuthHandler.rx.search(authreq)
                if mo:
                    scheme, quote, realm = mo.groups()
                    if quote not in ['"', "'"]:
                        warnings.warn("Basic Auth Realm was unquoted",
                                      UserWarning, 2)
                    if scheme.lower() == 'basic':
                        response = self.retry_http_basic_auth(host, req, realm)
                        # Anything but another 401 means the credentials
                        # were accepted, so the retry budget is restored.
                        if response and response.code != 401:
                            self.retried = 0
                        return response

    def retry_http_basic_auth(self, host, req, realm):
        """Re-send *req* with Basic credentials for *realm* at *host*.

        Returns None when no password is stored or when the request already
        carries exactly these credentials (retrying would change nothing).
        """
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
            if req.headers.get(self.auth_header, None) == auth:
                return None
            req.add_unredirected_header(self.auth_header, auth)
            return self.parent.open(req, timeout=req.timeout)
        else:
            return None
-
-
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 401 responses from the origin server with Basic credentials."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Retry *req* with credentials looked up under its full URL."""
        retried = self.http_error_auth_reqed('www-authenticate',
                                             req.full_url, req, headers)
        # Reached only when the retry did not raise; the counter then
        # starts from zero for the next challenge.
        self.reset_retry_count()
        return retried
-
-
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 407 responses from a proxy with Basic credentials."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Retry *req* with credentials looked up under ``req.host``."""
        # http_error_auth_reqed requires that there is no userinfo component in
        # authority.  Assume there isn't one, since urllib.request does not (and
        # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
        # userinfo.
        retried = self.http_error_auth_reqed('proxy-authenticate',
                                             req.host, req, headers)
        self.reset_retry_count()
        return retried
-
-
# Return n random bytes.
_randombytes = os.urandom


class AbstractDigestAuthHandler(object):
    """Shared logic for HTTP Digest authentication (server and proxy).

    Digest authentication is specified in RFC 2617.  Subclasses supply
    ``auth_header`` and, via BaseHandler, ``self.parent``.
    """

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        """Use *passwd* as password manager (a fresh HTTPPasswordMgr if None)."""
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0
        # nonce_count / last_nonce feed the "nc" value sent with qop=auth.
        self.nonce_count = 0
        self.last_nonce = None

    def reset_retry_count(self):
        """Forget earlier attempts so the next challenge starts from zero."""
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Answer the Digest challenge in header *auth_header* of *headers*.

        Returns the retried response, or None when the header is missing,
        announces Basic, or cannot be answered.  Raises HTTPError after more
        than five retries and ValueError for any scheme other than Digest
        or Basic.
        """
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.full_url, 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            elif scheme.lower() != 'basic':
                raise ValueError("AbstractDigestAuthHandler does not support"
                                 " the following scheme: '%s'" % scheme)

    def retry_http_digest_auth(self, req, auth):
        """Re-send *req* with an answer to the challenge string *auth*.

        Returns None when no answer can be computed or when the request
        already carries exactly that answer.
        """
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req, timeout=req.timeout)
            return resp

    def get_cnonce(self, nonce):
        """Return a 16-character hex client nonce."""
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + _randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the value of the Digest authorization header (sans "Digest ").

        *chal* is the parsed challenge mapping.  Returns None when the
        challenge lacks realm/nonce, names an unsupported algorithm, or no
        credentials are stored.  Raises URLError when the server offers a
        qop that does not include "auth".
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.full_url)
        if user is None:
            return None

        # XXX not implemented yet
        if req.data is not None:
            entdig = self.get_entity_digest(req.data, chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.selector)
        if qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        elif 'auth' in [option.strip() for option in qop.split(',')]:
            # The server may offer several options ("auth,auth-int"); plain
            # "auth" is the one implemented here, so pick it when offered.
            if nonce == self.last_nonce:
                self.nonce_count += 1
            else:
                self.nonce_count = 1
                self.last_nonce = nonce
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, 'auth',
                                           H(A2))
            respdig = KD(H(A1), noncebit)
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.selector,
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return the (H, KD) functions for *algorithm*.

        Supported values are 'MD5' and 'SHA'.  Anything else yields
        ``(None, None)``, which get_authorization() treats as "cannot
        answer this challenge".
        """
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        else:
            # XXX MD5-sess
            return None, None
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        """Placeholder for the auth-int entity digest; always returns None."""
        # XXX not implemented yet
        return None
-
-
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'
    handler_order = 490  # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        """Retry *req* after answering the server's Digest challenge."""
        netloc = urlparse(req.full_url)[1]
        response = self.http_error_auth_reqed('www-authenticate',
                                              netloc, req, headers)
        self.reset_retry_count()
        return response
-
-
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Answer 407 responses from a proxy with Digest credentials."""

    auth_header = 'Proxy-Authorization'
    handler_order = 490  # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        """Retry *req* after answering the proxy's Digest challenge."""
        response = self.http_error_auth_reqed('proxy-authenticate',
                                              req.host, req, headers)
        self.reset_retry_count()
        return response
-
-class AbstractHTTPHandler(BaseHandler):
-
- def __init__(self, debuglevel=0):
- self._debuglevel = debuglevel
-
- def set_http_debuglevel(self, level):
- self._debuglevel = level
-
- def do_request_(self, request):
- host = request.host
- if not host:
- raise URLError('no host given')
-
- if request.data is not None: # POST
- data = request.data
- if isinstance(data, str):
- msg = "POST data should be bytes or an iterable of bytes. " \
- "It cannot be of type str."
- raise TypeError(msg)
- if not request.has_header('Content-type'):
- request.add_unredirected_header(
- 'Content-type',
- 'application/x-www-form-urlencoded')
- if not request.has_header('Content-length'):
- size = None
- try:
- ### For Python-Future:
- if PY2 and isinstance(data, array.array):
- # memoryviews of arrays aren't supported
- # in Py2.7. (e.g. memoryview(array.array('I',
- # [1, 2, 3, 4])) raises a TypeError.)
- # So we calculate the size manually instead:
- size = len(data) * data.itemsize
- ###
- else:
- mv = memoryview(data)
- size = len(mv) * mv.itemsize
- except TypeError:
+# check for SSL
+try:
+ import ssl
+ # Not available in the SSL module in Py2:
+ from ssl import SSLContext
+except ImportError:
+ _have_ssl = False
+else:
+ _have_ssl = True
+
+__all__ = [
+ # Classes
+ 'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler',
+ 'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler',
+ 'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
+ 'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler',
+ 'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler',
+ 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler',
+ 'UnknownHandler', 'HTTPErrorProcessor',
+ # Functions
+ 'urlopen', 'install_opener', 'build_opener',
+ 'pathname2url', 'url2pathname', 'getproxies',
+ # Legacy interface
+ 'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
+]
+
# used in User-Agent header sent ("Python-urllib/<major>.<minor>").
# Built from sys.version_info: slicing sys.version to three characters
# truncates two-digit minor versions ("3.10.4" would become "3.1").
__version__ = '%d.%d' % sys.version_info[:2]
+
_opener = None


def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, **_3to2kwargs):
    """Open *url* (a string or Request object) and return the response.

    Keyword-only options, emulated through ``**_3to2kwargs`` because Python 2
    has no keyword-only parameters:

    cafile, capath -- CA certificates to verify an HTTPS server against.
    cadefault -- if true, verify against the system default CA paths.

    When any of the three is given, a one-off opener with a verifying
    HTTPSHandler is built; otherwise the module-wide opener is used and
    created on first call.

    Raises TypeError for any other keyword argument and ValueError when
    certificate options are given but SSL support is unavailable.
    """
    global _opener
    cafile = _3to2kwargs.pop('cafile', None)
    capath = _3to2kwargs.pop('capath', None)
    cadefault = _3to2kwargs.pop('cadefault', False)
    if _3to2kwargs:
        # A real keyword-only signature rejects unknown names; dropping them
        # silently would hide typos such as "ca_file".
        raise TypeError("urlopen() got an unexpected keyword argument %r"
                        % sorted(_3to2kwargs)[0])

    if cafile or capath or cadefault:
        if not _have_ssl:
            raise ValueError('SSL support not available')
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
        context.options |= ssl.OP_NO_SSLv2
        context.verify_mode = ssl.CERT_REQUIRED
        if cafile or capath:
            context.load_verify_locations(cafile, capath)
        else:
            context.set_default_verify_paths()
        https_handler = HTTPSHandler(context=context, check_hostname=True)
        opener = build_opener(https_handler)
    elif _opener is None:
        _opener = opener = build_opener()
    else:
        opener = _opener
    return opener.open(url, data, timeout)
+
def install_opener(opener):
    """Make *opener* the module-wide default opener.

    The object is stored in the module global ``_opener`` as given; no
    validation is performed.
    """
    global _opener
    _opener = opener
+
_url_tempfiles = []


def urlretrieve(url, filename=None, reporthook=None, data=None):
    """
    Download *url* to a file on disk.

    url -- the resource to fetch.
    filename -- target path; when omitted a temporary file is created and
        remembered in ``_url_tempfiles`` so urlcleanup() can delete it.
    reporthook -- optional callable invoked as
        ``reporthook(block_number, block_size, total_size)`` once before the
        first read and once after every block; total_size is -1 when the
        response has no Content-Length header.
    data -- passed through to urlopen().

    For a ``file:`` URL without *filename* nothing is copied: the
    normalized local path is returned directly.

    Returns ``(path, headers)``.  Raises ContentTooShortError when fewer
    bytes than announced by Content-Length were received.
    """
    url_type, path = splittype(url)

    with contextlib.closing(urlopen(url, data)) as response:
        headers = response.info()

        # Just return the local path and the "headers" for file://
        # URLs. No sense in performing a copy unless requested.
        if url_type == "file" and not filename:
            return os.path.normpath(path), headers

        # Pick the output file: the caller's path or a kept temp file.
        if filename:
            target = open(filename, 'wb')
        else:
            target = tempfile.NamedTemporaryFile(delete=False)
            filename = target.name
            _url_tempfiles.append(filename)

        with target:
            result = filename, headers
            block_size = 1024*8
            expected = -1
            received = 0
            block_index = 0
            if "content-length" in headers:
                expected = int(headers["Content-Length"])

            if reporthook:
                reporthook(block_index, block_size, expected)

            chunk = response.read(block_size)
            while chunk:
                received += len(chunk)
                target.write(chunk)
                block_index += 1
                if reporthook:
                    reporthook(block_index, block_size, expected)
                chunk = response.read(block_size)

    if expected >= 0 and received < expected:
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (received, expected), result)

    return result
+
def urlcleanup():
    """Delete temp files left by urlretrieve() and drop the default opener.

    Files that can no longer be removed are skipped silently.
    """
    global _opener
    for leftover in _url_tempfiles:
        try:
            os.unlink(leftover)
        except EnvironmentError:
            # Best effort: the file may be gone already or not removable.
            pass

    # Empty the list in place so other references to it stay valid.
    del _url_tempfiles[:]
    if _opener:
        _opener = None
+
# Matches a trailing ":<digits>" port suffix.  On Python 3 the pattern is
# compiled with re.ASCII so that \d matches ASCII digits only; Python 2 has
# no such flag, hence the plain compile (flags=0 is re.compile's default).
_cut_port_re = re.compile(r":\d+$", re.ASCII if PY3 else 0)
+
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    The host is taken from the network location of ``request.full_url``;
    when that is empty the request's "Host" header is used instead.  A
    trailing ":port" is stripped.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    host = urlparse(request.full_url)[1]
    if host == "":
        host = request.get_header("Host", "")

    # remove port, if present
    return _cut_port_re.sub("", host, 1).lower()
+
class Request(object):
    """A URL request: target URL, optional body, headers and bookkeeping.

    Attributes set by the constructor: ``full_url`` (without fragment),
    ``fragment``, ``data``, ``headers``, ``unredirected_hdrs``,
    ``origin_req_host``, ``unverifiable``, ``method``, and -- after parsing
    -- ``type``, ``host`` and ``selector``.
    """

    def __init__(self, url, data=None, headers=None,
                 origin_req_host=None, unverifiable=False,
                 method=None):
        """Create a request for *url*.

        data -- request body, or None.
        headers -- optional mapping of header names to values; each entry
            goes through add_header().
        origin_req_host -- defaults to request_host(self).
        unverifiable -- stored as given.
        method -- explicit HTTP method; see get_method() for the default.

        Raises ValueError when *url* has no scheme.
        """
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.full_url = unwrap(url)
        self.full_url, self.fragment = splittag(self.full_url)
        self.data = data
        self.headers = {}
        self._tunnel_host = None
        # None rather than a shared ``{}`` default keeps the signature free
        # of mutable default arguments.
        if headers is not None:
            for key, value in headers.items():
                self.add_header(key, value)
        self.unredirected_hdrs = {}
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable
        self.method = method
        self._parse()

    def _parse(self):
        """Derive ``type``, ``host`` and ``selector`` from ``full_url``."""
        self.type, rest = splittype(self.full_url)
        if self.type is None:
            raise ValueError("unknown url type: %r" % self.full_url)
        self.host, self.selector = splithost(rest)
        if self.host:
            self.host = unquote(self.host)

    def get_method(self):
        """Return a string indicating the HTTP request method."""
        if self.method is not None:
            return self.method
        elif self.data is not None:
            return "POST"
        else:
            return "GET"

    def get_full_url(self):
        """Return the URL including its ``#fragment``, if it has one."""
        if self.fragment:
            return '%s#%s' % (self.full_url, self.fragment)
        else:
            return self.full_url

    # Begin deprecated methods
    #
    # Each warns with stacklevel=2 so the warning is attributed to the code
    # calling the deprecated method, not to this module.

    def add_data(self, data):
        """Deprecated: assign to ``data`` instead."""
        msg = "Request.add_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        self.data = data

    def has_data(self):
        """Deprecated: test ``data is not None`` instead."""
        msg = "Request.has_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.data is not None

    def get_data(self):
        """Deprecated: read ``data`` instead."""
        msg = "Request.get_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.data

    def get_type(self):
        """Deprecated: read ``type`` instead."""
        msg = "Request.get_type method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.type

    def get_host(self):
        """Deprecated: read ``host`` instead."""
        msg = "Request.get_host method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.host

    def get_selector(self):
        """Deprecated: read ``selector`` instead."""
        msg = "Request.get_selector method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.selector

    def is_unverifiable(self):
        """Deprecated: read ``unverifiable`` instead."""
        msg = "Request.is_unverifiable method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.unverifiable

    def get_origin_req_host(self):
        """Deprecated: read ``origin_req_host`` instead."""
        msg = "Request.get_origin_req_host method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return self.origin_req_host

    # End deprecated methods

    def set_proxy(self, host, type):
        """Send this request through the proxy at *host*.

        The first time an https request is proxied, the original host is
        remembered in ``_tunnel_host`` and type/selector stay untouched;
        otherwise the request takes the proxy's *type* and uses the full
        URL as selector.
        """
        if self.type == 'https' and not self._tunnel_host:
            self._tunnel_host = self.host
        else:
            self.type = type
            self.selector = self.full_url
        self.host = host

    def has_proxy(self):
        """Return True when the selector is the full URL (see set_proxy)."""
        return self.selector == self.full_url

    def add_header(self, key, val):
        """Set a header; the name is normalized with str.capitalize()."""
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        """Set a header that is kept separate from the regular headers."""
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """Return True if *header_name* is set (exact, capitalized name)."""
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        """Return a header value; regular headers win over unredirected."""
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def header_items(self):
        """Return all headers as a list of (name, value) pairs.

        Regular headers override unredirected ones of the same name.
        """
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return list(hdrs.items())
+
class OpenerDirector(object):
    """Dispatch requests to handlers registered by method-name convention.

    Handler methods named ``<protocol>_open``, ``<protocol>_request``,
    ``<protocol>_response`` and ``<protocol>_error_<kind>`` are discovered
    by add_handler() and stored in the ``handle_open``, ``process_request``,
    ``process_response`` and ``handle_error`` tables.
    """

    def __init__(self):
        self.addheaders = [('User-agent', "Python-urllib/%s" % __version__)]
        # self.handlers is retained only for backward compatibility
        self.handlers = []
        # manage the individual handlers
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register every conventionally named method of *handler*.

        Raises TypeError when *handler* has no ``add_parent`` attribute.  A
        handler contributing no method at all is ignored entirely.
        """
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        # Conditions whose table is keyed directly by protocol.
        tables_by_condition = {
            "open": self.handle_open,
            "response": self.process_response,
            "request": self.process_request,
        }

        registered = False
        for name in dir(handler):
            if name in ("redirect_request", "do_open", "proxy_open"):
                # oops, coincidental match
                continue

            sep = name.find("_")
            protocol = name[:sep]
            condition = name[sep+1:]

            if condition.startswith("error"):
                # "<protocol>_error_<kind>": kind is whatever follows the
                # second underscore, as an int when it looks like one.
                kind_sep = condition.find("_") + sep + 1
                kind = name[kind_sep+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                table = self.handle_error.setdefault(protocol, {})
            elif condition in tables_by_condition:
                kind = protocol
                table = tables_by_condition[condition]
            else:
                continue

            chain = table.setdefault(kind, [])
            if chain:
                bisect.insort(chain, handler)
            else:
                chain.append(handler)
            registered = True

        if registered:
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        """Call *meth_name* on each handler of ``chain[kind]`` in order.

        Returns the first result that is not None, else None.
        """
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        for handler in chain.get(kind, ()):
            outcome = getattr(handler, meth_name)(*args)
            if outcome is not None:
                return outcome

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """
        Accept a URL or a Request object

        Python-Future: if the URL is passed as a byte-string, decode it first.
        """
        if isinstance(fullurl, bytes):
            fullurl = fullurl.decode()
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.data = data

        req.timeout = timeout
        protocol = req.type

        # pre-process request
        request_hook = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            req = getattr(processor, request_hook)(req)

        response = self._open(req, data)

        # post-process response
        response_hook = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            response = getattr(processor, response_hook)(req, response)

        return response

    def _open(self, req, data=None):
        """Try the 'default', protocol-specific and 'unknown' open chains."""
        protocol = req.type
        attempts = (
            ('default', 'default_open'),
            (protocol, protocol + '_open'),
        )
        for kind, meth_name in attempts:
            result = self._call_chain(self.handle_open, kind, meth_name, req)
            if result:
                return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error for *proto* to the registered error handlers.

        For 'http' and 'https' the handlers are looked up by the status
        code in ``args[2]`` and, if none yields a truthy result, the
        ``http_error_default`` chain is tried.
        """
        is_http = proto in ('http', 'https')
        if is_http:
            # XXX http[s] protocols are special-cased
            table = self.handle_error['http']  # https is not different than http
            code = args[2]  # YUCK!
            first_try = (table, code, 'http_error_%s' % code)
        else:
            table = self.handle_error
            first_try = (table, proto, proto + '_error')

        result = self._call_chain(*(first_try + args))
        if result:
            return result

        if is_http:
            fallback = (table, 'default', 'http_error_default') + args
            return self._call_chain(*fallback)
+
# XXX probably also want an abstract factory that knows when it makes
# sense to skip a superclass in favor of a subclass and when it might
# make sense to include both

def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    *handlers* may contain handler instances or handler classes (classes
    are instantiated without arguments).  The default handlers are added
    first -- HTTPSHandler among them only when http_client provides
    HTTPSConnection -- followed by the given ones.

    A default handler class is left out when any given handler is an
    instance or a subclass of it.
    """
    def isclass(obj):
        return isinstance(obj, type) or hasattr(obj, "__bases__")

    def replaces(candidate, klass):
        # True when *candidate* (class or instance) stands in for *klass*.
        if isclass(candidate):
            return issubclass(candidate, klass)
        return isinstance(candidate, klass)

    opener = OpenerDirector()
    defaults = [ProxyHandler, UnknownHandler, HTTPHandler,
                HTTPDefaultErrorHandler, HTTPRedirectHandler,
                FTPHandler, FileHandler, HTTPErrorProcessor]
    if hasattr(http_client, "HTTPSConnection"):
        defaults.append(HTTPSHandler)

    overridden = set(klass
                     for klass in defaults
                     for candidate in handlers
                     if replaces(candidate, klass))

    for klass in defaults:
        if klass not in overridden:
            opener.add_handler(klass())

    for candidate in handlers:
        if isclass(candidate):
            candidate = candidate()
        opener.add_handler(candidate)
    return opener
+
class BaseHandler(object):
    """Base class for handlers registered with an OpenerDirector.

    ``handler_order`` determines the position in a handler chain: handlers
    with lower values sort first.
    """

    handler_order = 500

    def add_parent(self, parent):
        """Remember *parent* as ``self.parent``."""
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility
        pass

    def __lt__(self, other):
        """Order handlers by ``handler_order``."""
        if hasattr(other, "handler_order"):
            return self.handler_order < other.handler_order
        # Try to preserve the old behavior of having custom classes
        # inserted after default ones (works only for custom user
        # classes which are not aware of handler_order).
        return True
+
+
class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        """Pass 2xx responses through; route all others to parent.error()."""
        code, msg, hdrs = response.code, response.msg, response.info()

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= code < 300:
            return response
        return self.parent.error('http', request, response, code, msg, hdrs)

    https_response = http_response
+
class HTTPDefaultErrorHandler(BaseHandler):
    """Last-resort handler: turn any HTTP error response into HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Raise HTTPError built from the request URL and the response."""
        url = req.full_url
        raise HTTPError(url, code, msg, hdrs, fp)
+
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301, 302, 303 and 307 redirects, with loop protection."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    inf_msg = ("The HTTP server returned a redirect error that would "
               "lead to an infinite loop.\n"
               "The last 30x error message was:\n")

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        Called by the http_error_30x methods when a redirection response
        arrives.  Return a new Request to have the redirect followed,
        raise HTTPError if nobody else should try to handle this url, or
        return None if this handler can't but another one might.
        """
        method = req.get_method()
        safe_method = (code in (301, 302, 303, 307)
                       and method in ("GET", "HEAD"))
        redirectable_post = code in (301, 302, 303) and method == "POST"
        if not (safe_method or redirectable_post):
            raise HTTPError(req.full_url, code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.
        # be conciliant with URIs containing a space
        newurl = newurl.replace(' ', '%20')
        # The new request has no body, so body-describing headers go.
        CONTENT_HEADERS = ("content-length", "content-type")
        kept_headers = dict((name, value)
                            for name, value in req.headers.items()
                            if name.lower() not in CONTENT_HEADERS)
        return Request(newurl,
                       headers=kept_headers,
                       origin_req_host=req.origin_req_host,
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Follow the redirect named by the Location (or URI) header.

        Returns None when neither header is present or redirect_request()
        declines.  Raises HTTPError for a disallowed target scheme or when
        the loop limits are exceeded.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        for header_name in ("location", "uri"):
            if header_name in headers:
                newurl = headers[header_name]
                break
        else:
            return

        # fix a possible malformed URL
        urlparts = urlparse(newurl)

        # For security reasons we don't allow redirection to anything other
        # than http, https or ftp.

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(
                newurl, code,
                "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
                headers, fp)

        if not urlparts.path:
            urlparts = list(urlparts)
            urlparts[2] = "/"
        newurl = urljoin(req.full_url, urlunparse(urlparts))

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            too_many_repeats = visited.get(newurl, 0) >= self.max_repeats
            if too_many_repeats or len(visited) >= self.max_redirections:
                raise HTTPError(req.full_url, code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)

    http_error_301 = http_error_303 = http_error_307 = http_error_302
+
+
+def _parse_proxy(proxy):
+ """Return (scheme, user, password, host/port) given a URL or an authority.
+
+ If a URL is supplied, it must have an authority (host:port) component.
+ According to RFC 3986, having an authority component means the URL must
+ have two slashes after the scheme:
+
+ >>> _parse_proxy('file:/ftp.example.com/')
+ Traceback (most recent call last):
+ ValueError: proxy URL with no authority: 'file:/ftp.example.com/'
+
+ The first three items of the returned tuple may be None.
+
+ Examples of authority parsing:
+
+ >>> _parse_proxy('proxy.example.com')
+ (None, None, None, 'proxy.example.com')
+ >>> _parse_proxy('proxy.example.com:3128')
+ (None, None, None, 'proxy.example.com:3128')
+
+ The authority component may optionally include userinfo (assumed to be
+ username:password):
+
+ >>> _parse_proxy('joe:password@proxy.example.com')
+ (None, 'joe', 'password', 'proxy.example.com')
+ >>> _parse_proxy('joe:password@proxy.example.com:3128')
+ (None, 'joe', 'password', 'proxy.example.com:3128')
+
+ Same examples, but with URLs instead:
+
+ >>> _parse_proxy('http://proxy.example.com/')
+ ('http', None, None, 'proxy.example.com')
+ >>> _parse_proxy('http://proxy.example.com:3128/')
+ ('http', None, None, 'proxy.example.com:3128')
+ >>> _parse_proxy('http://joe:password@proxy.example.com/')
+ ('http', 'joe', 'password', 'proxy.example.com')
+ >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
+ ('http', 'joe', 'password', 'proxy.example.com:3128')
+
+ Everything after the authority is ignored:
+
+ >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
+ ('ftp', 'joe', 'password', 'proxy.example.com')
+
+ Test for no trailing '/' case:
+
+ >>> _parse_proxy('http://joe:password@proxy.example.com')
+ ('http', 'joe', 'password', 'proxy.example.com')
+
+ """
+ scheme, r_scheme = splittype(proxy)
+ if not r_scheme.startswith("/"):
+ # authority
+ scheme = None
+ authority = proxy
+ else:
+ # URL
+ if not r_scheme.startswith("//"):
+ raise ValueError("proxy URL with no authority: %r" % proxy)
+ # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
+ # and 3.3.), path is empty or starts with '/'
+ end = r_scheme.find("/", 2)
+ if end == -1:
+ end = None
+ authority = r_scheme[2:end]
+ userinfo, hostport = splituser(authority)
+ if userinfo is not None:
+ user, password = splitpasswd(userinfo)
+ else:
+ user = password = None
+ return scheme, user, password, hostport
+
class ProxyHandler(BaseHandler):
    """Route requests through the proxies configured per URL scheme.

    For every ``scheme -> proxy URL`` entry of *proxies* (default: the
    result of ``getproxies()``) the instance gets a ``<scheme>_open``
    attribute that forwards to ``proxy_open``.
    """

    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        for scheme, address in proxies.items():
            # Default arguments freeze the per-iteration values; a plain
            # closure would see only the last scheme/address pair.
            opener = (lambda r, proxy=address, type=scheme,
                      meth=self.proxy_open: meth(r, proxy, type))
            setattr(self, '%s_open' % scheme, opener)

    def proxy_open(self, req, proxy, type):
        """Point *req* at *proxy*; re-dispatch it if the scheme changed.

        Returns None when the host is bypassed or when the remaining
        handlers can process the request, otherwise the response of
        re-opening the request through the parent opener.
        """
        request_scheme = req.type
        proxy_scheme, user, password, hostport = _parse_proxy(proxy)
        if proxy_scheme is None:
            proxy_scheme = request_scheme

        if req.host and proxy_bypass(req.host):
            return None

        if user and password:
            credentials = '%s:%s' % (unquote(user), unquote(password))
            token = base64.b64encode(credentials.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + token)
        req.set_proxy(unquote(hostport), proxy_scheme)

        if request_scheme == proxy_scheme or request_scheme == 'https':
            # let other handlers take care of it
            return None
        # Need to start over, because the other handlers don't grok the
        # proxy's URL type, e.g. with proxies={'http': 'ftp://proxy.example.com'}
        # a request for http://acme.example.com/a may become one for
        # ftp://proxy.example.com/a
        return self.parent.open(req, timeout=req.timeout)
+
class HTTPPasswordMgr(object):
    """Store (user, password) pairs keyed by realm and URI prefix."""

    def __init__(self):
        # realm -> {tuple of reduced URIs -> (user, passwd)}
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register credentials for *realm* under one URI or several.

        Each URI is stored twice, with and without the scheme's default
        port, so that lookups succeed in either spelling.
        """
        # uri could be a single URI or a sequence
        if isinstance(uri, str):
            uri = [uri]
        realm_store = self.passwd.setdefault(realm, {})
        for default_port in True, False:
            reduced_uri = tuple(
                [self.reduce_uri(u, default_port) for u in uri])
            realm_store[reduced_uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return the (user, password) stored for *authuri* in *realm*.

        Returns (None, None) when no registered URI covers *authuri*.
        """
        domains = self.passwd.get(realm, {})
        for default_port in True, False:
            reduced_authuri = self.reduce_uri(authuri, default_port)
            for uris, authinfo in domains.items():
                for uri in uris:
                    if self.is_suburi(uri, reduced_authuri):
                        return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urlsplit(uri)
        if parts[1]:
            # URI
            scheme = parts[0]
            authority = parts[1]
            path = parts[2] or '/'
        else:
            # host or host:port
            scheme = None
            authority = uri
            path = '/'
        host, port = splitport(authority)
        if default_port and port is None and scheme is not None:
            dport = {"http": 80,
                     "https": 443,
                     }.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        # Compare on a path-segment boundary.  A character-wise common
        # prefix would treat "/foobar" as being below "/foo" and hand the
        # credentials for one resource to an unrelated sibling.
        prefix = base[1]
        if prefix[-1:] != '/':
            prefix += '/'
        return test[1].startswith(prefix)
+
+
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to the realm ``None``."""

    def find_user_password(self, realm, authuri):
        """Look up *authuri* in *realm*, then in the default realm."""
        lookup = HTTPPasswordMgr.find_user_password
        user, password = lookup(self, realm, authuri)
        if user is None:
            # Nothing registered for this realm: try the catch-all entry.
            return lookup(self, None, authuri)
        return user, password
+
+
class AbstractBasicAuthHandler(object):
    """Shared logic for answering HTTP Basic authentication challenges.

    Subclasses provide ``auth_header`` (the request header that carries the
    credentials) and ``parent`` (the opener used to resend the request).
    """

    # Matches `<scheme> realm=<value>` at the start of the header or after
    # a comma.  Double- and single-quoted realm values are accepted (single
    # quotes violate the RFC, but appear in the wild).
    #
    # The pattern is anchored on "start of string or ','" instead of a
    # leading `(?:.*,)*`: the latter backtracks exponentially on a header
    # containing many commas, letting a hostile server stall the client
    # (CVE-2020-8492).  With several realms in one header the first one
    # is used.
    rx = re.compile('(?:^|,)'    # start of the string or ','
                    '[ \t]*'     # optional whitespace
                    '([^ \t]+)'  # scheme, e.g. "Basic"
                    '[ \t]+'     # mandatory whitespace
                    # realm=xxx / realm='xxx' / realm="xxx"
                    'realm=(["\']?)([^"\']*)\\2',
                    re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password
        self.retried = 0

    def reset_retry_count(self):
        """Reset the counter of authentication attempts."""
        self.retried = 0

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Answer the challenge found in the *authreq* header.

        *host* may be an authority (without userinfo) or a URL with an
        authority.  Returns the response of the retried request, or None
        when there is no usable challenge or no matching credentials.

        Raises HTTPError once more than five attempts were made and
        ValueError when the challenge uses a scheme other than Basic.
        """
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)

        if self.retried > 5:
            # retry sending the username:password 5 times before failing.
            raise HTTPError(req.get_full_url(), 401, "basic auth failed",
                            headers, None)
        else:
            self.retried += 1

        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() != 'basic':
                raise ValueError("AbstractBasicAuthHandler does not"
                                 " support the following scheme: '%s'" %
                                 scheme)
            else:
                mo = AbstractBasicAuthHandler.rx.search(authreq)
                if mo:
                    scheme, quote, realm = mo.groups()
                    if quote not in ['"', "'"]:
                        warnings.warn("Basic Auth Realm was unquoted",
                                      UserWarning, 2)
                    if scheme.lower() == 'basic':
                        response = self.retry_http_basic_auth(host, req, realm)
                        if response and response.code != 401:
                            self.retried = 0
                        return response

    def retry_http_basic_auth(self, host, req, realm):
        """Resend *req* with Basic credentials for *realm*.

        Returns None when no password is known or when the very same
        credentials are already present on the request.
        """
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
            if req.headers.get(self.auth_header, None) == auth:
                # These credentials were already rejected; do not loop.
                return None
            req.add_unredirected_header(self.auth_header, auth)
            return self.parent.open(req, timeout=req.timeout)
        else:
            return None
+
+
+class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
+
+ auth_header = 'Authorization'
+
+ def http_error_401(self, req, fp, code, msg, headers):
+ url = req.full_url
+ response = self.http_error_auth_reqed('www-authenticate',
+ url, req, headers)
+ self.reset_retry_count()
+ return response
+
+
+class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
+
+ auth_header = 'Proxy-authorization'
+
+ def http_error_407(self, req, fp, code, msg, headers):
+ # http_error_auth_reqed requires that there is no userinfo component in
+ # authority. Assume there isn't one, since urllib.request does not (and
+ # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
+ # userinfo.
+ authority = req.host
+ response = self.http_error_auth_reqed('proxy-authenticate',
+ authority, req, headers)
+ self.reset_retry_count()
+ return response
+
+
+# Return n random bytes.
+_randombytes = os.urandom
+
+
class AbstractDigestAuthHandler(object):
    """Shared logic for answering HTTP Digest challenges (RFC 2617).

    Subclasses provide ``auth_header`` and ``parent``.
    """

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0
        self.nonce_count = 0
        self.last_nonce = None

    def reset_retry_count(self):
        """Reset the counter of authentication attempts."""
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Answer the challenge found in the *auth_header* header.

        Returns the retried response for a Digest challenge and None for
        a missing or Basic challenge.  Raises HTTPError once more than
        five attempts were made and ValueError for any other scheme.
        """
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.full_url, 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            elif scheme.lower() != 'basic':
                raise ValueError("AbstractDigestAuthHandler does not support"
                                 " the following scheme: '%s'" % scheme)

    def retry_http_digest_auth(self, req, auth):
        """Resend *req* with a Digest response to the challenge *auth*.

        Returns None when no authorization can be computed or when the
        identical header value is already present on the request.
        """
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req, timeout=req.timeout)
            return resp

    def get_cnonce(self, nonce):
        """Return a 16-hex-digit client nonce."""
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + _randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the Digest credentials for *req* from the challenge *chal*.

        Returns the header value without the leading "Digest ", or None
        when the challenge lacks realm/nonce, names an unsupported
        algorithm, or no user is known for the realm.  Raises URLError
        when the server offers a qop list that does not include "auth".
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
        except KeyError:
            return None
        qop = chal.get('qop')
        algorithm = chal.get('algorithm', 'MD5')
        # mod_digest doesn't send an opaque, even though it isn't
        # supposed to be optional
        opaque = chal.get('opaque', None)

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.full_url)
        if user is None:
            return None

        # XXX not implemented yet
        if req.data is not None:
            entdig = self.get_entity_digest(req.data, chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.selector)
        if qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        elif 'auth' in [option.strip() for option in qop.split(',')]:
            # The server may offer several options ("auth,auth-int");
            # answer with plain "auth" whenever it is among them.
            if nonce == self.last_nonce:
                self.nonce_count += 1
            else:
                self.nonce_count = 1
                self.last_nonce = nonce
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce,
                                           'auth', H(A2))
            respdig = KD(H(A1), noncebit)
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.selector,
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return the (H, KD) functions for *algorithm*.

        Returns (None, None) for an algorithm other than 'MD5' or 'SHA',
        which callers treat as "cannot authenticate".
        """
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        else:
            # XXX MD5-sess
            return None, None
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        """Entity digests are not implemented; always returns None."""
        # XXX not implemented yet
        return None
+
+
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Digest authentication (RFC 2069) against the origin server.

    Unlike Basic authentication, Digest does not send the password in
    the clear.
    """

    handler_order = 490  # before Basic auth
    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Retry *req*, passing along the network location of its URL."""
        netloc = urlparse(req.full_url)[1]
        result = self.http_error_auth_reqed(
            'www-authenticate', netloc, req, headers)
        # Only reached when the retry did not raise.
        self.reset_retry_count()
        return result
+
+
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Digest authentication against a proxy (answers 407 responses)."""

    handler_order = 490  # before Basic auth
    auth_header = 'Proxy-Authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Retry *req*, passing along the request host."""
        result = self.http_error_auth_reqed(
            'proxy-authenticate', req.host, req, headers)
        # Only reached when the retry did not raise.
        self.reset_retry_count()
        return result
+
class AbstractHTTPHandler(BaseHandler):
    """Common request preparation and sending for HTTP and HTTPS."""

    def __init__(self, debuglevel=0):
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        """Set the debug level applied to connections opened from now on."""
        self._debuglevel = level

    def do_request_(self, request):
        """Fill in the headers a request needs before it is sent.

        Adds Content-type and Content-length for requests with data, a
        Host header, and the opener's default headers, each only when the
        request does not already carry it.  Returns *request*.

        Raises URLError when the request has no host, TypeError when the
        data is a str, and ValueError when the data is an iterable whose
        length cannot be determined and no Content-length was given.
        """
        host = request.host
        if not host:
            raise URLError('no host given')

        if request.data is not None:  # POST
            data = request.data
            if isinstance(data, str):
                msg = "POST data should be bytes or an iterable of bytes. " \
                      "It cannot be of type str."
                raise TypeError(msg)
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if not request.has_header('Content-length'):
                try:
                    ### For Python-Future:
                    if PY2 and isinstance(data, array.array):
                        # memoryviews of arrays aren't supported
                        # in Py2.7. (e.g. memoryview(array.array('I',
                        # [1, 2, 3, 4])) raises a TypeError.)
                        # So we calculate the size manually instead:
                        size = len(data) * data.itemsize
                    ###
                    else:
                        mv = memoryview(data)
                        size = len(mv) * mv.itemsize
                except TypeError:
                    # Not a buffer.  Other iterables cannot be measured
                    # without consuming them, so the caller must say how
                    # long they are.
                    if isinstance(data, Iterable):
                        raise ValueError("Content-Length should be specified "
                                "for iterable data of type %r %r" % (type(data),
                                data))
                else:
                    request.add_unredirected_header(
                        'Content-length', '%d' % size)

        sel_host = host
        if request.has_proxy():
            # Through a proxy the selector is the full URL; the Host
            # header must name the origin server, not the proxy.
            scheme, sel = splittype(request.selector)
            sel_host, sel_path = splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req, **http_conn_args):
        """Return an HTTPResponse object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.

        Raises URLError when the request has no host or when sending it
        fails with a socket error.  The connection is closed before any
        exception leaves this method.
        """
        host = req.host
        if not host:
            raise URLError('no host given')

        # will parse host:port
        h = http_class(host, timeout=req.timeout, **http_conn_args)
        # Apply the level given to __init__() / set_http_debuglevel();
        # without this call the setting has no effect.  getattr() keeps
        # subclasses working that override __init__ without chaining up.
        h.set_debuglevel(getattr(self, '_debuglevel', 0))

        headers = dict(req.unredirected_hdrs)
        headers.update(dict((k, v) for k, v in req.headers.items()
                            if k not in headers))

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = dict((name.title(), val) for name, val in headers.items())

        if req._tunnel_host:
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                # Proxy-Authorization should not be sent to origin
                # server.
                del headers[proxy_auth_hdr]
            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            try:
                h.request(req.get_method(), req.selector, req.data, headers)
            except socket.error as err:  # timeout error
                raise URLError(err)
            r = h.getresponse()
        except BaseException:
            # Do not leave the connection open when either sending the
            # request or reading the response head failed.
            h.close()
            raise

        # If the server does not send us a 'Connection: close' header,
        # HTTPConnection assumes the socket should be left open. Manually
        # mark the socket to be closed when this response object goes away.
        if h.sock:
            h.sock.close()
            h.sock = None

        r.url = req.get_full_url()
        # urllib clients expect the reason phrase in .msg, so overwrite
        # the attribute with it here.  It would be good to deprecate this
        # and have clients use info() or .headers for the headers instead.
        r.msg = r.reason
        return r
-
-
class HTTPHandler(AbstractHTTPHandler):
    """Open http: URLs over plain HTTP connections."""

    # Requests are prepared by the shared implementation.
    http_request = AbstractHTTPHandler.do_request_

    def http_open(self, req):
        """Send *req* over an ``HTTPConnection`` and return the response."""
        connection_class = http_client.HTTPConnection
        return self.do_open(connection_class, req)
-
# HTTPS support exists only when the HTTP client library was built with it.
if hasattr(http_client, 'HTTPSConnection'):

    class HTTPSHandler(AbstractHTTPHandler):
        """Open https: URLs over ``HTTPSConnection``."""

        # Requests are prepared by the shared implementation.
        https_request = AbstractHTTPHandler.do_request_

        def __init__(self, debuglevel=0, context=None, check_hostname=None):
            AbstractHTTPHandler.__init__(self, debuglevel)
            # Both values are handed to every connection unchanged.
            self._context = context
            self._check_hostname = check_hostname

        def https_open(self, req):
            """Send *req* over an ``HTTPSConnection`` and return the response."""
            connection_class = http_client.HTTPSConnection
            return self.do_open(connection_class, req,
                                context=self._context,
                                check_hostname=self._check_hostname)

    __all__.append('HTTPSHandler')
-
class HTTPCookieProcessor(BaseHandler):
    """Attach cookies to requests and collect them from responses."""

    def __init__(self, cookiejar=None):
        import future.backports.http.cookiejar as http_cookiejar
        # Without a jar from the caller, start with an empty one.
        self.cookiejar = (http_cookiejar.CookieJar()
                          if cookiejar is None else cookiejar)

    def http_request(self, request):
        """Let the jar add its Cookie header to *request*; return it."""
        jar = self.cookiejar
        jar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        """Let the jar read the cookies of *response*; return it."""
        jar = self.cookiejar
        jar.extract_cookies(response, request)
        return response

    # HTTPS traffic is processed the same way.
    https_request = http_request
    https_response = http_response
-
class UnknownHandler(BaseHandler):
    """Reject requests for URL types that no handler opened."""

    def unknown_open(self, req):
        """Always raise URLError naming the request's URL type."""
        raise URLError('unknown url type: %s' % req.type)
-
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Returns a dict.  A value enclosed in double quotes is stored without
    the quotes; an empty value (``key=``) is stored as the empty string.
    Raises ValueError for an element that contains no ``=``.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices instead of v[0] / v[-1]: indexing raises IndexError on
        # the empty value of an element such as 'realm='.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
-
def parse_http_list(s):
    """Split a comma-separated list in the sense of RFC 2068 Section 2.

    Elements may contain quoted-strings, and a comma inside a
    quoted-string does not separate elements.  Only double quotes open a
    quoted-string; a quote in the middle of an unquoted element does so
    as well.  Inside a quoted-string a backslash escapes the following
    character and is itself dropped.  Elements are returned stripped of
    surrounding whitespace.
    """
    items = []
    current = []        # characters of the element being collected
    escaped = False     # previous character was a backslash inside quotes
    in_quotes = False

    for ch in s:
        if escaped:
            current.append(ch)
            escaped = False
        elif in_quotes:
            if ch == '\\':
                escaped = True
            else:
                if ch == '"':
                    in_quotes = False
                current.append(ch)
        elif ch == ',':
            items.append(''.join(current))
            current = []
        else:
            if ch == '"':
                in_quotes = True
            current.append(ch)

    # A trailing element is kept only when it is non-empty.
    if current:
        items.append(''.join(current))

    return [item.strip() for item in items]
-
class FileHandler(BaseHandler):
    """Open file: URLs that refer to the local machine."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        """Open the local file named by *req*.

        Raises URLError when the URL names a host that is neither
        'localhost' nor one of the addresses from get_names().
        """
        url = req.selector
        if url[:2] == '//' and url[2:3] != '/' and (req.host and
                req.host != 'localhost'):
            # Membership test.  An identity comparison against the tuple
            # is never true, which rejected every host unconditionally.
            if req.host not in self.get_names():
                raise URLError("file:// scheme is supported only on localhost")
        # open_local_file() verifies the host once more before opening.
        return self.open_local_file(req)

    # names for the localhost
    names = None
    def get_names(self):
        """Return the tuple of addresses of this machine.

        The result is computed once and cached on the class.
        """
        if FileHandler.names is None:
            try:
                FileHandler.names = tuple(
                    socket.gethostbyname_ex('localhost')[2] +
                    socket.gethostbyname_ex(socket.gethostname())[2])
            except socket.gaierror:
                FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for the file named by *req*.

        The response headers carry Content-type, Content-length and
        Last-modified.  Raises URLError when the file cannot be stat'ed or
        opened, or when the URL's host is not this machine.
        """
        import future.backports.email.utils as email_utils
        import mimetypes
        host = req.host
        filename = req.selector
        localfile = url2pathname(filename)
        try:
            stats = os.stat(localfile)
            size = stats.st_size
            modified = email_utils.formatdate(stats.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(filename)[0]
            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', size, modified))
            if host:
                host, port = splitport(host)
            # Local means: no host at all, or a host without a port that
            # resolves to one of this machine's addresses.
            if not host or \
                    (not port and _safe_gethostbyname(host) in self.get_names()):
                if host:
                    origurl = 'file://' + host + filename
                else:
                    origurl = 'file://' + filename
                return addinfourl(open(localfile, 'rb'), headers, origurl)
        except OSError as exp:
            # users shouldn't expect OSErrors coming from urlopen()
            raise URLError(exp)
        raise URLError('file not on local host')
-
-def _safe_gethostbyname(host):
- try:
- return socket.gethostbyname(host)
- except socket.gaierror:
- return None
-
class FTPHandler(BaseHandler):
    """Open ftp: URLs."""

    def ftp_open(self, req):
        """Retrieve the file or directory listing named by *req*.

        Returns an addinfourl whose headers carry Content-type and
        Content-length when they are known.  Raises URLError when the
        request has no host, the host cannot be resolved, or the FTP
        exchange fails.
        """
        import ftplib
        import mimetypes

        host = req.host
        if not host:
            raise URLError('ftp error: no host given')
        host, port = splitport(host)
        port = ftplib.FTP_PORT if port is None else int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''

        try:
            host = socket.gethostbyname(host)
        except socket.error as msg:
            raise URLError(msg)

        path, attrs = splitattr(req.selector)
        segments = [unquote(segment) for segment in path.split('/')]
        dirs, file = segments[:-1], segments[-1]
        if dirs and not dirs[0]:
            # drop the empty segment produced by the leading '/'
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            # binary transfer for a file, directory listing otherwise
            type = 'I' if file else 'D'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                        value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            fp, retrlen = fw.retrfile(file, type)

            header_lines = []
            mtype = mimetypes.guess_type(req.full_url)[0]
            if mtype:
                header_lines.append("Content-type: %s\n" % mtype)
            if retrlen is not None and retrlen >= 0:
                header_lines.append("Content-length: %d\n" % retrlen)
            headers = email.message_from_string("".join(header_lines))
            return addinfourl(fp, headers, req.full_url)
        except ftplib.all_errors as exp:
            exc = URLError('ftp error: %r' % exp)
            raise_with_traceback(exc)

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return a fresh, non-persistent ftpwrapper for the target."""
        return ftpwrapper(user, passwd, host, port, dirs, timeout,
                          persistent=False)
-
class CacheFTPHandler(FTPHandler):
    """FTPHandler that keeps connections open for reuse.

    Connections are cached per (user, host, port, directory, timeout),
    expire ``delay`` seconds after their last use, and the cache is kept
    below ``max_conns`` entries.
    """
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}      # key -> ftpwrapper
        self.timeout = {}    # key -> expiry time (seconds since epoch)
        self.soonest = 0     # earliest expiry time among cached entries
        self.delay = 60
        self.max_conns = 16

    def setTimeout(self, t):
        """Set the number of seconds an unused connection stays cached."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size at which an entry is evicted."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return the cached connection for the target, creating it if needed."""
        key = user, host, port, '/'.join(dirs), timeout
        if key in self.cache:
            self.timeout[key] = time.time() + self.delay
        else:
            self.cache[key] = ftpwrapper(user, passwd, host, port,
                                         dirs, timeout)
            self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def _earliest_expiry(self):
        """Return the smallest expiry time, or 0 when nothing is cached."""
        expiries = list(self.timeout.values())
        # min() of an empty sequence raises ValueError.
        return min(expiries) if expiries else 0

    def check_cache(self):
        """Drop expired connections, then enforce the size limit."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in list(self.timeout.items()):
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
        self.soonest = self._earliest_expiry()

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    # Close the evicted connection like an expired one;
                    # merely forgetting it would leave it open.
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self.soonest = self._earliest_expiry()

    def clear_cache(self):
        """Close and forget every cached connection."""
        for conn in self.cache.values():
            conn.close()
        self.cache.clear()
        self.timeout.clear()
-
-
-# Code moved from the old urllib module
-
-MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
-
-# Helper for non-unix systems
if os.name == 'nt':
    # Windows paths need drive-letter and backslash handling.
    from nturl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """Turn the path part of a 'file' URL into a file system path.

        OS-specific conversion that only undoes percent-quoting;
        not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """Turn a file system path into the path part of a 'file' URL.

        OS-specific conversion that only applies percent-quoting;
        not recommended for general use."""
        return quote(pathname)
-
-# This really consists of two pieces:
-# (1) a class which handles opening of all sorts of URLs
-# (plus assorted utilities etc.)
-# (2) a set of functions for parsing URLs
-# XXX Should these be separated out into different modules?
-
-
-ftpcache = {}
-class URLopener(object):
- """Class to open URLs.
- This is a class rather than just a subroutine because we may need
- more than one set of global protocol-specific options.
- Note -- this is a base class for those who don't want the
- automatic handling of errors type 302 (relocated) and 401
- (authorization needed)."""
-
- __tempfiles = None
-
- version = "Python-urllib/%s" % __version__
-
- # Constructor
- def __init__(self, proxies=None, **x509):
- msg = "%(class)s style of invoking requests is deprecated. " \
- "Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
- warnings.warn(msg, DeprecationWarning, stacklevel=3)
- if proxies is None:
- proxies = getproxies()
- assert hasattr(proxies, 'keys'), "proxies must be a mapping"
- self.proxies = proxies
- self.key_file = x509.get('key_file')
- self.cert_file = x509.get('cert_file')
- self.addheaders = [('User-Agent', self.version)]
- self.__tempfiles = []
- self.__unlink = os.unlink # See cleanup()
- self.tempcache = None
- # Undocumented feature: if you assign {} to tempcache,
- # it is used to cache files retrieved with
- # self.retrieve(). This is not enabled by default
- # since it does not work for changing documents (and I
- # haven't got the logic to check expiration headers
- # yet).
- self.ftpcache = ftpcache
- # Undocumented feature: you can use a different
- # ftp cache by assigning to the .ftpcache member;
- # in case you want logically independent URL openers
- # XXX This is not threadsafe. Bah.
-
- def __del__(self):
- self.close()
-
- def close(self):
- self.cleanup()
-
- def cleanup(self):
- # This code sometimes runs when the rest of this module
- # has already been deleted, so it can't use any globals
- # or import anything.
- if self.__tempfiles:
- for file in self.__tempfiles:
- try:
- self.__unlink(file)
- except OSError:
- pass
- del self.__tempfiles[:]
- if self.tempcache:
- self.tempcache.clear()
-
- def addheader(self, *args):
- """Add a header to be used by the HTTP interface only
- e.g. u.addheader('Accept', 'sound/basic')"""
- self.addheaders.append(args)
-
- # External interface
- def open(self, fullurl, data=None):
- """Use URLopener().open(file) instead of open(file, 'r')."""
- fullurl = unwrap(to_bytes(fullurl))
- fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
- if self.tempcache and fullurl in self.tempcache:
- filename, headers = self.tempcache[fullurl]
- fp = open(filename, 'rb')
- return addinfourl(fp, headers, fullurl)
- urltype, url = splittype(fullurl)
- if not urltype:
- urltype = 'file'
- if urltype in self.proxies:
- proxy = self.proxies[urltype]
- urltype, proxyhost = splittype(proxy)
- host, selector = splithost(proxyhost)
- url = (host, fullurl) # Signal special case to open_*()
- else:
- proxy = None
- name = 'open_' + urltype
- self.type = urltype
- name = name.replace('-', '_')
- if not hasattr(self, name):
- if proxy:
- return self.open_unknown_proxy(proxy, fullurl, data)
- else:
- return self.open_unknown(fullurl, data)
- try:
- if data is None:
- return getattr(self, name)(url)
- else:
- return getattr(self, name)(url, data)
- except HTTPError:
- raise
- except socket.error as msg:
- raise_with_traceback(IOError('socket error', msg))
-
- def open_unknown(self, fullurl, data=None):
- """Overridable interface to open unknown URL type."""
- type, url = splittype(fullurl)
- raise IOError('url error', 'unknown url type', type)
-
- def open_unknown_proxy(self, proxy, fullurl, data=None):
- """Overridable interface to open unknown URL type."""
- type, url = splittype(fullurl)
- raise IOError('url error', 'invalid proxy for %s' % type, proxy)
-
- # External interface
- def retrieve(self, url, filename=None, reporthook=None, data=None):
- """retrieve(url) returns (filename, headers) for a local object
- or (tempfilename, headers) for a remote object."""
- url = unwrap(to_bytes(url))
- if self.tempcache and url in self.tempcache:
- return self.tempcache[url]
- type, url1 = splittype(url)
- if filename is None and (not type or type == 'file'):
- try:
- fp = self.open_local_file(url1)
- hdrs = fp.info()
- fp.close()
- return url2pathname(splithost(url1)[1]), hdrs
- except IOError as msg:
- pass
- fp = self.open(url, data)
- try:
- headers = fp.info()
- if filename:
- tfp = open(filename, 'wb')
- else:
- import tempfile
- garbage, path = splittype(url)
- garbage, path = splithost(path or "")
- path, garbage = splitquery(path or "")
- path, garbage = splitattr(path or "")
- suffix = os.path.splitext(path)[1]
- (fd, filename) = tempfile.mkstemp(suffix)
- self.__tempfiles.append(filename)
- tfp = os.fdopen(fd, 'wb')
- try:
- result = filename, headers
- if self.tempcache is not None:
- self.tempcache[url] = result
- bs = 1024*8
- size = -1
- read = 0
- blocknum = 0
- if "content-length" in headers:
- size = int(headers["Content-Length"])
- if reporthook:
- reporthook(blocknum, bs, size)
- while 1:
- block = fp.read(bs)
- if not block:
- break
- read += len(block)
- tfp.write(block)
- blocknum += 1
- if reporthook:
- reporthook(blocknum, bs, size)
- finally:
- tfp.close()
- finally:
- fp.close()
-
- # raise exception if actual size does not match content-length header
- if size >= 0 and read < size:
- raise ContentTooShortError(
- "retrieval incomplete: got only %i out of %i bytes"
- % (read, size), result)
-
- return result
-
- # Each method named open_<type> knows how to open that type of URL
-
def _open_generic_http(self, connection_factory, url, data):
    """Make an HTTP connection using connection_factory.

    This is an internal method that should be called from
    open_http() or open_https().

    Arguments:
    - connection_factory should take a host name and return an
      HTTPConnection instance.
    - url is the URL to retrieve (a string), or a (host, selector)
      pair; the pair form is handled as a request sent through a proxy.
    - data is payload for a POST request or None.

    Returns an addinfourl object for a 2xx response; any other status
    is delegated to self.http_error().  Raises IOError when no host can
    be determined and URLError on a bad status line.
    """

    user_passwd = None
    proxy_passwd= None
    if isinstance(url, str):
        # Direct request: credentials may be embedded in the host part.
        host, selector = splithost(url)
        if host:
            user_passwd, host = splituser(host)
            host = unquote(host)
        realhost = host
    else:
        # Pair form: `host` is the server to connect to, `selector`
        # the URL to ask it for.
        host, selector = url
        # check whether the proxy contains authorization information
        proxy_passwd, host = splituser(host)
        # now we proceed with the url we want to obtain
        urltype, rest = splittype(selector)
        url = rest
        user_passwd = None
        if urltype.lower() != 'http':
            realhost = None
        else:
            realhost, rest = splithost(rest)
            if realhost:
                user_passwd, realhost = splituser(realhost)
            if user_passwd:
                # Rebuild the selector without the user:password part.
                selector = "%s://%s%s" % (urltype, realhost, rest)
            if proxy_bypass(realhost):
                # Connect straight to the target host instead.
                host = realhost

    if not host: raise IOError('http error', 'no host given')

    if proxy_passwd:
        proxy_passwd = unquote(proxy_passwd)
        proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
    else:
        proxy_auth = None

    if user_passwd:
        user_passwd = unquote(user_passwd)
        auth = base64.b64encode(user_passwd.encode()).decode('ascii')
    else:
        auth = None
    http_conn = connection_factory(host)
    headers = {}
    if proxy_auth:
        headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
    if auth:
        headers["Authorization"] = "Basic %s" % auth
    if realhost:
        headers["Host"] = realhost

    # Add Connection:close as we don't support persistent connections yet.
    # This helps in closing the socket and avoiding ResourceWarning

    headers["Connection"] = "close"

    # Entries from self.addheaders are applied last, so they overwrite
    # any header of the same name set above.
    for header, value in self.addheaders:
        headers[header] = value

    if data is not None:
        headers["Content-Type"] = "application/x-www-form-urlencoded"
        http_conn.request("POST", selector, data, headers)
    else:
        http_conn.request("GET", selector, headers=headers)

    try:
        response = http_conn.getresponse()
    except http_client.BadStatusLine:
        # something went wrong with the HTTP status line
        raise URLError("http protocol error: bad status line")

    # According to RFC 2616, "2xx" code indicates that the client's
    # request was successfully received, understood, and accepted.
    if 200 <= response.status < 300:
        return addinfourl(response, response.msg, "http:" + url,
                          response.status)
    else:
        return self.http_error(
            url, response.fp,
            response.status, response.reason, response.msg, data)
-
def open_http(self, url, data=None):
    """Open *url* over plain HTTP via _open_generic_http()."""
    connection_factory = http_client.HTTPConnection
    return self._open_generic_http(connection_factory, url, data)
-
def http_error(self, url, fp, errcode, errmsg, headers, data=None):
    """Dispatch an HTTP error response to the matching handler.

    A method named ``http_error_DDD`` (DDD being the numeric status code)
    is tried first; *data* is passed to it only when it is not None.  If
    no such method exists, or it returns a false value, the call falls
    through to ``http_error_default``.
    """
    handler_name = 'http_error_%d' % errcode
    if hasattr(self, handler_name):
        handler = getattr(self, handler_name)
        call_args = (url, fp, errcode, errmsg, headers)
        if data is not None:
            call_args += (data,)
        outcome = handler(*call_args)
        if outcome:
            return outcome
    return self.http_error_default(url, fp, errcode, errmsg, headers)
-
def http_error_default(self, url, fp, errcode, errmsg, headers):
    """Fallback error handler: close *fp*, then raise HTTPError.

    The raised error carries the URL, status code, reason and headers.
    """
    fp.close()
    error = HTTPError(url, errcode, errmsg, headers, None)
    raise error
-
if _have_ssl:
    def _https_connection(self, host):
        """Return an HTTPSConnection to *host* configured with this
        opener's key_file and cert_file."""
        ssl_files = dict(key_file=self.key_file, cert_file=self.cert_file)
        return http_client.HTTPSConnection(host, **ssl_files)

    def open_https(self, url, data=None):
        """Open *url* over HTTPS via _open_generic_http()."""
        connection_factory = self._https_connection
        return self._open_generic_http(connection_factory, url, data)
-
def open_file(self, url):
    """Open a ``file:`` URL, accepting only the local machine.

    A URL of the form ``//host/...`` is rejected with ValueError unless
    the host part is empty or ``localhost``.  A non-string *url* raises
    URLError.
    """
    if not isinstance(url, str):
        raise URLError('file error: proxy support for file protocol currently not implemented')
    has_host = url[:2] == '//' and url[2:3] != '/'
    if has_host and url[2:12].lower() != 'localhost/':
        raise ValueError("file:// scheme is supported only on localhost")
    return self.open_local_file(url)
-
def open_local_file(self, url):
    """Open a file on this machine and return it wrapped in addinfourl.

    Content-Type, Content-Length and Last-modified headers are
    synthesised from the file itself.  A host part is accepted only if it
    has no port and resolves to one of this machine's addresses.

    Raises URLError when the file cannot be stat'ed or the host is not
    local, and ValueError for a ``./`` path given together with a host.
    """
    import future.backports.email.utils as email_utils
    import mimetypes

    host, file = splithost(url)
    localname = url2pathname(file)
    try:
        st = os.stat(localname)
    except OSError as err:
        raise URLError(err.strerror, err.filename)

    last_modified = email_utils.formatdate(st.st_mtime, usegmt=True)
    content_type = mimetypes.guess_type(url)[0] or 'text/plain'
    headers = email.message_from_string(
        'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
        (content_type, st.st_size, last_modified))

    if host:
        host, port = splitport(host)
        on_this_machine = (
            not port
            and socket.gethostbyname(host) in ((localhost(),) + thishost()))
        if not on_this_machine:
            raise URLError('local file error: not on local host')
        if file[:1] != '/' and file[:2] == './':
            raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)

    urlfile = file
    if file[:1] == '/':
        urlfile = 'file://' + file
    return addinfourl(open(localname, 'rb'), headers, urlfile)
-
def open_ftp(self, url):
    """Use FTP protocol.

    *url* is the part of the URL after ``ftp:``, i.e. ``//host/path``.
    Connections are kept in ``self.ftpcache``, keyed by
    (user, host, port, directory).  Returns an addinfourl object whose
    headers carry the guessed Content-Type and, when known, the
    Content-Length.

    Raises URLError for a non-string *url*, a missing host, or any error
    reported by ftplib.
    """
    if not isinstance(url, str):
        raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
    import mimetypes
    host, path = splithost(url)
    if not host:
        raise URLError('ftp error: no host given')
    host, port = splitport(host)
    user, host = splituser(host)
    if user:
        user, passwd = splitpasswd(user)
    else:
        passwd = None
    host = unquote(host)
    user = unquote(user or '')
    passwd = unquote(passwd or '')
    host = socket.gethostbyname(host)
    if not port:
        import ftplib
        port = ftplib.FTP_PORT
    else:
        port = int(port)
    path, attrs = splitattr(path)
    path = unquote(path)
    dirs = path.split('/')
    dirs, file = dirs[:-1], dirs[-1]
    # Drop the empty component produced by the leading '/'; a second
    # empty component (path began with '//') stands for the root.
    if dirs and not dirs[0]:
        dirs = dirs[1:]
    if dirs and not dirs[0]:
        dirs[0] = '/'
    key = user, host, port, '/'.join(dirs)
    # XXX thread unsafe!
    if len(self.ftpcache) > MAXFTPCACHE:
        # Prune the cache, rather arbitrarily.  Iterate over a snapshot
        # of the keys: deleting entries while iterating the live dict
        # raises RuntimeError on Python 3.
        for k in list(self.ftpcache):
            if k != key:
                v = self.ftpcache[k]
                del self.ftpcache[k]
                v.close()
    try:
        if key not in self.ftpcache:
            self.ftpcache[key] = \
                ftpwrapper(user, passwd, host, port, dirs)
        # Default transfer type: directory listing when no file name
        # is given, binary ("image") otherwise.
        if not file:
            type = 'D'
        else:
            type = 'I'
        for attr in attrs:
            attr, value = splitvalue(attr)
            if attr.lower() == 'type' and \
               value in ('a', 'A', 'i', 'I', 'd', 'D'):
                type = value.upper()
        (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
        mtype = mimetypes.guess_type("ftp:" + url)[0]
        headers = ""
        if mtype:
            headers += "Content-Type: %s\n" % mtype
        if retrlen is not None and retrlen >= 0:
            headers += "Content-Length: %d\n" % retrlen
        headers = email.message_from_string(headers)
        return addinfourl(fp, headers, "ftp:" + url)
    except ftperrors() as exp:
        raise_with_traceback(URLError('ftp error %r' % exp))
-
def open_data(self, url, data=None):
    """Open a ``data:`` URL; the *data* argument is ignored.

    Returns an addinfourl object whose stream holds the generated header
    lines followed by the decoded payload.  Raises URLError for a
    non-string *url* and IOError when the URL contains no comma.
    """
    if not isinstance(url, str):
        raise URLError('data error: proxy support for data protocol currently not implemented')
    # ignore POSTed data
    #
    # syntax of data URLs:
    # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
    # mediatype := [ type "/" subtype ] *( ";" parameter )
    # data      := *urlchar
    # parameter := attribute "=" value
    try:
        mediatype, payload = url.split(',', 1)
    except ValueError:
        raise IOError('data error', 'bad data URL')
    if not mediatype:
        mediatype = 'text/plain;charset=US-ASCII'

    # A trailing ";token" without "=" names the transfer encoding.
    encoding = ''
    semi = mediatype.rfind(';')
    if semi >= 0 and '=' not in mediatype[semi:]:
        encoding = mediatype[semi+1:]
        mediatype = mediatype[:semi]

    date_line = 'Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                        time.gmtime(time.time()))
    if encoding == 'base64':
        # XXX is this encoding/decoding ok?
        payload = base64.decodebytes(payload.encode('ascii')).decode('latin-1')
    else:
        payload = unquote(payload)

    text = '\n'.join([
        date_line,
        'Content-type: %s' % mediatype,
        'Content-Length: %d' % len(payload),
        '',
        payload,
    ])
    headers = email.message_from_string(text)
    stream = io.StringIO(text)
    return addinfourl(stream, headers, url)
-
-
class FancyURLopener(URLopener):
    """URLopener with handlers for redirects and Basic authentication.

    Adds handlers for status codes 301, 302, 303, 307, 401 and 407, and
    a default error handler that returns the response instead of raising.
    """

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        # "realm@host" -> (user, passwd); maintained by get_user_passwd()
        self.auth_cache = {}
        # Redirect counter for the request in progress, and its limit
        self.tries = 0
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        full_url = "http:" + url
        return addinfourl(fp, headers, full_url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily).

        Once ``maxtries`` redirects have been counted, the response is
        handed to the 500 handler (or the default handler) instead.
        """
        self.tries += 1
        if self.maxtries and self.tries >= self.maxtries:
            give_up = getattr(self, "http_error_500",
                              self.http_error_default)
            self.tries = 0
            return give_up(url, fp, 500,
                           "Internal Server Error: Redirect Recursion",
                           headers)
        outcome = self.redirect_internal(url, fp, errcode, errmsg, headers,
                                         data)
        self.tries = 0
        return outcome

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Follow the redirect named by the Location (or URI) header.

        Returns None when neither header is present.  Raises HTTPError
        when the target scheme is not http, https, ftp or empty.
        """
        for field in ('location', 'uri'):
            if field in headers:
                newurl = headers[field]
                break
        else:
            return
        fp.close()

        # In case the server sent a relative URL, join with original:
        newurl = urljoin(self.type + ":" + url, newurl)

        # For security reasons, we don't allow redirection to anything other
        # than http, https and ftp.

        # We are using newer HTTPError with older redirect_internal method
        # This older method will get deprecated in 3.3
        target_scheme = urlparse(newurl).scheme
        if target_scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(newurl, errcode,
                            errmsg +
                            " Redirection to url '%s' is not allowed." % newurl,
                            headers, fp)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is not None:
            return self.http_error_default(url, fp, errcode, errmsg, headers)
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        # Every URLopener.http_error_default() call below is a bail-out
        # point: in the version shown in this file it closes fp and
        # raises HTTPError.
        if 'www-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        challenge = headers['www-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', challenge)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                                         headers)
        retry_name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self, retry_name)(url, realm)
        return getattr(self, retry_name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        # Same bail-out structure as http_error_401().
        if 'proxy-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        challenge = headers['proxy-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', challenge)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                                         headers)
        retry_name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self, retry_name)(url, realm)
        return getattr(self, retry_name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Store credentials in the http proxy URL and reopen *url*.

        Returns None when no user name or password is obtained.
        """
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        urltype, proxyhost = splittype(self.proxies['http'])
        proxyhost, proxyselector = splithost(proxyhost)
        # Non-zero when the proxy URL already carried credentials; it is
        # passed on as get_user_passwd()'s clear_cache flag.
        cut = proxyhost.find('@') + 1
        proxyhost = proxyhost[cut:]
        user, passwd = self.get_user_passwd(proxyhost, realm, cut)
        if not user and not passwd:
            return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Store credentials in the https proxy URL and reopen *url*.

        Returns None when no user name or password is obtained.
        """
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        urltype, proxyhost = splittype(self.proxies['https'])
        proxyhost, proxyselector = splithost(proxyhost)
        cut = proxyhost.find('@') + 1
        proxyhost = proxyhost[cut:]
        user, passwd = self.get_user_passwd(proxyhost, realm, cut)
        if not user and not passwd:
            return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Reopen *url* over http with credentials embedded in the host.

        Returns None when no user name or password is obtained.
        """
        host, selector = splithost(url)
        cut = host.find('@') + 1
        host = host[cut:]
        user, passwd = self.get_user_passwd(host, realm, cut)
        if not user and not passwd:
            return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Reopen *url* over https with credentials embedded in the host.

        Returns None when no user name or password is obtained.
        """
        host, selector = splithost(url)
        cut = host.find('@') + 1
        host = host[cut:]
        user, passwd = self.get_user_passwd(host, realm, cut)
        if not user and not passwd:
            return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        """Return ``(user, passwd)`` for *realm* at *host*.

        A cached pair is returned unless *clear_cache* is true, in which
        case it is dropped and the user is prompted again.  A pair is
        cached only when at least one of its parts is non-empty.
        """
        cache_key = realm + '@' + host.lower()
        if cache_key in self.auth_cache:
            if not clear_cache:
                return self.auth_cache[cache_key]
            del self.auth_cache[cache_key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd:
            self.auth_cache[cache_key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = input("Enter username for %s at %s: " % (realm, host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
        except KeyboardInterrupt:
            print()
            return None, None
        return user, passwd
-
-
-# Utility functions
-
_localhost = None
def localhost():
    """Return the IP address that the name 'localhost' resolves to.

    The lookup is done once; the result is kept in a module global.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
-
_thishost = None
def thishost():
    """Return a tuple of the IP addresses of the current host.

    Falls back to the addresses of 'localhost' when this machine's own
    host name cannot be resolved.  The result is computed once and kept
    in a module global.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    try:
        addresses = socket.gethostbyname_ex(socket.gethostname())[2]
    except socket.gaierror:
        addresses = socket.gethostbyname_ex('localhost')[2]
    _thishost = tuple(addresses)
    return _thishost
-
_ftperrors = None
def ftperrors():
    """Return ftplib.all_errors.

    ftplib is imported on the first call only; the value is then kept in
    a module global.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
-
_noheaders = None
def noheaders():
    """Return a shared, empty email Message object.

    The message is created on the first call and the same object is
    returned afterwards.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    _noheaders = email.message_from_string("")
    return _noheaders
-
-
-# Utility classes
-
class ftpwrapper(object):
    """An FTP connection as cached by open_ftp().

    ``refcount`` counts the file objects handed out by retrfile() that
    have not been closed yet.  ``keepalive`` is cleared by close(); once
    it is false the connection is really closed when the last such file
    object is closed.
    """

    def __init__(self, user, passwd, host, port, dirs, timeout=None,
                 persistent=True):
        self.user, self.passwd = user, passwd
        self.host, self.port = host, port
        self.dirs = dirs
        self.timeout = timeout
        self.refcount = 0
        self.keepalive = persistent
        self.init()

    def init(self):
        """Connect, log in and change to the target directory."""
        import ftplib
        self.busy = 0
        connection = ftplib.FTP()
        self.ftp = connection
        connection.connect(self.host, self.port, self.timeout)
        connection.login(self.user, self.passwd)
        connection.cwd('/'.join(self.dirs))

    def retrfile(self, file, type):
        """Start retrieving *file*, or a listing, on the data connection.

        *type* is the FTP transfer type letter; 'd'/'D' requests a
        directory listing.  Returns ``(fileobj, length)`` where *length*
        is the size reported by ftplib's ntransfercmd().
        """
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'):
            cmd = 'TYPE A'
            isdir = 1
        else:
            cmd = 'TYPE ' + type
            isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The command failed: reconnect once and try again.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                # A 550 reply falls through to the listing attempt below.
                if str(reason)[:3] != '550':
                    raise_with_traceback(URLError('ftp error: %r' % reason))
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if not file:
                cmd = 'LIST'
            else:
                original_dir = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        ### Was:
                        # raise URLError('ftp error: %r' % reason) from reason
                        exc = URLError('ftp error: %r' % reason)
                        exc.__cause__ = reason
                        raise exc
                finally:
                    self.ftp.cwd(original_dir)
                cmd = 'LIST ' + file
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1

        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        """Mark the connection as no longer busy."""
        self.busy = 0

    def close(self):
        """Stop keeping the connection alive; close it now if unused."""
        self.keepalive = False
        if self.refcount > 0:
            return
        self.real_close()

    def file_close(self):
        """Close hook for file objects returned by retrfile()."""
        self.endtransfer()
        self.refcount -= 1
        if self.refcount > 0 or self.keepalive:
            return
        self.real_close()

    def real_close(self):
        """Close the FTP connection, ignoring FTP errors."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
-
-# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy (in any
    letter case); this seems to be the standard convention.  If you need
    a different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    An all-lowercase variable takes precedence over a differently-cased
    one for the same scheme, and an empty lowercase variable removes the
    scheme.  When REQUEST_METHOD is set (a CGI environment), HTTP_PROXY
    in any case other than all-lowercase is ignored.
    """
    proxies = {}
    # First pass: accept the variable in any letter case.
    for name, value in os.environ.items():
        name = name.lower()
        if value and name[-6:] == '_proxy':
            proxies[name[:-6]] = value
    # Under CGI, HTTP_PROXY may have been set by the web server from a
    # client-supplied "Proxy:" header, so forget it.  A lowercase
    # http_proxy is re-applied by the second pass below.
    if 'REQUEST_METHOD' in os.environ:
        proxies.pop('http', None)
    # Second pass: lowercase variables win over what the first pass
    # found, which also makes the result independent of the order in
    # which the environment is iterated.
    for name, value in os.environ.items():
        if name[-6:] == '_proxy':
            name = name.lower()
            if value:
                proxies[name[:-6]] = value
            else:
                proxies.pop(name[:-6], None)
    return proxies
-
def proxy_bypass_environment(host):
    """Return 1 if *host* should be reached without a proxy, else 0.

    The decision is based on the environment variable no_proxy (or
    NO_PROXY when the former is empty or unset), which should be a list
    of DNS suffixes separated by commas, or '*' for all hosts.  *host* is
    compared both with and without its port.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, port = splitport(host)
    # check if the host ends with any of the DNS suffixes
    for entry in no_proxy.split(','):
        suffix = entry.strip()
        if not suffix:
            continue
        if hostonly.endswith(suffix) or host.endswith(suffix):
            return 1
    # otherwise, don't bypass
    return 0
-
-
-# This code tests an OSX specific data structure but is testable on all
-# platforms
def _proxy_bypass_macosx_sysconf(host, proxy_settings):
    """
    Return True iff this host shouldn't be accessed using a proxy

    This function uses the MacOSX framework SystemConfiguration
    to fetch the proxy information.

    proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
    { 'exclude_simple': bool,
      'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
    }

    Exception entries that start with digits are treated as IPv4
    networks ("a.b.c.d", optionally shortened and/or followed by
    "/prefixlen"); all other entries are fnmatch patterns matched
    against *host*.  Network entries whose prefix length is above 32 are
    ignored.
    """
    from fnmatch import fnmatch

    hostonly, port = splitport(host)

    def ip2num(ipAddr):
        # Dotted quad -> 32-bit integer; missing trailing octets are 0.
        parts = ipAddr.split('.')
        parts = list(map(int, parts))
        if len(parts) != 4:
            parts = (parts + [0, 0, 0, 0])[:4]
        return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

    # Check for simple host names:
    if '.' not in host:
        if proxy_settings['exclude_simple']:
            return True

    # Resolved lazily, on the first network-style entry.
    hostIP = None

    for value in proxy_settings.get('exceptions', ()):
        # Items in the list are strings like these: *.local, 169.254/16
        if not value:
            continue

        m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
        if m is not None:
            if hostIP is None:
                try:
                    hostIP = socket.gethostbyname(hostonly)
                    hostIP = ip2num(hostIP)
                except socket.error:
                    continue

            base = ip2num(m.group(1))
            mask = m.group(2)
            if mask is None:
                # No explicit prefix: 8 bits per octet that was given.
                mask = 8 * (m.group(1).count('.') + 1)
            else:
                mask = int(mask[1:])

            if not 0 <= mask <= 32:
                # An out-of-range prefix length would give a negative
                # shift count below (ValueError); skip the entry.
                continue

            mask = 32 - mask

            if (hostIP >> mask) == (base >> mask):
                return True

        elif fnmatch(host, value):
            return True

    return False
-
-
# Platform-specific definitions of getproxies() and proxy_bypass().
# In every branch, proxy settings found in the environment take
# precedence over the system configuration.
if sys.platform == 'darwin':
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        """Return True if *host* should bypass the proxy according to the
        settings returned by _scproxy._get_proxy_settings()."""
        proxy_settings = _get_proxy_settings()
        return _proxy_bypass_macosx_sysconf(host, proxy_settings)

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()



    def proxy_bypass(host):
        """Return a true value if *host* should be accessed without a proxy.

        Uses the environment when it defines any proxies, otherwise the
        system configuration.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings,
        from the environment if it defines any, else from the system
        configuration."""
        return getproxies_environment() or getproxies_macosx_sysconf()


elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['https'] = 'https://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            # NOTE(review): not reached when an exception is raised above,
            # so the key is then not closed explicitly.
            internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        """Return 1 if *host* matches the ProxyOverride registry value,
        else 0.

        *host* is tested by name, by resolved address and by fully
        qualified name against each ';'-separated override pattern.
        """
        try:
            import winreg
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            proxyOverride = str(winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                # '<local>' bypasses the proxy for dot-less host names.
                if '.' not in rawHost:
                    return 1
            # Turn the glob-style pattern into a regular expression.
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                if re.match(test, val, re.I):
                    return 1
        return 0

    def proxy_bypass(host):
        """Return a true value if *host* should be accessed without a proxy.

        The decision comes from the environment when it defines any
        proxies, otherwise from the registry.

        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment
+ raise ValueError("Content-Length should be specified "
+ "for iterable data of type %r %r" % (type(data),
+ data))
+ else:
+ request.add_unredirected_header(
+ 'Content-length', '%d' % size)
+
+ sel_host = host
+ if request.has_proxy():
+ scheme, sel = splittype(request.selector)
+ sel_host, sel_path = splithost(sel)
+ if not request.has_header('Host'):
+ request.add_unredirected_header('Host', sel_host)
+ for name, value in self.parent.addheaders:
+ name = name.capitalize()
+ if not request.has_header(name):
+ request.add_unredirected_header(name, value)
+
+ return request
+
def do_open(self, http_class, req, **http_conn_args):
    """Return an HTTPResponse object for the request, using http_class.

    http_class must implement the HTTPConnection API from http.client.
    Extra keyword arguments are passed on to the http_class constructor.

    The returned response additionally carries ``url`` (the full request
    URL) and has ``msg`` set to the reason phrase.

    Raises URLError when the request has no host or when sending it
    fails with a socket error.  The connection is closed before any
    exception from sending the request or reading the response
    propagates.
    """
    host = req.host
    if not host:
        raise URLError('no host given')

    # will parse host:port
    h = http_class(host, timeout=req.timeout, **http_conn_args)

    # Unredirected headers win over regular headers of the same name.
    headers = dict(req.unredirected_hdrs)
    headers.update(dict((k, v) for k, v in req.headers.items()
                        if k not in headers))

    # TODO(jhylton): Should this be redesigned to handle
    # persistent connections?

    # We want to make an HTTP/1.1 request, but the addinfourl
    # class isn't prepared to deal with a persistent connection.
    # It will try to read all remaining data from the socket,
    # which will block while the server waits for the next request.
    # So make sure the connection gets closed after the (only)
    # request.
    headers["Connection"] = "close"
    headers = dict((name.title(), val) for name, val in headers.items())

    if req._tunnel_host:
        tunnel_headers = {}
        proxy_auth_hdr = "Proxy-Authorization"
        if proxy_auth_hdr in headers:
            tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
            # Proxy-Authorization should not be sent to origin
            # server.
            del headers[proxy_auth_hdr]
        h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

    try:
        h.request(req.get_method(), req.selector, req.data, headers)
    except socket.error as err:  # timeout error
        h.close()
        raise URLError(err)

    try:
        r = h.getresponse()
    except BaseException:
        # Do not leave the connection open when the response cannot be
        # read; the original exception is re-raised unchanged.
        h.close()
        raise

    # If the server does not send us a 'Connection: close' header,
    # HTTPConnection assumes the socket should be left open. Manually
    # mark the socket to be closed when this response object goes away.
    if h.sock:
        h.sock.close()
        h.sock = None

    r.url = req.get_full_url()
    # Overwrite the response's .msg attribute with the reason phrase,
    # because urllib clients expect to find the reason in .msg.  It
    # would be good to mark this attribute as deprecated and get
    # clients to use info() or .headers instead.
    r.msg = r.reason
    return r
+
+
class HTTPHandler(AbstractHTTPHandler):
    """Handler that opens plain ``http:`` requests."""

    # Request pre-processing is inherited unchanged.
    http_request = AbstractHTTPHandler.do_request_

    def http_open(self, req):
        """Open *req* through do_open() with an HTTPConnection."""
        connection_class = http_client.HTTPConnection
        return self.do_open(connection_class, req)
+
if hasattr(http_client, 'HTTPSConnection'):

    class HTTPSHandler(AbstractHTTPHandler):
        """Handler that opens ``https:`` requests.

        *context* and *check_hostname* are stored and passed on to
        HTTPSConnection for every request.
        """

        def __init__(self, debuglevel=0, context=None, check_hostname=None):
            AbstractHTTPHandler.__init__(self, debuglevel)
            self._context = context
            self._check_hostname = check_hostname

        # Request pre-processing is inherited unchanged.
        https_request = AbstractHTTPHandler.do_request_

        def https_open(self, req):
            """Open *req* through do_open() with an HTTPSConnection."""
            ssl_options = dict(context=self._context,
                               check_hostname=self._check_hostname)
            return self.do_open(http_client.HTTPSConnection, req,
                                **ssl_options)

    __all__.append('HTTPSHandler')
+
class HTTPCookieProcessor(BaseHandler):
    """Handler that adds cookies to requests and stores those received.

    Cookies are kept in *cookiejar*; a new CookieJar is created when
    none is supplied.
    """

    def __init__(self, cookiejar=None):
        import future.backports.http.cookiejar as http_cookiejar
        self.cookiejar = (http_cookiejar.CookieJar()
                          if cookiejar is None else cookiejar)

    def http_request(self, request):
        """Add the jar's cookie header to *request* and return it."""
        jar = self.cookiejar
        jar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        """Record cookies from *response* in the jar and return it."""
        jar = self.cookiejar
        jar.extract_cookies(response, request)
        return response

    # HTTPS traffic is handled exactly like HTTP.
    https_request = http_request
    https_response = http_response
+
class UnknownHandler(BaseHandler):
    """Handler whose unknown_open() always fails with URLError."""

    def unknown_open(self, req):
        """Raise URLError naming the request's URL type."""
        message = 'unknown url type: %s' % req.type
        raise URLError(message)
+
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Each element is split at its first '='.  A value enclosed in double
    quotes has the quotes removed; an empty value (``key=``) is kept as
    the empty string.  Returns a dict mapping keys to values.

    Raises ValueError for an element that contains no '='.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slices instead of v[0] / v[-1] so that an empty value does not
        # raise IndexError.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
+
def parse_http_list(s):
    """Split a comma-separated HTTP list into its elements.

    Follows RFC 2068 Section 2: elements may contain quoted-strings, and
    a comma inside a double-quoted string does not separate elements.
    Inside a quoted-string a backslash escapes the next character (the
    backslash itself is dropped).  Only double quotes count, not single
    quotes.  Elements are returned stripped of surrounding whitespace; an
    empty final element is omitted.
    """
    elements = []
    current = []        # characters of the element being collected
    in_quotes = False
    escaped = False

    for ch in s:
        if escaped:
            current.append(ch)
            escaped = False
        elif in_quotes:
            if ch == '\\':
                escaped = True
            else:
                if ch == '"':
                    in_quotes = False
                current.append(ch)
        elif ch == ',':
            elements.append(''.join(current))
            current = []
        else:
            if ch == '"':
                in_quotes = True
            current.append(ch)

    # append last part
    tail = ''.join(current)
    if tail:
        elements.append(tail)

    return [element.strip() for element in elements]
+
class FileHandler(BaseHandler):
    """Handler for ``file:`` URLs that refer to the local machine."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        """Open a ``file:`` request.

        Raises URLError when the request names a host that is neither
        'localhost' nor one of get_names().
        """
        url = req.selector
        if url[:2] == '//' and url[2:3] != '/' and (req.host and
                req.host != 'localhost'):
            # Membership test: get_names() returns a tuple, so comparing
            # it to the host string by identity could never match and
            # every such request was rejected.
            if req.host not in self.get_names():
                raise URLError("file:// scheme is supported only on localhost")
            # NOTE(review): a host that *is* in get_names() falls through
            # and returns None here -- confirm whether it should be
            # opened with open_local_file() instead.
        else:
            return self.open_local_file(req)

    # names for the localhost
    names = None
    def get_names(self):
        """Return the tuple of addresses regarded as this machine.

        Computed on first use and cached on the class.
        """
        if FileHandler.names is None:
            try:
                FileHandler.names = tuple(
                    socket.gethostbyname_ex('localhost')[2] +
                    socket.gethostbyname_ex(socket.gethostname())[2])
            except socket.gaierror:
                FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Open the local file named by *req* and wrap it in addinfourl.

        Content-type, Content-length and Last-modified headers are
        synthesised from the file.  Raises URLError when the file cannot
        be accessed or the request's host is not this machine.
        """
        import future.backports.email.utils as email_utils
        import mimetypes
        host = req.host
        filename = req.selector
        localfile = url2pathname(filename)
        try:
            stats = os.stat(localfile)
            size = stats.st_size
            modified = email_utils.formatdate(stats.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(filename)[0]
            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', size, modified))
            if host:
                host, port = splitport(host)
            # Accept no host at all, or a port-less host that resolves to
            # one of this machine's addresses.
            if not host or \
                (not port and _safe_gethostbyname(host) in self.get_names()):
                if host:
                    origurl = 'file://' + host + filename
                else:
                    origurl = 'file://' + filename
                return addinfourl(open(localfile, 'rb'), headers, origurl)
        except OSError as exp:
            # users shouldn't expect OSErrors coming from urlopen()
            raise URLError(exp)
        raise URLError('file not on local host')
+
+def _safe_gethostbyname(host):
+ try:
+ return socket.gethostbyname(host)
+ except socket.gaierror:
+ return None
+
class FTPHandler(BaseHandler):
    """Handler that opens ``ftp:`` URLs over a fresh FTP connection."""

    def ftp_open(self, req):
        """Retrieve the file or directory listing named by *req*.

        The host part may carry ``user:password@`` credentials and a port.
        A ``;type=X`` attribute on the path (X one of a, i, d in either
        case) overrides the default transfer type, which is 'I' when the
        path names a file and 'D' otherwise.

        Returns an addinfourl whose headers carry Content-type and
        Content-length when they are known.  Raises URLError when no host
        is given, when the host cannot be resolved, or on any ftplib error.
        """
        import ftplib
        import mimetypes

        netloc = req.host
        if not netloc:
            raise URLError('ftp error: no host given')
        netloc, port = splitport(netloc)
        port = ftplib.FTP_PORT if port is None else int(port)

        # username/password handling
        user, hostname = splituser(netloc)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        hostname = unquote(hostname)
        user = user or ''
        passwd = passwd or ''

        try:
            address = socket.gethostbyname(hostname)
        except socket.error as msg:
            raise URLError(msg)

        path, attrs = splitattr(req.selector)
        segments = [unquote(segment) for segment in path.split('/')]
        dirs, filename = segments[:-1], segments[-1]
        # A leading '/' yields an empty first segment; drop it.
        if dirs and not dirs[0]:
            dirs = dirs[1:]

        try:
            wrapper = self.connect_ftp(user, passwd, address, port, dirs,
                                       req.timeout)
            transfer_type = 'I' if filename else 'D'
            for attr in attrs:
                attr_name, attr_value = splitvalue(attr)
                if (attr_name.lower() == 'type'
                        and attr_value in ('a', 'A', 'i', 'I', 'd', 'D')):
                    transfer_type = attr_value.upper()
            fp, retrlen = wrapper.retrfile(filename, transfer_type)

            header_lines = []
            mtype = mimetypes.guess_type(req.full_url)[0]
            if mtype:
                header_lines.append("Content-type: %s\n" % mtype)
            if retrlen is not None and retrlen >= 0:
                header_lines.append("Content-length: %d\n" % retrlen)
            headers = email.message_from_string("".join(header_lines))
            return addinfourl(fp, headers, req.full_url)
        except ftplib.all_errors as exp:
            exc = URLError('ftp error: %r' % exp)
            raise_with_traceback(exc)

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return a new, non-persistent ftpwrapper for the given endpoint."""
        return ftpwrapper(user, passwd, host, port, dirs, timeout,
                          persistent=False)
+
class CacheFTPHandler(FTPHandler):
    """FTPHandler that keeps ftpwrapper connections in a small timed cache.

    Attributes set by __init__:
      cache     -- maps (user, host, port, joined dirs, timeout) to ftpwrapper
      timeout   -- maps the same keys to their expiry time (time.time() based)
      soonest   -- earliest expiry time in `timeout`, or 0 when it is empty
      delay     -- seconds a connection stays cached after its last use
      max_conns -- cache size at which the oldest entry is evicted
    """
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe

    def __init__(self):
        self.cache = {}
        self.timeout = {}
        self.soonest = 0
        self.delay = 60
        self.max_conns = 16

    def setTimeout(self, t):
        """Set how many seconds an idle connection stays cached."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the cache size at which the oldest entry is evicted."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return a cached ftpwrapper for the endpoint, creating it if needed."""
        key = user, host, port, '/'.join(dirs), timeout
        if key not in self.cache:
            self.cache[key] = ftpwrapper(user, passwd, host, port,
                                         dirs, timeout)
        self.timeout[key] = time.time() + self.delay
        # Take the reference before pruning: check_cache() may evict this
        # very entry (e.g. max_conns == 1), which used to end in a KeyError.
        wrapper = self.cache[key]
        self.check_cache()
        return wrapper

    def check_cache(self):
        """Close expired connections and enforce the size limit."""
        # first check for old ones
        now = time.time()
        if self.soonest <= now:
            for key, expiry in list(self.timeout.items()):
                if expiry < now:
                    self.cache[key].close()
                    del self.cache[key]
                    del self.timeout[key]
        self._update_soonest()

        # then check the size; `>=` also covers a limit that was lowered
        # below the current cache size through setMaxConns().
        if len(self.cache) >= self.max_conns:
            for key, expiry in list(self.timeout.items()):
                if expiry == self.soonest:
                    # The evicted wrapper is dropped without being closed
                    # because it may be the one connect_ftp() is about to
                    # hand to its caller.
                    del self.cache[key]
                    del self.timeout[key]
                    break
            self._update_soonest()

    def _update_soonest(self):
        """Recompute `soonest`; an empty table would make min() raise."""
        self.soonest = min(self.timeout.values()) if self.timeout else 0

    def clear_cache(self):
        """Close every cached connection and empty the cache."""
        for conn in self.cache.values():
            conn.close()
        self.cache.clear()
        self.timeout.clear()
+
+
+ # Code moved from the old urllib module
+
# Upper bound on URLopener's FTP connection cache; the cache is trimmed
# once it grows beyond this many entries.
MAXFTPCACHE = 10

# Windows gets its path <-> URL converters from nturl2path; every other
# platform uses plain percent-decoding / percent-encoding.
if os.name != 'nt':
    def url2pathname(pathname):
        """Convert a relative 'file' scheme URL into a file system path.

        OS-specific; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """Convert a file system path into a relative 'file' scheme URL.

        OS-specific; not recommended for general use."""
        return quote(pathname)
else:
    from nturl2path import url2pathname, pathname2url
+
+# This really consists of two pieces:
+# (1) a class which handles opening of all sorts of URLs
+# (plus assorted utilities etc.)
+# (2) a set of functions for parsing URLs
+# XXX Should these be separated out into different modules?
+
+
+ftpcache = {}
class URLopener(object):
    """Class to open URLs.

    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed).

    open() dispatches to a method named ``open_<scheme>`` (dashes in the
    scheme become underscores); subclasses add schemes by defining such
    methods.
    """

    # Class-level default so cleanup() finds the attribute even on an
    # instance whose __init__ did not run to completion.
    __tempfiles = None

    version = "Python-urllib/%s" % __version__

    # Constructor
    def __init__(self, proxies=None, **x509):
        """Create an opener.

        proxies -- mapping of scheme -> proxy URL; defaults to getproxies().
        x509    -- optional 'key_file' and 'cert_file' entries that are
                   passed to the HTTPS connection.

        Emits a DeprecationWarning on every instantiation.
        """
        msg = "%(class)s style of invoking requests is deprecated. " \
              "Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
        warnings.warn(msg, DeprecationWarning, stacklevel=3)
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version)]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve().  This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe.  Bah.

    def __del__(self):
        self.close()

    def close(self):
        """Release the temporary files and the temp cache."""
        self.cleanup()

    def cleanup(self):
        """Delete temporary files made by retrieve() and clear tempcache."""
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    self.__unlink(file)
                except OSError:
                    pass
            del self.__tempfiles[:]
        # getattr: __del__ may run on an instance whose __init__ failed
        # before `tempcache` was assigned.
        tempcache = getattr(self, 'tempcache', None)
        if tempcache:
            tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r').

        Dispatches to the ``open_<scheme>`` method for the URL's scheme
        ('file' when the URL has none).  When a proxy is configured for the
        scheme, the proxy's scheme is used instead and the method receives
        a (proxy host, full URL) pair.  socket.error is re-raised as
        IOError('socket error', ...); HTTPError passes through unchanged.
        """
        fullurl = unwrap(to_bytes(fullurl))
        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except HTTPError:
            raise
        except socket.error as msg:
            raise_with_traceback(IOError('socket error', msg))

    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = splittype(fullurl)
        raise IOError('url error', 'unknown url type', type)

    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open a URL through an unknown proxy type."""
        type, url = splittype(fullurl)
        raise IOError('url error', 'invalid proxy for %s' % type, proxy)

    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object.

        reporthook, when given, is called as reporthook(blocknum,
        blocksize, totalsize) once before the first read and once after
        every block; totalsize is -1 when no Content-Length was sent.

        Raises ContentTooShortError when fewer bytes arrive than the
        Content-Length header announced.
        """
        url = unwrap(to_bytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        urltype, url1 = splittype(url)
        if filename is None and (not urltype or urltype == 'file'):
            # A local file needs no copy: hand back its own path.
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                fp.close()
                return url2pathname(splithost(url1)[1]), hdrs
            except IOError:
                # Not usable as a local file; fall through to open().
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                import tempfile
                # Keep the URL path's extension on the temporary file.
                garbage, path = splittype(url)
                garbage, path = splithost(path or "")
                path, garbage = splitquery(path or "")
                path, garbage = splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024*8
                size = -1
                read = 0
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    reporthook(blocknum, bs, size)
                while True:
                    block = fp.read(bs)
                    if not block:
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (read, size), result)

        return result

    # Each method named open_<type> knows how to open that type of URL

    def _open_generic_http(self, connection_factory, url, data):
        """Make an HTTP connection using connection_factory.

        This is an internal method that should be called from
        open_http() or open_https().

        Arguments:
        - connection_factory should take a host name and return an
          HTTPConnection instance.
        - url is the url to retrieve or a host, relative-path pair.
        - data is payload for a POST request or None.

        Returns an addinfourl for a 2xx response; any other status is
        passed to http_error().  Raises IOError when no host can be
        determined and URLError on a bad status line.
        """

        user_passwd = None
        proxy_passwd= None
        if isinstance(url, str):
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    # Strip the credentials from the request line.
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    host = realhost

        if not host: raise IOError('http error', 'no host given')

        if proxy_passwd:
            proxy_passwd = unquote(proxy_passwd)
            proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
        else:
            proxy_auth = None

        if user_passwd:
            user_passwd = unquote(user_passwd)
            auth = base64.b64encode(user_passwd.encode()).decode('ascii')
        else:
            auth = None
        http_conn = connection_factory(host)
        headers = {}
        if proxy_auth:
            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
        if auth:
            headers["Authorization"] = "Basic %s" % auth
        if realhost:
            headers["Host"] = realhost

        # Add Connection:close as we don't support persistent connections yet.
        # This helps in closing the socket and avoiding ResourceWarning

        headers["Connection"] = "close"

        # Headers registered through addheader() override the ones above.
        for header, value in self.addheaders:
            headers[header] = value

        if data is not None:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)
        else:
            http_conn.request("GET", selector, headers=headers)

        try:
            response = http_conn.getresponse()
        except http_client.BadStatusLine:
            # something went wrong with the HTTP status line
            raise URLError("http protocol error: bad status line")

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= response.status < 300:
            return addinfourl(response, response.msg, "http:" + url,
                              response.status)
        else:
            return self.http_error(
                url, response.fp,
                response.status, response.reason, response.msg, data)

    def open_http(self, url, data=None):
        """Use HTTP protocol."""
        return self._open_generic_http(http_client.HTTPConnection, url, data)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.

        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code."""
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
            if data is None:
                result = method(url, fp, errcode, errmsg, headers)
            else:
                result = method(url, fp, errcode, errmsg, headers, data)
            if result: return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise HTTPError."""
        fp.close()
        raise HTTPError(url, errcode, errmsg, headers, None)

    if _have_ssl:
        def _https_connection(self, host):
            """Return an HTTPSConnection using this opener's key and cert."""
            return http_client.HTTPSConnection(host,
                                           key_file=self.key_file,
                                           cert_file=self.cert_file)

        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            return self._open_generic_http(self._https_connection, url, data)

    def open_file(self, url):
        """Use local file or FTP depending on form of URL."""
        if not isinstance(url, str):
            raise URLError('file error: proxy support for file protocol currently not implemented')
        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
            raise ValueError("file:// scheme is supported only on localhost")
        else:
            return self.open_local_file(url)

    def open_local_file(self, url):
        """Use local file.

        Raises URLError when the file cannot be stat'ed or when the URL
        names a host (or a port) that is not this machine.
        """
        import future.backports.email.utils as email_utils
        import mimetypes
        host, file = splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError as e:
            raise URLError(e.strerror, e.filename)
        size = stats.st_size
        modified = email_utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        if not host:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        host, port = splitport(host)
        if (not port
                and socket.gethostbyname(host) in ((localhost(),) + thishost())):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            elif file[:2] == './':
                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        raise URLError('local file error: not on local host')

    def open_ftp(self, url):
        """Use FTP protocol.

        Connections are kept in self.ftpcache, keyed by (user, host, port,
        joined directory path).  Raises URLError when no host is given or
        on any FTP error.
        """
        if not isinstance(url, str):
            raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
        import mimetypes
        host, path = splithost(url)
        if not host: raise URLError('ftp error: no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily.  Iterate over a snapshot
            # of the keys: deleting entries while walking the live key view
            # raises RuntimeError on Python 3.
            for k in list(self.ftpcache):
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if key not in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors() as exp:
            raise_with_traceback(URLError('ftp error %r' % exp))

    def open_data(self, url, data=None):
        """Use "data" URL.

        Any POSTed *data* is ignored.  Raises IOError when the URL has no
        comma separating the media type from the payload.
        """
        if not isinstance(url, str):
            raise URLError('data error: proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        try:
            [type, data] = url.split(',', 1)
        except ValueError:
            raise IOError('data error', 'bad data URL')
        if not type:
            type = 'text/plain;charset=US-ASCII'
        # A trailing ";token" without '=' is the encoding, not a parameter.
        semi = type.rfind(';')
        if semi >= 0 and '=' not in type[semi:]:
            encoding = type[semi+1:]
            type = type[:semi]
        else:
            encoding = ''
        msg = []
        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                            time.gmtime(time.time())))
        msg.append('Content-type: %s' % type)
        if encoding == 'base64':
            # XXX is this encoding/decoding ok?
            data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
        else:
            data = unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        msg.append(data)
        msg = '\n'.join(msg)
        headers = email.message_from_string(msg)
        f = io.StringIO(msg)
        #f.fileno = None     # needed for addinfourl
        return addinfourl(f, headers, url)
+
+
class FancyURLopener(URLopener):
    """URLopener subclass that handles redirects and Basic authentication."""

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        self.auth_cache = {}    # 'realm@host' -> (user, passwd)
        self.tries = 0          # redirects followed for the current request
        self.maxtries = 10      # redirect limit; a false value disables it

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- return the response instead of raising."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily).

        Once `maxtries` redirects have been followed the request is
        reported as a 500 "Redirect Recursion" error instead.
        """
        self.tries += 1
        if self.maxtries and self.tries >= self.maxtries:
            handler = getattr(self, "http_error_500", self.http_error_default)
            self.tries = 0
            return handler(url, fp, 500,
                           "Internal Server Error: Redirect Recursion",
                           headers)
        response = self.redirect_internal(url, fp, errcode, errmsg, headers,
                                          data)
        self.tries = 0
        return response

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Follow the redirect named by the 'location' or 'uri' header.

        Returns None when neither header is present.  Raises HTTPError when
        the target's scheme is not http, https, ftp or empty.
        """
        for field in ('location', 'uri'):
            if field in headers:
                newurl = headers[field]
                break
        else:
            return
        fp.close()

        # In case the server sent a relative URL, join with original:
        newurl = urljoin(self.type + ":" + url, newurl)

        # For security reasons, we don't allow redirection to anything other
        # than http, https and ftp.

        # We are using newer HTTPError with older redirect_internal method
        # This older method will get deprecated in 3.3

        scheme = urlparse(newurl).scheme
        if scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(newurl, errcode,
                            errmsg +
                            " Redirection to url '%s' is not allowed." % newurl,
                            headers, fp)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is not None:
            return self.http_error_default(url, fp, errcode, errmsg, headers)
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def _handle_basic_challenge(self, challenge_header, retry_prefix,
                                url, fp, errcode, errmsg, headers, data,
                                retry):
        """Shared body of http_error_401() and http_error_407().

        Each failed check delegates to URLopener.http_error_default(); the
        retry method name is built from *retry_prefix* and self.type.
        """
        if challenge_header not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers[challenge_header]
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                                         headers)
        name = retry_prefix + self.type + '_basic_auth'
        if data is None:
            return getattr(self, name)(url, realm)
        return getattr(self, name)(url, realm, data)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
                       retry=False):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        return self._handle_basic_challenge(
            'www-authenticate', 'retry_',
            url, fp, errcode, errmsg, headers, data, retry)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
                       retry=False):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        return self._handle_basic_challenge(
            'proxy-authenticate', 'retry_proxy_',
            url, fp, errcode, errmsg, headers, data, retry)

    def _retry_via_proxy(self, scheme, url, realm, data):
        """Store credentials in the *scheme* proxy URL and reopen *url*."""
        host, selector = splithost(url)
        newurl = scheme + '://' + host + selector
        proxy = self.proxies[scheme]
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # Non-zero when the proxy URL already carried (rejected) credentials.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd):
            return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies[scheme] = scheme + '://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        return self.open(newurl, data)

    def _retry_direct(self, scheme, url, realm, data):
        """Reopen *url* with credentials embedded in its host part."""
        host, selector = splithost(url)
        # Non-zero when the URL already carried (rejected) credentials.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd):
            return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = scheme + '://' + host + selector
        if data is None:
            return self.open(newurl)
        return self.open(newurl, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Retry an http request after a 407 from the http proxy."""
        return self._retry_via_proxy('http', url, realm, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Retry an https request after a 407 from the https proxy."""
        return self._retry_via_proxy('https', url, realm, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Retry an http request after a 401 from the server."""
        return self._retry_direct('http', url, realm, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Retry an https request after a 401 from the server."""
        return self._retry_direct('https', url, realm, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        """Return (user, passwd) for *realm* at *host*.

        A cached pair is returned unless *clear_cache* is true, in which
        case it is discarded and the user is prompted again.
        """
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if not clear_cache:
                return self.auth_cache[key]
            del self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd:
            self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = input("Enter username for %s at %s: " % (realm, host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
        except KeyboardInterrupt:
            print()
            return None, None
        return user, passwd
+
+
+# Utility functions
+
# Cached result of localhost(); None until the first call.
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is performed once and the result reused afterwards."""
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
+
# Cached result of thishost(); None until the first call.
_thishost = None
def thishost():
    """Return the IP addresses of the current host as a tuple.

    Falls back to the addresses of 'localhost' when the machine's own
    hostname cannot be resolved.  The result is cached."""
    global _thishost
    if _thishost is not None:
        return _thishost
    try:
        addresses = socket.gethostbyname_ex(socket.gethostname())[2]
    except socket.gaierror:
        addresses = socket.gethostbyname_ex('localhost')[2]
    _thishost = tuple(addresses)
    return _thishost
+
# Cached result of ftperrors(); None until the first call.
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported on first use and its all_errors tuple is cached."""
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
+
# Cached result of noheaders(); None until the first call.
_noheaders = None
def noheaders():
    """Return an empty email Message object.

    The same object is returned on every call."""
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    _noheaders = email.message_from_string("")
    return _noheaders
+
+
+# Utility classes
+
class ftpwrapper(object):
    """Class used by open_ftp() for cache of open FTP connections.

    `refcount` counts the file objects handed out by retrfile() that have
    not been closed yet; `keepalive` says whether the connection should
    outlive them.
    """

    def __init__(self, user, passwd, host, port, dirs, timeout=None,
                 persistent=True):
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        self.refcount = 0
        self.keepalive = persistent
        self.init()

    def init(self):
        """Connect, log in and change into the target directory."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        self.ftp.cwd('/'.join(self.dirs))

    def retrfile(self, file, type):
        """Start retrieving *file*, or a listing when it is a directory.

        *type* is the FTP transfer type letter; 'd'/'D' requests a
        directory listing.  Returns (file object, length reported by the
        server); closing the file object calls file_close().  Raises
        URLError for permission errors other than a 550 on RETR.
        """
        import ftplib
        self.endtransfer()
        want_listing = type in ('d', 'D')
        cmd = 'TYPE A' if want_listing else 'TYPE ' + type
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The cached connection failed; reconnect once and retry.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not want_listing:
            # Try to retrieve as a file
            try:
                conn, retrlen = self.ftp.ntransfercmd('RETR ' + file)
            except ftplib.error_perm as reason:
                # A 550 reply falls through to the directory listing below.
                if str(reason)[:3] != '550':
                    raise_with_traceback(URLError('ftp error: %r' % reason))
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if not file:
                cmd = 'LIST'
            else:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        ### Was:
                        # raise URLError('ftp error: %r' % reason) from reason
                        exc = URLError('ftp error: %r' % reason)
                        exc.__cause__ = reason
                        raise exc
                finally:
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1

        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        """Mark the connection as idle."""
        self.busy = 0

    def close(self):
        """Stop keeping the connection alive; close it now if it is unused."""
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        """Close hook of a retrieved file: drop one reference."""
        self.endtransfer()
        self.refcount -= 1
        if not self.keepalive and self.refcount <= 0:
            self.real_close()

    def real_close(self):
        """Close the FTP connection, ignoring FTP errors."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
+
+# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Every environment variable whose lower-cased name ends in '_proxy' and
    whose value is non-empty contributes an entry keyed by the part of the
    name before that suffix.  If you need a different way, you can pass a
    proxies dictionary to the [Fancy]URLopener constructor.

    """
    suffix = '_proxy'
    lowered = ((name.lower(), value) for name, value in os.environ.items())
    return dict(
        (name[:-len(suffix)], value)
        for name, value in lowered
        if value and name.endswith(suffix)
    )
+
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Reads the environment variable no_proxy (falling back to NO_PROXY),
    which should be a list of DNS suffixes separated by commas, or '*' for
    all hosts.  Returns 1 when *host* -- with or without its port -- ends
    with one of the suffixes, otherwise 0.
    """
    environ = os.environ
    no_proxy = environ.get('no_proxy', '') or environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, port = splitport(host)
    # check if the host ends with any of the DNS suffixes
    suffixes = [entry.strip() for entry in no_proxy.split(',')]
    matched = any(
        suffix and (hostonly.endswith(suffix) or host.endswith(suffix))
        for suffix in suffixes
    )
    # otherwise, don't bypass
    return 1 if matched else 0
+
+
+# This code tests an OSX specific data structure but is testable on all
+# platforms
+def _proxy_bypass_macosx_sysconf(host, proxy_settings):
+ """
+ Return True iff this host shouldn't be accessed using a proxy
+
+ This function uses the MacOSX framework SystemConfiguration
+ to fetch the proxy information.
+
+ proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
+ { 'exclude_simple': bool,
+ 'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
+ }
+ """
+ from fnmatch import fnmatch
+
+ hostonly, port = splitport(host)
+
+ def ip2num(ipAddr):
+ parts = ipAddr.split('.')
+ parts = list(map(int, parts))
+ if len(parts) != 4:
+ parts = (parts + [0, 0, 0, 0])[:4]
+ return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
+
+ # Check for simple host names:
+ if '.' not in host:
+ if proxy_settings['exclude_simple']:
+ return True
+
+ hostIP = None
+
+ for value in proxy_settings.get('exceptions', ()):
+ # Items in the list are strings like these: *.local, 169.254/16
+ if not value: continue
+
+ m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
+ if m is not None:
+ if hostIP is None:
+ try:
+ hostIP = socket.gethostbyname(hostonly)
+ hostIP = ip2num(hostIP)
+ except socket.error:
+ continue
+
+ base = ip2num(m.group(1))
+ mask = m.group(2)
+ if mask is None:
+ mask = 8 * (m.group(1).count('.') + 1)
+ else:
+ mask = int(mask[1:])
+ mask = 32 - mask
+
+ if (hostIP >> mask) == (base >> mask):
+ return True
+
+ elif fnmatch(host, value):
+ return True
+
+ return False
+
+
+if sys.platform == 'darwin':
+ from _scproxy import _get_proxy_settings, _get_proxies
+
def proxy_bypass_macosx_sysconf(host):
    """Return whether *host* should bypass the proxy per the system settings.

    Feeds the current result of _get_proxy_settings() to
    _proxy_bypass_macosx_sysconf()."""
    return _proxy_bypass_macosx_sysconf(host, _get_proxy_settings())
+
def getproxies_macosx_sysconf():
    """Return a dictionary of scheme -> proxy server URL mappings.

    The information comes from the MacOSX SystemConfiguration framework
    by way of _scproxy._get_proxies().
    """
    system_proxies = _get_proxies()
    return system_proxies
+
+
+
+ def proxy_bypass(host):
+ if getproxies_environment():
+ return proxy_bypass_environment(host)
+ else:
+ return proxy_bypass_macosx_sysconf(host)
+
+ def getproxies():
+ return getproxies_environment() or getproxies_macosx_sysconf()
+
+
+elif os.name == 'nt':
+ def getproxies_registry():
+ """Return a dictionary of scheme -> proxy server URL mappings.
+
+ Win32 uses the registry to store proxies.
+
+ """
+ proxies = {}
+ try:
+ import winreg
+ except ImportError:
+ # Std module, so should be around - but you never know!
+ return proxies
+ try:
+ internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
+ r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
+ proxyEnable = winreg.QueryValueEx(internetSettings,
+ 'ProxyEnable')[0]
+ if proxyEnable:
+ # Returned as Unicode but problems if not converted to ASCII
+ proxyServer = str(winreg.QueryValueEx(internetSettings,
+ 'ProxyServer')[0])
+ if '=' in proxyServer:
+ # Per-protocol settings
+ for p in proxyServer.split(';'):
+ protocol, address = p.split('=', 1)
+ # See if address has a type:// prefix
+ if not re.match('^([^/:]+)://', address):
+ address = '%s://%s' % (protocol, address)
+ proxies[protocol] = address
+ else:
+ # Use one setting for all protocols
+ if proxyServer[:5] == 'http:':
+ proxies['http'] = proxyServer
+ else:
+ proxies['http'] = 'http://%s' % proxyServer
+ proxies['https'] = 'https://%s' % proxyServer
+ proxies['ftp'] = 'ftp://%s' % proxyServer
+ internetSettings.Close()
+ except (WindowsError, ValueError, TypeError):
+ # Either registry key not found etc, or the value in an
+ # unexpected format.
+ # proxies already set up to be empty so nothing to do
+ pass
+ return proxies
+
+ def getproxies():
+ """Return a dictionary of scheme -> proxy server URL mappings.
+
+ Returns settings gathered from the environment, if specified,
+ or the registry.
+
+ """
+ return getproxies_environment() or getproxies_registry()
+
+ def proxy_bypass_registry(host):
+ try:
+ import winreg
+ except ImportError:
+ # Std modules, so should be around - but you never know!
+ return 0
+ try:
+ internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
+ r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
+ proxyEnable = winreg.QueryValueEx(internetSettings,
+ 'ProxyEnable')[0]
+ proxyOverride = str(winreg.QueryValueEx(internetSettings,
+ 'ProxyOverride')[0])
+ # ^^^^ Returned as Unicode but problems if not converted to ASCII
+ except WindowsError:
+ return 0
+ if not proxyEnable or not proxyOverride:
+ return 0
+ # try to make a host list from name and IP address.
+ rawHost, port = splitport(host)
+ host = [rawHost]
+ try:
+ addr = socket.gethostbyname(rawHost)
+ if addr != rawHost:
+ host.append(addr)
+ except socket.error:
+ pass
+ try:
+ fqdn = socket.getfqdn(rawHost)
+ if fqdn != rawHost:
+ host.append(fqdn)
+ except socket.error:
+ pass
+ # make a check value list from the registry entry: replace the
+ # '<local>' string by the localhost entry and the corresponding
+ # canonical entry.
+ proxyOverride = proxyOverride.split(';')
+ # now check if we match one of the registry values.
+ for test in proxyOverride:
+ if test == '<local>':
+ if '.' not in rawHost:
+ return 1
+ test = test.replace(".", r"\.") # mask dots
+ test = test.replace("*", r".*") # change glob sequence
+ test = test.replace("?", r".") # change glob char
+ for val in host:
+ if re.match(test, val, re.I):
+ return 1
+ return 0
+
+ def proxy_bypass(host):
+ """Return a dictionary of scheme -> proxy server URL mappings.
+
+ Returns settings gathered from the environment, if specified,
+ or the registry.
+
+ """
+ if getproxies_environment():
+ return proxy_bypass_environment(host)
+ else:
+ return proxy_bypass_registry(host)
+
+else:
+ # By default use environment variables
+ getproxies = getproxies_environment
+ proxy_bypass = proxy_bypass_environment
diff --git a/contrib/python/future/future/backports/urllib/response.py b/contrib/python/future/future/backports/urllib/response.py
index 3a13a3f8a2..adbf6e5ae3 100644
--- a/contrib/python/future/future/backports/urllib/response.py
+++ b/contrib/python/future/future/backports/urllib/response.py
@@ -1,103 +1,103 @@
-"""Response classes used by urllib.
-
-The base class, addbase, defines a minimal file-like interface,
-including read() and readline(). The typical response object is an
-addinfourl instance, which defines an info() method that returns
-headers and a geturl() method that returns the url.
-"""
-from __future__ import absolute_import, division, unicode_literals
-from future.builtins import object
-
-class addbase(object):
- """Base class for addinfo and addclosehook."""
-
- # XXX Add a method to expose the timeout on the underlying socket?
-
- def __init__(self, fp):
- # TODO(jhylton): Is there a better way to delegate using io?
- self.fp = fp
- self.read = self.fp.read
- self.readline = self.fp.readline
- # TODO(jhylton): Make sure an object with readlines() is also iterable
- if hasattr(self.fp, "readlines"):
- self.readlines = self.fp.readlines
- if hasattr(self.fp, "fileno"):
- self.fileno = self.fp.fileno
- else:
- self.fileno = lambda: None
-
- def __iter__(self):
- # Assigning `__iter__` to the instance doesn't work as intended
- # because the iter builtin does something like `cls.__iter__(obj)`
- # and thus fails to find the _bound_ method `obj.__iter__`.
- # Returning just `self.fp` works for built-in file objects but
- # might not work for general file-like objects.
- return iter(self.fp)
-
- def __repr__(self):
- return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
- id(self), self.fp)
-
- def close(self):
- if self.fp:
- self.fp.close()
- self.fp = None
- self.read = None
- self.readline = None
- self.readlines = None
- self.fileno = None
- self.__iter__ = None
- self.__next__ = None
-
- def __enter__(self):
- if self.fp is None:
- raise ValueError("I/O operation on closed file")
- return self
-
- def __exit__(self, type, value, traceback):
- self.close()
-
-class addclosehook(addbase):
- """Class to add a close hook to an open file."""
-
- def __init__(self, fp, closehook, *hookargs):
- addbase.__init__(self, fp)
- self.closehook = closehook
- self.hookargs = hookargs
-
- def close(self):
- if self.closehook:
- self.closehook(*self.hookargs)
- self.closehook = None
- self.hookargs = None
- addbase.close(self)
-
-class addinfo(addbase):
- """class to add an info() method to an open file."""
-
- def __init__(self, fp, headers):
- addbase.__init__(self, fp)
- self.headers = headers
-
- def info(self):
- return self.headers
-
-class addinfourl(addbase):
- """class to add info() and geturl() methods to an open file."""
-
- def __init__(self, fp, headers, url, code=None):
- addbase.__init__(self, fp)
- self.headers = headers
- self.url = url
- self.code = code
-
- def info(self):
- return self.headers
-
- def getcode(self):
- return self.code
-
- def geturl(self):
- return self.url
-
-del absolute_import, division, unicode_literals, object
+"""Response classes used by urllib.
+
+The base class, addbase, defines a minimal file-like interface,
+including read() and readline(). The typical response object is an
+addinfourl instance, which defines an info() method that returns
+headers and a geturl() method that returns the url.
+"""
+from __future__ import absolute_import, division, unicode_literals
+from future.builtins import object
+
+class addbase(object):
+ """Base class for addinfo and addclosehook."""
+
+ # XXX Add a method to expose the timeout on the underlying socket?
+
+ def __init__(self, fp):
+ # TODO(jhylton): Is there a better way to delegate using io?
+ self.fp = fp
+ self.read = self.fp.read
+ self.readline = self.fp.readline
+ # TODO(jhylton): Make sure an object with readlines() is also iterable
+ if hasattr(self.fp, "readlines"):
+ self.readlines = self.fp.readlines
+ if hasattr(self.fp, "fileno"):
+ self.fileno = self.fp.fileno
+ else:
+ self.fileno = lambda: None
+
+ def __iter__(self):
+ # Assigning `__iter__` to the instance doesn't work as intended
+ # because the iter builtin does something like `cls.__iter__(obj)`
+ # and thus fails to find the _bound_ method `obj.__iter__`.
+ # Returning just `self.fp` works for built-in file objects but
+ # might not work for general file-like objects.
+ return iter(self.fp)
+
+ def __repr__(self):
+ return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
+ id(self), self.fp)
+
+ def close(self):
+ if self.fp:
+ self.fp.close()
+ self.fp = None
+ self.read = None
+ self.readline = None
+ self.readlines = None
+ self.fileno = None
+ self.__iter__ = None
+ self.__next__ = None
+
+ def __enter__(self):
+ if self.fp is None:
+ raise ValueError("I/O operation on closed file")
+ return self
+
+ def __exit__(self, type, value, traceback):
+ self.close()
+
+class addclosehook(addbase):
+ """Class to add a close hook to an open file."""
+
+ def __init__(self, fp, closehook, *hookargs):
+ addbase.__init__(self, fp)
+ self.closehook = closehook
+ self.hookargs = hookargs
+
+ def close(self):
+ if self.closehook:
+ self.closehook(*self.hookargs)
+ self.closehook = None
+ self.hookargs = None
+ addbase.close(self)
+
+class addinfo(addbase):
+ """class to add an info() method to an open file."""
+
+ def __init__(self, fp, headers):
+ addbase.__init__(self, fp)
+ self.headers = headers
+
+ def info(self):
+ return self.headers
+
+class addinfourl(addbase):
+ """class to add info() and geturl() methods to an open file."""
+
+ def __init__(self, fp, headers, url, code=None):
+ addbase.__init__(self, fp)
+ self.headers = headers
+ self.url = url
+ self.code = code
+
+ def info(self):
+ return self.headers
+
+ def getcode(self):
+ return self.code
+
+ def geturl(self):
+ return self.url
+
+del absolute_import, division, unicode_literals, object
diff --git a/contrib/python/future/future/backports/urllib/robotparser.py b/contrib/python/future/future/backports/urllib/robotparser.py
index c4687eab47..a0f36511b4 100644
--- a/contrib/python/future/future/backports/urllib/robotparser.py
+++ b/contrib/python/future/future/backports/urllib/robotparser.py
@@ -1,211 +1,211 @@
-from __future__ import absolute_import, division, unicode_literals
-from future.builtins import str
-""" robotparser.py
-
- Copyright (C) 2000 Bastian Kleineidam
-
- You can choose between two licenses when using this package:
- 1) GNU GPLv2
- 2) PSF license for Python 2.2
-
- The robots.txt Exclusion Protocol is implemented as specified in
- http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html
-"""
-
-# Was: import urllib.parse, urllib.request
-from future.backports import urllib
-from future.backports.urllib import parse as _parse, request as _request
-urllib.parse = _parse
-urllib.request = _request
-
-
-__all__ = ["RobotFileParser"]
-
-class RobotFileParser(object):
- """ This class provides a set of methods to read, parse and answer
- questions about a single robots.txt file.
-
- """
-
- def __init__(self, url=''):
- self.entries = []
- self.default_entry = None
- self.disallow_all = False
- self.allow_all = False
- self.set_url(url)
- self.last_checked = 0
-
- def mtime(self):
- """Returns the time the robots.txt file was last fetched.
-
- This is useful for long-running web spiders that need to
- check for new robots.txt files periodically.
-
- """
- return self.last_checked
-
- def modified(self):
- """Sets the time the robots.txt file was last fetched to the
- current time.
-
- """
- import time
- self.last_checked = time.time()
-
- def set_url(self, url):
- """Sets the URL referring to a robots.txt file."""
- self.url = url
- self.host, self.path = urllib.parse.urlparse(url)[1:3]
-
- def read(self):
- """Reads the robots.txt URL and feeds it to the parser."""
- try:
- f = urllib.request.urlopen(self.url)
- except urllib.error.HTTPError as err:
- if err.code in (401, 403):
- self.disallow_all = True
- elif err.code >= 400:
- self.allow_all = True
- else:
- raw = f.read()
- self.parse(raw.decode("utf-8").splitlines())
-
- def _add_entry(self, entry):
- if "*" in entry.useragents:
- # the default entry is considered last
- if self.default_entry is None:
- # the first default entry wins
- self.default_entry = entry
- else:
- self.entries.append(entry)
-
- def parse(self, lines):
- """Parse the input lines from a robots.txt file.
-
- We allow that a user-agent: line is not preceded by
- one or more blank lines.
- """
- # states:
- # 0: start state
- # 1: saw user-agent line
- # 2: saw an allow or disallow line
- state = 0
- entry = Entry()
-
- for line in lines:
- if not line:
- if state == 1:
- entry = Entry()
- state = 0
- elif state == 2:
- self._add_entry(entry)
- entry = Entry()
- state = 0
- # remove optional comment and strip line
- i = line.find('#')
- if i >= 0:
- line = line[:i]
- line = line.strip()
- if not line:
- continue
- line = line.split(':', 1)
- if len(line) == 2:
- line[0] = line[0].strip().lower()
- line[1] = urllib.parse.unquote(line[1].strip())
- if line[0] == "user-agent":
- if state == 2:
- self._add_entry(entry)
- entry = Entry()
- entry.useragents.append(line[1])
- state = 1
- elif line[0] == "disallow":
- if state != 0:
- entry.rulelines.append(RuleLine(line[1], False))
- state = 2
- elif line[0] == "allow":
- if state != 0:
- entry.rulelines.append(RuleLine(line[1], True))
- state = 2
- if state == 2:
- self._add_entry(entry)
-
-
- def can_fetch(self, useragent, url):
- """using the parsed robots.txt decide if useragent can fetch url"""
- if self.disallow_all:
- return False
- if self.allow_all:
- return True
- # search for given user agent matches
- # the first match counts
- parsed_url = urllib.parse.urlparse(urllib.parse.unquote(url))
- url = urllib.parse.urlunparse(('','',parsed_url.path,
- parsed_url.params,parsed_url.query, parsed_url.fragment))
- url = urllib.parse.quote(url)
- if not url:
- url = "/"
- for entry in self.entries:
- if entry.applies_to(useragent):
- return entry.allowance(url)
- # try the default entry last
- if self.default_entry:
- return self.default_entry.allowance(url)
- # agent not found ==> access granted
- return True
-
- def __str__(self):
- return ''.join([str(entry) + "\n" for entry in self.entries])
-
-
-class RuleLine(object):
- """A rule line is a single "Allow:" (allowance==True) or "Disallow:"
- (allowance==False) followed by a path."""
- def __init__(self, path, allowance):
- if path == '' and not allowance:
- # an empty value means allow all
- allowance = True
- self.path = urllib.parse.quote(path)
- self.allowance = allowance
-
- def applies_to(self, filename):
- return self.path == "*" or filename.startswith(self.path)
-
- def __str__(self):
- return (self.allowance and "Allow" or "Disallow") + ": " + self.path
-
-
-class Entry(object):
- """An entry has one or more user-agents and zero or more rulelines"""
- def __init__(self):
- self.useragents = []
- self.rulelines = []
-
- def __str__(self):
- ret = []
- for agent in self.useragents:
- ret.extend(["User-agent: ", agent, "\n"])
- for line in self.rulelines:
- ret.extend([str(line), "\n"])
- return ''.join(ret)
-
- def applies_to(self, useragent):
- """check if this entry applies to the specified agent"""
- # split the name token and make it lower case
- useragent = useragent.split("/")[0].lower()
- for agent in self.useragents:
- if agent == '*':
- # we have the catch-all agent
- return True
- agent = agent.lower()
- if agent in useragent:
- return True
- return False
-
- def allowance(self, filename):
- """Preconditions:
- - our agent applies to this entry
- - filename is URL decoded"""
- for line in self.rulelines:
- if line.applies_to(filename):
- return line.allowance
- return True
+from __future__ import absolute_import, division, unicode_literals
+from future.builtins import str
+""" robotparser.py
+
+ Copyright (C) 2000 Bastian Kleineidam
+
+ You can choose between two licenses when using this package:
+ 1) GNU GPLv2
+ 2) PSF license for Python 2.2
+
+ The robots.txt Exclusion Protocol is implemented as specified in
+ http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html
+"""
+
+# Was: import urllib.parse, urllib.request
+from future.backports import urllib
+from future.backports.urllib import parse as _parse, request as _request
+urllib.parse = _parse
+urllib.request = _request
+
+
+__all__ = ["RobotFileParser"]
+
+class RobotFileParser(object):
+ """ This class provides a set of methods to read, parse and answer
+ questions about a single robots.txt file.
+
+ """
+
+ def __init__(self, url=''):
+ self.entries = []
+ self.default_entry = None
+ self.disallow_all = False
+ self.allow_all = False
+ self.set_url(url)
+ self.last_checked = 0
+
+ def mtime(self):
+ """Returns the time the robots.txt file was last fetched.
+
+ This is useful for long-running web spiders that need to
+ check for new robots.txt files periodically.
+
+ """
+ return self.last_checked
+
+ def modified(self):
+ """Sets the time the robots.txt file was last fetched to the
+ current time.
+
+ """
+ import time
+ self.last_checked = time.time()
+
+ def set_url(self, url):
+ """Sets the URL referring to a robots.txt file."""
+ self.url = url
+ self.host, self.path = urllib.parse.urlparse(url)[1:3]
+
+ def read(self):
+ """Reads the robots.txt URL and feeds it to the parser."""
+ try:
+ f = urllib.request.urlopen(self.url)
+ except urllib.error.HTTPError as err:
+ if err.code in (401, 403):
+ self.disallow_all = True
+ elif err.code >= 400:
+ self.allow_all = True
+ else:
+ raw = f.read()
+ self.parse(raw.decode("utf-8").splitlines())
+
+ def _add_entry(self, entry):
+ if "*" in entry.useragents:
+ # the default entry is considered last
+ if self.default_entry is None:
+ # the first default entry wins
+ self.default_entry = entry
+ else:
+ self.entries.append(entry)
+
+ def parse(self, lines):
+ """Parse the input lines from a robots.txt file.
+
+ We allow that a user-agent: line is not preceded by
+ one or more blank lines.
+ """
+ # states:
+ # 0: start state
+ # 1: saw user-agent line
+ # 2: saw an allow or disallow line
+ state = 0
+ entry = Entry()
+
+ for line in lines:
+ if not line:
+ if state == 1:
+ entry = Entry()
+ state = 0
+ elif state == 2:
+ self._add_entry(entry)
+ entry = Entry()
+ state = 0
+ # remove optional comment and strip line
+ i = line.find('#')
+ if i >= 0:
+ line = line[:i]
+ line = line.strip()
+ if not line:
+ continue
+ line = line.split(':', 1)
+ if len(line) == 2:
+ line[0] = line[0].strip().lower()
+ line[1] = urllib.parse.unquote(line[1].strip())
+ if line[0] == "user-agent":
+ if state == 2:
+ self._add_entry(entry)
+ entry = Entry()
+ entry.useragents.append(line[1])
+ state = 1
+ elif line[0] == "disallow":
+ if state != 0:
+ entry.rulelines.append(RuleLine(line[1], False))
+ state = 2
+ elif line[0] == "allow":
+ if state != 0:
+ entry.rulelines.append(RuleLine(line[1], True))
+ state = 2
+ if state == 2:
+ self._add_entry(entry)
+
+
+ def can_fetch(self, useragent, url):
+ """using the parsed robots.txt decide if useragent can fetch url"""
+ if self.disallow_all:
+ return False
+ if self.allow_all:
+ return True
+ # search for given user agent matches
+ # the first match counts
+ parsed_url = urllib.parse.urlparse(urllib.parse.unquote(url))
+ url = urllib.parse.urlunparse(('','',parsed_url.path,
+ parsed_url.params,parsed_url.query, parsed_url.fragment))
+ url = urllib.parse.quote(url)
+ if not url:
+ url = "/"
+ for entry in self.entries:
+ if entry.applies_to(useragent):
+ return entry.allowance(url)
+ # try the default entry last
+ if self.default_entry:
+ return self.default_entry.allowance(url)
+ # agent not found ==> access granted
+ return True
+
+ def __str__(self):
+ return ''.join([str(entry) + "\n" for entry in self.entries])
+
+
+class RuleLine(object):
+ """A rule line is a single "Allow:" (allowance==True) or "Disallow:"
+ (allowance==False) followed by a path."""
+ def __init__(self, path, allowance):
+ if path == '' and not allowance:
+ # an empty value means allow all
+ allowance = True
+ self.path = urllib.parse.quote(path)
+ self.allowance = allowance
+
+ def applies_to(self, filename):
+ return self.path == "*" or filename.startswith(self.path)
+
+ def __str__(self):
+ return (self.allowance and "Allow" or "Disallow") + ": " + self.path
+
+
+class Entry(object):
+ """An entry has one or more user-agents and zero or more rulelines"""
+ def __init__(self):
+ self.useragents = []
+ self.rulelines = []
+
+ def __str__(self):
+ ret = []
+ for agent in self.useragents:
+ ret.extend(["User-agent: ", agent, "\n"])
+ for line in self.rulelines:
+ ret.extend([str(line), "\n"])
+ return ''.join(ret)
+
+ def applies_to(self, useragent):
+ """check if this entry applies to the specified agent"""
+ # split the name token and make it lower case
+ useragent = useragent.split("/")[0].lower()
+ for agent in self.useragents:
+ if agent == '*':
+ # we have the catch-all agent
+ return True
+ agent = agent.lower()
+ if agent in useragent:
+ return True
+ return False
+
+ def allowance(self, filename):
+ """Preconditions:
+ - our agent applies to this entry
+ - filename is URL decoded"""
+ for line in self.rulelines:
+ if line.applies_to(filename):
+ return line.allowance
+ return True
diff --git a/contrib/python/future/future/backports/xmlrpc/__init__.py b/contrib/python/future/future/backports/xmlrpc/__init__.py
index 9d4f425fe3..196d378857 100644
--- a/contrib/python/future/future/backports/xmlrpc/__init__.py
+++ b/contrib/python/future/future/backports/xmlrpc/__init__.py
@@ -1 +1 @@
-# This directory is a Python package.
+# This directory is a Python package.
diff --git a/contrib/python/future/future/backports/xmlrpc/client.py b/contrib/python/future/future/backports/xmlrpc/client.py
index 1b27e68b25..3f0cae9b00 100644
--- a/contrib/python/future/future/backports/xmlrpc/client.py
+++ b/contrib/python/future/future/backports/xmlrpc/client.py
@@ -1,1500 +1,1500 @@
-#
-# XML-RPC CLIENT LIBRARY
-# $Id$
-#
-# an XML-RPC client interface for Python.
-#
-# the marshalling and response parser code can also be used to
-# implement XML-RPC servers.
-#
-# Notes:
-# this version is designed to work with Python 2.1 or newer.
-#
-# History:
-# 1999-01-14 fl Created
-# 1999-01-15 fl Changed dateTime to use localtime
-# 1999-01-16 fl Added Binary/base64 element, default to RPC2 service
-# 1999-01-19 fl Fixed array data element (from Skip Montanaro)
-# 1999-01-21 fl Fixed dateTime constructor, etc.
-# 1999-02-02 fl Added fault handling, handle empty sequences, etc.
-# 1999-02-10 fl Fixed problem with empty responses (from Skip Montanaro)
-# 1999-06-20 fl Speed improvements, pluggable parsers/transports (0.9.8)
-# 2000-11-28 fl Changed boolean to check the truth value of its argument
-# 2001-02-24 fl Added encoding/Unicode/SafeTransport patches
-# 2001-02-26 fl Added compare support to wrappers (0.9.9/1.0b1)
-# 2001-03-28 fl Make sure response tuple is a singleton
-# 2001-03-29 fl Don't require empty params element (from Nicholas Riley)
-# 2001-06-10 fl Folded in _xmlrpclib accelerator support (1.0b2)
-# 2001-08-20 fl Base xmlrpclib.Error on built-in Exception (from Paul Prescod)
-# 2001-09-03 fl Allow Transport subclass to override getparser
-# 2001-09-10 fl Lazy import of urllib, cgi, xmllib (20x import speedup)
-# 2001-10-01 fl Remove containers from memo cache when done with them
-# 2001-10-01 fl Use faster escape method (80% dumps speedup)
-# 2001-10-02 fl More dumps microtuning
-# 2001-10-04 fl Make sure import expat gets a parser (from Guido van Rossum)
-# 2001-10-10 sm Allow long ints to be passed as ints if they don't overflow
-# 2001-10-17 sm Test for int and long overflow (allows use on 64-bit systems)
-# 2001-11-12 fl Use repr() to marshal doubles (from Paul Felix)
-# 2002-03-17 fl Avoid buffered read when possible (from James Rucker)
-# 2002-04-07 fl Added pythondoc comments
-# 2002-04-16 fl Added __str__ methods to datetime/binary wrappers
-# 2002-05-15 fl Added error constants (from Andrew Kuchling)
-# 2002-06-27 fl Merged with Python CVS version
-# 2002-10-22 fl Added basic authentication (based on code from Phillip Eby)
-# 2003-01-22 sm Add support for the bool type
-# 2003-02-27 gvr Remove apply calls
-# 2003-04-24 sm Use cStringIO if available
-# 2003-04-25 ak Add support for nil
-# 2003-06-15 gn Add support for time.struct_time
-# 2003-07-12 gp Correct marshalling of Faults
-# 2003-10-31 mvl Add multicall support
-# 2004-08-20 mvl Bump minimum supported Python version to 2.1
-#
-# Copyright (c) 1999-2002 by Secret Labs AB.
-# Copyright (c) 1999-2002 by Fredrik Lundh.
-#
-# info@pythonware.com
-# http://www.pythonware.com
-#
-# --------------------------------------------------------------------
-# The XML-RPC client interface is
-#
-# Copyright (c) 1999-2002 by Secret Labs AB
-# Copyright (c) 1999-2002 by Fredrik Lundh
-#
-# By obtaining, using, and/or copying this software and/or its
-# associated documentation, you agree that you have read, understood,
-# and will comply with the following terms and conditions:
-#
-# Permission to use, copy, modify, and distribute this software and
-# its associated documentation for any purpose and without fee is
-# hereby granted, provided that the above copyright notice appears in
-# all copies, and that both that copyright notice and this permission
-# notice appear in supporting documentation, and that the name of
-# Secret Labs AB or the author not be used in advertising or publicity
-# pertaining to distribution of the software without specific, written
-# prior permission.
-#
-# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
-# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
-# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
-# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
-# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
-# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
-# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
-# OF THIS SOFTWARE.
-# --------------------------------------------------------------------
-
-"""
-Ported using Python-Future from the Python 3.3 standard library.
-
-An XML-RPC client interface for Python.
-
-The marshalling and response parser code can also be used to
-implement XML-RPC servers.
-
-Exported exceptions:
-
- Error Base class for client errors
- ProtocolError Indicates an HTTP protocol error
- ResponseError Indicates a broken response package
- Fault Indicates an XML-RPC fault package
-
-Exported classes:
-
- ServerProxy Represents a logical connection to an XML-RPC server
-
- MultiCall Executor of boxcared xmlrpc requests
- DateTime dateTime wrapper for an ISO 8601 string or time tuple or
- localtime integer value to generate a "dateTime.iso8601"
- XML-RPC value
- Binary binary data wrapper
-
- Marshaller Generate an XML-RPC params chunk from a Python data structure
- Unmarshaller Unmarshal an XML-RPC response from incoming XML event message
- Transport Handles an HTTP transaction to an XML-RPC server
- SafeTransport Handles an HTTPS transaction to an XML-RPC server
-
-Exported constants:
-
- (none)
-
-Exported functions:
-
- getparser Create instance of the fastest available parser & attach
- to an unmarshalling object
- dumps Convert an argument tuple or a Fault instance to an XML-RPC
- request (or response, if the methodresponse option is used).
- loads Convert an XML-RPC packet to unmarshalled data plus a method
- name (None if not present).
-"""
-
-from __future__ import (absolute_import, division, print_function,
- unicode_literals)
-from future.builtins import bytes, dict, int, range, str
-
+#
+# XML-RPC CLIENT LIBRARY
+# $Id$
+#
+# an XML-RPC client interface for Python.
+#
+# the marshalling and response parser code can also be used to
+# implement XML-RPC servers.
+#
+# Notes:
+# this version is designed to work with Python 2.1 or newer.
+#
+# History:
+# 1999-01-14 fl Created
+# 1999-01-15 fl Changed dateTime to use localtime
+# 1999-01-16 fl Added Binary/base64 element, default to RPC2 service
+# 1999-01-19 fl Fixed array data element (from Skip Montanaro)
+# 1999-01-21 fl Fixed dateTime constructor, etc.
+# 1999-02-02 fl Added fault handling, handle empty sequences, etc.
+# 1999-02-10 fl Fixed problem with empty responses (from Skip Montanaro)
+# 1999-06-20 fl Speed improvements, pluggable parsers/transports (0.9.8)
+# 2000-11-28 fl Changed boolean to check the truth value of its argument
+# 2001-02-24 fl Added encoding/Unicode/SafeTransport patches
+# 2001-02-26 fl Added compare support to wrappers (0.9.9/1.0b1)
+# 2001-03-28 fl Make sure response tuple is a singleton
+# 2001-03-29 fl Don't require empty params element (from Nicholas Riley)
+# 2001-06-10 fl Folded in _xmlrpclib accelerator support (1.0b2)
+# 2001-08-20 fl Base xmlrpclib.Error on built-in Exception (from Paul Prescod)
+# 2001-09-03 fl Allow Transport subclass to override getparser
+# 2001-09-10 fl Lazy import of urllib, cgi, xmllib (20x import speedup)
+# 2001-10-01 fl Remove containers from memo cache when done with them
+# 2001-10-01 fl Use faster escape method (80% dumps speedup)
+# 2001-10-02 fl More dumps microtuning
+# 2001-10-04 fl Make sure import expat gets a parser (from Guido van Rossum)
+# 2001-10-10 sm Allow long ints to be passed as ints if they don't overflow
+# 2001-10-17 sm Test for int and long overflow (allows use on 64-bit systems)
+# 2001-11-12 fl Use repr() to marshal doubles (from Paul Felix)
+# 2002-03-17 fl Avoid buffered read when possible (from James Rucker)
+# 2002-04-07 fl Added pythondoc comments
+# 2002-04-16 fl Added __str__ methods to datetime/binary wrappers
+# 2002-05-15 fl Added error constants (from Andrew Kuchling)
+# 2002-06-27 fl Merged with Python CVS version
+# 2002-10-22 fl Added basic authentication (based on code from Phillip Eby)
+# 2003-01-22 sm Add support for the bool type
+# 2003-02-27 gvr Remove apply calls
+# 2003-04-24 sm Use cStringIO if available
+# 2003-04-25 ak Add support for nil
+# 2003-06-15 gn Add support for time.struct_time
+# 2003-07-12 gp Correct marshalling of Faults
+# 2003-10-31 mvl Add multicall support
+# 2004-08-20 mvl Bump minimum supported Python version to 2.1
+#
+# Copyright (c) 1999-2002 by Secret Labs AB.
+# Copyright (c) 1999-2002 by Fredrik Lundh.
+#
+# info@pythonware.com
+# http://www.pythonware.com
+#
+# --------------------------------------------------------------------
+# The XML-RPC client interface is
+#
+# Copyright (c) 1999-2002 by Secret Labs AB
+# Copyright (c) 1999-2002 by Fredrik Lundh
+#
+# By obtaining, using, and/or copying this software and/or its
+# associated documentation, you agree that you have read, understood,
+# and will comply with the following terms and conditions:
+#
+# Permission to use, copy, modify, and distribute this software and
+# its associated documentation for any purpose and without fee is
+# hereby granted, provided that the above copyright notice appears in
+# all copies, and that both that copyright notice and this permission
+# notice appear in supporting documentation, and that the name of
+# Secret Labs AB or the author not be used in advertising or publicity
+# pertaining to distribution of the software without specific, written
+# prior permission.
+#
+# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
+# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
+# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
+# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
+# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+# OF THIS SOFTWARE.
+# --------------------------------------------------------------------
+
+"""
+Ported using Python-Future from the Python 3.3 standard library.
+
+An XML-RPC client interface for Python.
+
+The marshalling and response parser code can also be used to
+implement XML-RPC servers.
+
+Exported exceptions:
+
+ Error Base class for client errors
+ ProtocolError Indicates an HTTP protocol error
+ ResponseError Indicates a broken response package
+ Fault Indicates an XML-RPC fault package
+
+Exported classes:
+
+ ServerProxy Represents a logical connection to an XML-RPC server
+
+ MultiCall Executor of boxcared xmlrpc requests
+ DateTime dateTime wrapper for an ISO 8601 string or time tuple or
+ localtime integer value to generate a "dateTime.iso8601"
+ XML-RPC value
+ Binary binary data wrapper
+
+ Marshaller Generate an XML-RPC params chunk from a Python data structure
+ Unmarshaller Unmarshal an XML-RPC response from incoming XML event message
+ Transport Handles an HTTP transaction to an XML-RPC server
+ SafeTransport Handles an HTTPS transaction to an XML-RPC server
+
+Exported constants:
+
+ (none)
+
+Exported functions:
+
+ getparser Create instance of the fastest available parser & attach
+ to an unmarshalling object
+ dumps Convert an argument tuple or a Fault instance to an XML-RPC
+ request (or response, if the methodresponse option is used).
+ loads Convert an XML-RPC packet to unmarshalled data plus a method
+ name (None if not present).
+"""
+
+from __future__ import (absolute_import, division, print_function,
+ unicode_literals)
+from future.builtins import bytes, dict, int, range, str
+
import sys
-import base64
+import base64
if sys.version_info[0] < 3:
# Py2.7 compatibility hack
base64.encodebytes = base64.encodestring
base64.decodebytes = base64.decodestring
-import time
-from datetime import datetime
-from future.backports.http import client as http_client
-from future.backports.urllib import parse as urllib_parse
-from future.utils import ensure_new_type
-from xml.parsers import expat
-import socket
-import errno
-from io import BytesIO
-try:
- import gzip
-except ImportError:
- gzip = None #python can be built without zlib/gzip support
-
-# --------------------------------------------------------------------
-# Internal stuff
-
-def escape(s):
- s = s.replace("&", "&amp;")
- s = s.replace("<", "&lt;")
- return s.replace(">", "&gt;",)
-
-# used in User-Agent header sent
-__version__ = sys.version[:3]
-
-# xmlrpc integer limits
-MAXINT = 2**31-1
-MININT = -2**31
-
-# --------------------------------------------------------------------
-# Error constants (from Dan Libby's specification at
-# http://xmlrpc-epi.sourceforge.net/specs/rfc.fault_codes.php)
-
-# Ranges of errors
-PARSE_ERROR = -32700
-SERVER_ERROR = -32600
-APPLICATION_ERROR = -32500
-SYSTEM_ERROR = -32400
-TRANSPORT_ERROR = -32300
-
-# Specific errors
-NOT_WELLFORMED_ERROR = -32700
-UNSUPPORTED_ENCODING = -32701
-INVALID_ENCODING_CHAR = -32702
-INVALID_XMLRPC = -32600
-METHOD_NOT_FOUND = -32601
-INVALID_METHOD_PARAMS = -32602
-INTERNAL_ERROR = -32603
-
-# --------------------------------------------------------------------
-# Exceptions
-
-##
-# Base class for all kinds of client-side errors.
-
-class Error(Exception):
- """Base class for client errors."""
- def __str__(self):
- return repr(self)
-
-##
-# Indicates an HTTP-level protocol error. This is raised by the HTTP
-# transport layer, if the server returns an error code other than 200
-# (OK).
-#
-# @param url The target URL.
-# @param errcode The HTTP error code.
-# @param errmsg The HTTP error message.
-# @param headers The HTTP header dictionary.
-
-class ProtocolError(Error):
- """Indicates an HTTP protocol error."""
- def __init__(self, url, errcode, errmsg, headers):
- Error.__init__(self)
- self.url = url
- self.errcode = errcode
- self.errmsg = errmsg
- self.headers = headers
- def __repr__(self):
- return (
- "<ProtocolError for %s: %s %s>" %
- (self.url, self.errcode, self.errmsg)
- )
-
-##
-# Indicates a broken XML-RPC response package. This exception is
-# raised by the unmarshalling layer, if the XML-RPC response is
-# malformed.
-
-class ResponseError(Error):
- """Indicates a broken response package."""
- pass
-
-##
-# Indicates an XML-RPC fault response package. This exception is
-# raised by the unmarshalling layer, if the XML-RPC response contains
-# a fault string. This exception can also be used as a class, to
-# generate a fault XML-RPC message.
-#
-# @param faultCode The XML-RPC fault code.
-# @param faultString The XML-RPC fault string.
-
-class Fault(Error):
- """Indicates an XML-RPC fault package."""
- def __init__(self, faultCode, faultString, **extra):
- Error.__init__(self)
- self.faultCode = faultCode
- self.faultString = faultString
- def __repr__(self):
- return "<Fault %s: %r>" % (ensure_new_type(self.faultCode),
- ensure_new_type(self.faultString))
-
-# --------------------------------------------------------------------
-# Special values
-
-##
-# Backwards compatibility
-
-boolean = Boolean = bool
-
-##
-# Wrapper for XML-RPC DateTime values. This converts a time value to
-# the format used by XML-RPC.
-# <p>
-# The value can be given as a datetime object, as a string in the
-# format "yyyymmddThh:mm:ss", as a 9-item time tuple (as returned by
-# time.localtime()), or an integer value (as returned by time.time()).
-# The wrapper uses time.localtime() to convert an integer to a time
-# tuple.
-#
-# @param value The time, given as a datetime object, an ISO 8601 string,
-# a time tuple, or an integer time value.
-
-
-### For Python-Future:
-def _iso8601_format(value):
- return "%04d%02d%02dT%02d:%02d:%02d" % (
- value.year, value.month, value.day,
- value.hour, value.minute, value.second)
-###
-# Issue #13305: different format codes across platforms
-# _day0 = datetime(1, 1, 1)
-# if _day0.strftime('%Y') == '0001': # Mac OS X
-# def _iso8601_format(value):
-# return value.strftime("%Y%m%dT%H:%M:%S")
-# elif _day0.strftime('%4Y') == '0001': # Linux
-# def _iso8601_format(value):
-# return value.strftime("%4Y%m%dT%H:%M:%S")
-# else:
-# def _iso8601_format(value):
-# return value.strftime("%Y%m%dT%H:%M:%S").zfill(17)
-# del _day0
-
-
-def _strftime(value):
- if isinstance(value, datetime):
- return _iso8601_format(value)
-
- if not isinstance(value, (tuple, time.struct_time)):
- if value == 0:
- value = time.time()
- value = time.localtime(value)
-
- return "%04d%02d%02dT%02d:%02d:%02d" % value[:6]
-
-class DateTime(object):
- """DateTime wrapper for an ISO 8601 string or time tuple or
- localtime integer value to generate 'dateTime.iso8601' XML-RPC
- value.
- """
-
- def __init__(self, value=0):
- if isinstance(value, str):
- self.value = value
- else:
- self.value = _strftime(value)
-
- def make_comparable(self, other):
- if isinstance(other, DateTime):
- s = self.value
- o = other.value
- elif isinstance(other, datetime):
- s = self.value
- o = _iso8601_format(other)
- elif isinstance(other, str):
- s = self.value
- o = other
- elif hasattr(other, "timetuple"):
- s = self.timetuple()
- o = other.timetuple()
- else:
- otype = (hasattr(other, "__class__")
- and other.__class__.__name__
- or type(other))
- raise TypeError("Can't compare %s and %s" %
- (self.__class__.__name__, otype))
- return s, o
-
- def __lt__(self, other):
- s, o = self.make_comparable(other)
- return s < o
-
- def __le__(self, other):
- s, o = self.make_comparable(other)
- return s <= o
-
- def __gt__(self, other):
- s, o = self.make_comparable(other)
- return s > o
-
- def __ge__(self, other):
- s, o = self.make_comparable(other)
- return s >= o
-
- def __eq__(self, other):
- s, o = self.make_comparable(other)
- return s == o
-
- def __ne__(self, other):
- s, o = self.make_comparable(other)
- return s != o
-
- def timetuple(self):
- return time.strptime(self.value, "%Y%m%dT%H:%M:%S")
-
- ##
- # Get date/time value.
- #
- # @return Date/time value, as an ISO 8601 string.
-
- def __str__(self):
- return self.value
-
- def __repr__(self):
- return "<DateTime %r at %x>" % (ensure_new_type(self.value), id(self))
-
- def decode(self, data):
- self.value = str(data).strip()
-
- def encode(self, out):
- out.write("<value><dateTime.iso8601>")
- out.write(self.value)
- out.write("</dateTime.iso8601></value>\n")
-
-def _datetime(data):
- # decode xml element contents into a DateTime structure.
- value = DateTime()
- value.decode(data)
- return value
-
-def _datetime_type(data):
- return datetime.strptime(data, "%Y%m%dT%H:%M:%S")
-
-##
-# Wrapper for binary data. This can be used to transport any kind
-# of binary data over XML-RPC, using BASE64 encoding.
-#
-# @param data An 8-bit string containing arbitrary data.
-
-class Binary(object):
- """Wrapper for binary data."""
-
- def __init__(self, data=None):
- if data is None:
- data = b""
- else:
- if not isinstance(data, (bytes, bytearray)):
- raise TypeError("expected bytes or bytearray, not %s" %
- data.__class__.__name__)
- data = bytes(data) # Make a copy of the bytes!
- self.data = data
-
- ##
- # Get buffer contents.
- #
- # @return Buffer contents, as an 8-bit string.
-
- def __str__(self):
- return str(self.data, "latin-1") # XXX encoding?!
-
- def __eq__(self, other):
- if isinstance(other, Binary):
- other = other.data
- return self.data == other
-
- def __ne__(self, other):
- if isinstance(other, Binary):
- other = other.data
- return self.data != other
-
- def decode(self, data):
- self.data = base64.decodebytes(data)
-
- def encode(self, out):
- out.write("<value><base64>\n")
- encoded = base64.encodebytes(self.data)
- out.write(encoded.decode('ascii'))
- out.write("</base64></value>\n")
-
-def _binary(data):
- # decode xml element contents into a Binary structure
- value = Binary()
- value.decode(data)
- return value
-
-WRAPPERS = (DateTime, Binary)
-
-# --------------------------------------------------------------------
-# XML parsers
-
-class ExpatParser(object):
- # fast expat parser for Python 2.0 and later.
- def __init__(self, target):
- self._parser = parser = expat.ParserCreate(None, None)
- self._target = target
- parser.StartElementHandler = target.start
- parser.EndElementHandler = target.end
- parser.CharacterDataHandler = target.data
- encoding = None
- target.xml(encoding, None)
-
- def feed(self, data):
- self._parser.Parse(data, 0)
-
- def close(self):
- self._parser.Parse("", 1) # end of data
- del self._target, self._parser # get rid of circular references
-
-# --------------------------------------------------------------------
-# XML-RPC marshalling and unmarshalling code
-
-##
-# XML-RPC marshaller.
-#
-# @param encoding Default encoding for 8-bit strings. The default
-# value is None (interpreted as UTF-8).
-# @see dumps
-
-class Marshaller(object):
- """Generate an XML-RPC params chunk from a Python data structure.
-
- Create a Marshaller instance for each set of parameters, and use
- the "dumps" method to convert your data (represented as a tuple)
- to an XML-RPC params chunk. To write a fault response, pass a
- Fault instance instead. You may prefer to use the "dumps" module
- function for this purpose.
- """
-
- # by the way, if you don't understand what's going on in here,
- # that's perfectly ok.
-
- def __init__(self, encoding=None, allow_none=False):
- self.memo = {}
- self.data = None
- self.encoding = encoding
- self.allow_none = allow_none
-
- dispatch = {}
-
- def dumps(self, values):
- out = []
- write = out.append
- dump = self.__dump
- if isinstance(values, Fault):
- # fault instance
- write("<fault>\n")
- dump({'faultCode': values.faultCode,
- 'faultString': values.faultString},
- write)
- write("</fault>\n")
- else:
- # parameter block
- # FIXME: the xml-rpc specification allows us to leave out
- # the entire <params> block if there are no parameters.
- # however, changing this may break older code (including
- # old versions of xmlrpclib.py), so this is better left as
- # is for now. See @XMLRPC3 for more information. /F
- write("<params>\n")
- for v in values:
- write("<param>\n")
- dump(v, write)
- write("</param>\n")
- write("</params>\n")
- result = "".join(out)
- return str(result)
-
- def __dump(self, value, write):
- try:
- f = self.dispatch[type(ensure_new_type(value))]
- except KeyError:
- # check if this object can be marshalled as a structure
- if not hasattr(value, '__dict__'):
- raise TypeError("cannot marshal %s objects" % type(value))
- # check if this class is a sub-class of a basic type,
- # because we don't know how to marshal these types
- # (e.g. a string sub-class)
- for type_ in type(value).__mro__:
- if type_ in self.dispatch.keys():
- raise TypeError("cannot marshal %s objects" % type(value))
- # XXX(twouters): using "_arbitrary_instance" as key as a quick-fix
- # for the p3yk merge, this should probably be fixed more neatly.
- f = self.dispatch["_arbitrary_instance"]
- f(self, value, write)
-
- def dump_nil (self, value, write):
- if not self.allow_none:
- raise TypeError("cannot marshal None unless allow_none is enabled")
- write("<value><nil/></value>")
- dispatch[type(None)] = dump_nil
-
- def dump_bool(self, value, write):
- write("<value><boolean>")
- write(value and "1" or "0")
- write("</boolean></value>\n")
- dispatch[bool] = dump_bool
-
- def dump_long(self, value, write):
- if value > MAXINT or value < MININT:
- raise OverflowError("long int exceeds XML-RPC limits")
- write("<value><int>")
- write(str(int(value)))
- write("</int></value>\n")
- dispatch[int] = dump_long
-
- # backward compatible
- dump_int = dump_long
-
- def dump_double(self, value, write):
- write("<value><double>")
- write(repr(ensure_new_type(value)))
- write("</double></value>\n")
- dispatch[float] = dump_double
-
- def dump_unicode(self, value, write, escape=escape):
- write("<value><string>")
- write(escape(value))
- write("</string></value>\n")
- dispatch[str] = dump_unicode
-
- def dump_bytes(self, value, write):
- write("<value><base64>\n")
- encoded = base64.encodebytes(value)
- write(encoded.decode('ascii'))
- write("</base64></value>\n")
- dispatch[bytes] = dump_bytes
- dispatch[bytearray] = dump_bytes
-
- def dump_array(self, value, write):
- i = id(value)
- if i in self.memo:
- raise TypeError("cannot marshal recursive sequences")
- self.memo[i] = None
- dump = self.__dump
- write("<value><array><data>\n")
- for v in value:
- dump(v, write)
- write("</data></array></value>\n")
- del self.memo[i]
- dispatch[tuple] = dump_array
- dispatch[list] = dump_array
-
- def dump_struct(self, value, write, escape=escape):
- i = id(value)
- if i in self.memo:
- raise TypeError("cannot marshal recursive dictionaries")
- self.memo[i] = None
- dump = self.__dump
- write("<value><struct>\n")
- for k, v in value.items():
- write("<member>\n")
- if not isinstance(k, str):
- raise TypeError("dictionary key must be string")
- write("<name>%s</name>\n" % escape(k))
- dump(v, write)
- write("</member>\n")
- write("</struct></value>\n")
- del self.memo[i]
- dispatch[dict] = dump_struct
-
- def dump_datetime(self, value, write):
- write("<value><dateTime.iso8601>")
- write(_strftime(value))
- write("</dateTime.iso8601></value>\n")
- dispatch[datetime] = dump_datetime
-
- def dump_instance(self, value, write):
- # check for special wrappers
- if value.__class__ in WRAPPERS:
- self.write = write
- value.encode(self)
- del self.write
- else:
- # store instance attributes as a struct (really?)
- self.dump_struct(value.__dict__, write)
- dispatch[DateTime] = dump_instance
- dispatch[Binary] = dump_instance
- # XXX(twouters): using "_arbitrary_instance" as key as a quick-fix
- # for the p3yk merge, this should probably be fixed more neatly.
- dispatch["_arbitrary_instance"] = dump_instance
-
-##
-# XML-RPC unmarshaller.
-#
-# @see loads
-
-class Unmarshaller(object):
- """Unmarshal an XML-RPC response, based on incoming XML event
- messages (start, data, end). Call close() to get the resulting
- data structure.
-
- Note that this reader is fairly tolerant, and gladly accepts bogus
- XML-RPC data without complaining (but not bogus XML).
- """
-
- # and again, if you don't understand what's going on in here,
- # that's perfectly ok.
-
- def __init__(self, use_datetime=False, use_builtin_types=False):
- self._type = None
- self._stack = []
- self._marks = []
- self._data = []
- self._methodname = None
- self._encoding = "utf-8"
- self.append = self._stack.append
- self._use_datetime = use_builtin_types or use_datetime
- self._use_bytes = use_builtin_types
-
- def close(self):
- # return response tuple and target method
- if self._type is None or self._marks:
- raise ResponseError()
- if self._type == "fault":
- raise Fault(**self._stack[0])
- return tuple(self._stack)
-
- def getmethodname(self):
- return self._methodname
-
- #
- # event handlers
-
- def xml(self, encoding, standalone):
- self._encoding = encoding
- # FIXME: assert standalone == 1 ???
-
- def start(self, tag, attrs):
- # prepare to handle this element
- if tag == "array" or tag == "struct":
- self._marks.append(len(self._stack))
- self._data = []
- self._value = (tag == "value")
-
- def data(self, text):
- self._data.append(text)
-
- def end(self, tag):
- # call the appropriate end tag handler
- try:
- f = self.dispatch[tag]
- except KeyError:
- pass # unknown tag ?
- else:
- return f(self, "".join(self._data))
-
- #
- # accelerator support
-
- def end_dispatch(self, tag, data):
- # dispatch data
- try:
- f = self.dispatch[tag]
- except KeyError:
- pass # unknown tag ?
- else:
- return f(self, data)
-
- #
- # element decoders
-
- dispatch = {}
-
- def end_nil (self, data):
- self.append(None)
- self._value = 0
- dispatch["nil"] = end_nil
-
- def end_boolean(self, data):
- if data == "0":
- self.append(False)
- elif data == "1":
- self.append(True)
- else:
- raise TypeError("bad boolean value")
- self._value = 0
- dispatch["boolean"] = end_boolean
-
- def end_int(self, data):
- self.append(int(data))
- self._value = 0
- dispatch["i4"] = end_int
- dispatch["i8"] = end_int
- dispatch["int"] = end_int
-
- def end_double(self, data):
- self.append(float(data))
- self._value = 0
- dispatch["double"] = end_double
-
- def end_string(self, data):
- if self._encoding:
- data = data.decode(self._encoding)
- self.append(data)
- self._value = 0
- dispatch["string"] = end_string
- dispatch["name"] = end_string # struct keys are always strings
-
- def end_array(self, data):
- mark = self._marks.pop()
- # map arrays to Python lists
- self._stack[mark:] = [self._stack[mark:]]
- self._value = 0
- dispatch["array"] = end_array
-
- def end_struct(self, data):
- mark = self._marks.pop()
- # map structs to Python dictionaries
- dict = {}
- items = self._stack[mark:]
- for i in range(0, len(items), 2):
- dict[items[i]] = items[i+1]
- self._stack[mark:] = [dict]
- self._value = 0
- dispatch["struct"] = end_struct
-
- def end_base64(self, data):
- value = Binary()
- value.decode(data.encode("ascii"))
- if self._use_bytes:
- value = value.data
- self.append(value)
- self._value = 0
- dispatch["base64"] = end_base64
-
- def end_dateTime(self, data):
- value = DateTime()
- value.decode(data)
- if self._use_datetime:
- value = _datetime_type(data)
- self.append(value)
- dispatch["dateTime.iso8601"] = end_dateTime
-
- def end_value(self, data):
- # if we stumble upon a value element with no internal
- # elements, treat it as a string element
- if self._value:
- self.end_string(data)
- dispatch["value"] = end_value
-
- def end_params(self, data):
- self._type = "params"
- dispatch["params"] = end_params
-
- def end_fault(self, data):
- self._type = "fault"
- dispatch["fault"] = end_fault
-
- def end_methodName(self, data):
- if self._encoding:
- data = data.decode(self._encoding)
- self._methodname = data
- self._type = "methodName" # no params
- dispatch["methodName"] = end_methodName
-
-## Multicall support
-#
-
-class _MultiCallMethod(object):
- # some lesser magic to store calls made to a MultiCall object
- # for batch execution
- def __init__(self, call_list, name):
- self.__call_list = call_list
- self.__name = name
- def __getattr__(self, name):
- return _MultiCallMethod(self.__call_list, "%s.%s" % (self.__name, name))
- def __call__(self, *args):
- self.__call_list.append((self.__name, args))
-
-class MultiCallIterator(object):
- """Iterates over the results of a multicall. Exceptions are
- raised in response to xmlrpc faults."""
-
- def __init__(self, results):
- self.results = results
-
- def __getitem__(self, i):
- item = self.results[i]
- if isinstance(type(item), dict):
- raise Fault(item['faultCode'], item['faultString'])
- elif type(item) == type([]):
- return item[0]
- else:
- raise ValueError("unexpected type in multicall result")
-
-class MultiCall(object):
- """server -> a object used to boxcar method calls
-
- server should be a ServerProxy object.
-
- Methods can be added to the MultiCall using normal
- method call syntax e.g.:
-
- multicall = MultiCall(server_proxy)
- multicall.add(2,3)
- multicall.get_address("Guido")
-
- To execute the multicall, call the MultiCall object e.g.:
-
- add_result, address = multicall()
- """
-
- def __init__(self, server):
- self.__server = server
- self.__call_list = []
-
- def __repr__(self):
- return "<MultiCall at %x>" % id(self)
-
- __str__ = __repr__
-
- def __getattr__(self, name):
- return _MultiCallMethod(self.__call_list, name)
-
- def __call__(self):
- marshalled_list = []
- for name, args in self.__call_list:
- marshalled_list.append({'methodName' : name, 'params' : args})
-
- return MultiCallIterator(self.__server.system.multicall(marshalled_list))
-
-# --------------------------------------------------------------------
-# convenience functions
-
-FastMarshaller = FastParser = FastUnmarshaller = None
-
-##
-# Create a parser object, and connect it to an unmarshalling instance.
-# This function picks the fastest available XML parser.
-#
-# return A (parser, unmarshaller) tuple.
-
-def getparser(use_datetime=False, use_builtin_types=False):
- """getparser() -> parser, unmarshaller
-
- Create an instance of the fastest available parser, and attach it
- to an unmarshalling object. Return both objects.
- """
- if FastParser and FastUnmarshaller:
- if use_builtin_types:
- mkdatetime = _datetime_type
- mkbytes = base64.decodebytes
- elif use_datetime:
- mkdatetime = _datetime_type
- mkbytes = _binary
- else:
- mkdatetime = _datetime
- mkbytes = _binary
- target = FastUnmarshaller(True, False, mkbytes, mkdatetime, Fault)
- parser = FastParser(target)
- else:
- target = Unmarshaller(use_datetime=use_datetime, use_builtin_types=use_builtin_types)
- if FastParser:
- parser = FastParser(target)
- else:
- parser = ExpatParser(target)
- return parser, target
-
-##
-# Convert a Python tuple or a Fault instance to an XML-RPC packet.
-#
-# @def dumps(params, **options)
-# @param params A tuple or Fault instance.
-# @keyparam methodname If given, create a methodCall request for
-# this method name.
-# @keyparam methodresponse If given, create a methodResponse packet.
-# If used with a tuple, the tuple must be a singleton (that is,
-# it must contain exactly one element).
-# @keyparam encoding The packet encoding.
-# @return A string containing marshalled data.
-
-def dumps(params, methodname=None, methodresponse=None, encoding=None,
- allow_none=False):
- """data [,options] -> marshalled data
-
- Convert an argument tuple or a Fault instance to an XML-RPC
- request (or response, if the methodresponse option is used).
-
- In addition to the data object, the following options can be given
- as keyword arguments:
-
- methodname: the method name for a methodCall packet
-
- methodresponse: true to create a methodResponse packet.
- If this option is used with a tuple, the tuple must be
- a singleton (i.e. it can contain only one element).
-
- encoding: the packet encoding (default is UTF-8)
-
- All byte strings in the data structure are assumed to use the
- packet encoding. Unicode strings are automatically converted,
- where necessary.
- """
-
- assert isinstance(params, (tuple, Fault)), "argument must be tuple or Fault instance"
- if isinstance(params, Fault):
- methodresponse = 1
- elif methodresponse and isinstance(params, tuple):
- assert len(params) == 1, "response tuple must be a singleton"
-
- if not encoding:
- encoding = "utf-8"
-
- if FastMarshaller:
- m = FastMarshaller(encoding)
- else:
- m = Marshaller(encoding, allow_none)
-
- data = m.dumps(params)
-
- if encoding != "utf-8":
- xmlheader = "<?xml version='1.0' encoding='%s'?>\n" % str(encoding)
- else:
- xmlheader = "<?xml version='1.0'?>\n" # utf-8 is default
-
- # standard XML-RPC wrappings
- if methodname:
- # a method call
- if not isinstance(methodname, str):
- methodname = methodname.encode(encoding)
- data = (
- xmlheader,
- "<methodCall>\n"
- "<methodName>", methodname, "</methodName>\n",
- data,
- "</methodCall>\n"
- )
- elif methodresponse:
- # a method response, or a fault structure
- data = (
- xmlheader,
- "<methodResponse>\n",
- data,
- "</methodResponse>\n"
- )
- else:
- return data # return as is
- return str("").join(data)
-
-##
-# Convert an XML-RPC packet to a Python object. If the XML-RPC packet
-# represents a fault condition, this function raises a Fault exception.
-#
-# @param data An XML-RPC packet, given as an 8-bit string.
-# @return A tuple containing the unpacked data, and the method name
-# (None if not present).
-# @see Fault
-
-def loads(data, use_datetime=False, use_builtin_types=False):
- """data -> unmarshalled data, method name
-
- Convert an XML-RPC packet to unmarshalled data plus a method
- name (None if not present).
-
- If the XML-RPC packet represents a fault condition, this function
- raises a Fault exception.
- """
- p, u = getparser(use_datetime=use_datetime, use_builtin_types=use_builtin_types)
- p.feed(data)
- p.close()
- return u.close(), u.getmethodname()
-
-##
-# Encode a string using the gzip content encoding such as specified by the
-# Content-Encoding: gzip
-# in the HTTP header, as described in RFC 1952
-#
-# @param data the unencoded data
-# @return the encoded data
-
-def gzip_encode(data):
- """data -> gzip encoded data
-
- Encode data using the gzip content encoding as described in RFC 1952
- """
- if not gzip:
- raise NotImplementedError
- f = BytesIO()
- gzf = gzip.GzipFile(mode="wb", fileobj=f, compresslevel=1)
- gzf.write(data)
- gzf.close()
- encoded = f.getvalue()
- f.close()
- return encoded
-
-##
-# Decode a string using the gzip content encoding such as specified by the
-# Content-Encoding: gzip
-# in the HTTP header, as described in RFC 1952
-#
-# @param data The encoded data
-# @return the unencoded data
-# @raises ValueError if data is not correctly coded.
-
-def gzip_decode(data):
- """gzip encoded data -> unencoded data
-
- Decode data using the gzip content encoding as described in RFC 1952
- """
- if not gzip:
- raise NotImplementedError
- f = BytesIO(data)
- gzf = gzip.GzipFile(mode="rb", fileobj=f)
- try:
- decoded = gzf.read()
- except IOError:
- raise ValueError("invalid data")
- f.close()
- gzf.close()
- return decoded
-
-##
-# Return a decoded file-like object for the gzip encoding
-# as described in RFC 1952.
-#
-# @param response A stream supporting a read() method
-# @return a file-like object that the decoded data can be read() from
-
-class GzipDecodedResponse(gzip.GzipFile if gzip else object):
- """a file-like object to decode a response encoded with the gzip
- method, as described in RFC 1952.
- """
- def __init__(self, response):
- #response doesn't support tell() and read(), required by
- #GzipFile
- if not gzip:
- raise NotImplementedError
- self.io = BytesIO(response.read())
- gzip.GzipFile.__init__(self, mode="rb", fileobj=self.io)
-
- def close(self):
- gzip.GzipFile.close(self)
- self.io.close()
-
-
-# --------------------------------------------------------------------
-# request dispatcher
-
-class _Method(object):
- # some magic to bind an XML-RPC method to an RPC server.
- # supports "nested" methods (e.g. examples.getStateName)
- def __init__(self, send, name):
- self.__send = send
- self.__name = name
+import time
+from datetime import datetime
+from future.backports.http import client as http_client
+from future.backports.urllib import parse as urllib_parse
+from future.utils import ensure_new_type
+from xml.parsers import expat
+import socket
+import errno
+from io import BytesIO
+try:
+ import gzip
+except ImportError:
+ gzip = None #python can be built without zlib/gzip support
+
+# --------------------------------------------------------------------
+# Internal stuff
+
+def escape(s):
+ s = s.replace("&", "&amp;")
+ s = s.replace("<", "&lt;")
+ return s.replace(">", "&gt;",)
+
+# used in User-Agent header sent
+__version__ = sys.version[:3]
+
+# xmlrpc integer limits
+MAXINT = 2**31-1
+MININT = -2**31
+
+# --------------------------------------------------------------------
+# Error constants (from Dan Libby's specification at
+# http://xmlrpc-epi.sourceforge.net/specs/rfc.fault_codes.php)
+
+# Ranges of errors
+PARSE_ERROR = -32700
+SERVER_ERROR = -32600
+APPLICATION_ERROR = -32500
+SYSTEM_ERROR = -32400
+TRANSPORT_ERROR = -32300
+
+# Specific errors
+NOT_WELLFORMED_ERROR = -32700
+UNSUPPORTED_ENCODING = -32701
+INVALID_ENCODING_CHAR = -32702
+INVALID_XMLRPC = -32600
+METHOD_NOT_FOUND = -32601
+INVALID_METHOD_PARAMS = -32602
+INTERNAL_ERROR = -32603
+
+# --------------------------------------------------------------------
+# Exceptions
+
+##
+# Base class for all kinds of client-side errors.
+
+class Error(Exception):
+ """Base class for client errors."""
+ def __str__(self):
+ return repr(self)
+
+##
+# Indicates an HTTP-level protocol error. This is raised by the HTTP
+# transport layer, if the server returns an error code other than 200
+# (OK).
+#
+# @param url The target URL.
+# @param errcode The HTTP error code.
+# @param errmsg The HTTP error message.
+# @param headers The HTTP header dictionary.
+
+class ProtocolError(Error):
+ """Indicates an HTTP protocol error."""
+ def __init__(self, url, errcode, errmsg, headers):
+ Error.__init__(self)
+ self.url = url
+ self.errcode = errcode
+ self.errmsg = errmsg
+ self.headers = headers
+ def __repr__(self):
+ return (
+ "<ProtocolError for %s: %s %s>" %
+ (self.url, self.errcode, self.errmsg)
+ )
+
+##
+# Indicates a broken XML-RPC response package. This exception is
+# raised by the unmarshalling layer, if the XML-RPC response is
+# malformed.
+
+class ResponseError(Error):
+ """Indicates a broken response package."""
+ pass
+
+##
+# Indicates an XML-RPC fault response package. This exception is
+# raised by the unmarshalling layer, if the XML-RPC response contains
+# a fault string. This exception can also be used as a class, to
+# generate a fault XML-RPC message.
+#
+# @param faultCode The XML-RPC fault code.
+# @param faultString The XML-RPC fault string.
+
+class Fault(Error):
+ """Indicates an XML-RPC fault package."""
+ def __init__(self, faultCode, faultString, **extra):
+ Error.__init__(self)
+ self.faultCode = faultCode
+ self.faultString = faultString
+ def __repr__(self):
+ return "<Fault %s: %r>" % (ensure_new_type(self.faultCode),
+ ensure_new_type(self.faultString))
+
+# --------------------------------------------------------------------
+# Special values
+
+##
+# Backwards compatibility
+
+boolean = Boolean = bool
+
+##
+# Wrapper for XML-RPC DateTime values. This converts a time value to
+# the format used by XML-RPC.
+# <p>
+# The value can be given as a datetime object, as a string in the
+# format "yyyymmddThh:mm:ss", as a 9-item time tuple (as returned by
+# time.localtime()), or an integer value (as returned by time.time()).
+# The wrapper uses time.localtime() to convert an integer to a time
+# tuple.
+#
+# @param value The time, given as a datetime object, an ISO 8601 string,
+# a time tuple, or an integer time value.
+
+
+### For Python-Future:
+def _iso8601_format(value):
+ return "%04d%02d%02dT%02d:%02d:%02d" % (
+ value.year, value.month, value.day,
+ value.hour, value.minute, value.second)
+###
+# Issue #13305: different format codes across platforms
+# _day0 = datetime(1, 1, 1)
+# if _day0.strftime('%Y') == '0001': # Mac OS X
+# def _iso8601_format(value):
+# return value.strftime("%Y%m%dT%H:%M:%S")
+# elif _day0.strftime('%4Y') == '0001': # Linux
+# def _iso8601_format(value):
+# return value.strftime("%4Y%m%dT%H:%M:%S")
+# else:
+# def _iso8601_format(value):
+# return value.strftime("%Y%m%dT%H:%M:%S").zfill(17)
+# del _day0
+
+
+def _strftime(value):
+ if isinstance(value, datetime):
+ return _iso8601_format(value)
+
+ if not isinstance(value, (tuple, time.struct_time)):
+ if value == 0:
+ value = time.time()
+ value = time.localtime(value)
+
+ return "%04d%02d%02dT%02d:%02d:%02d" % value[:6]
+
class DateTime(object):
    """Wrapper that yields an XML-RPC 'dateTime.iso8601' value.

    The wrapped moment is stored in ``self.value`` as an ISO 8601 string.
    The constructor takes such a string as-is, or a time tuple, datetime
    or integer localtime value, which is converted with ``_strftime``.
    """

    def __init__(self, value=0):
        # Strings are stored untouched; everything else is normalised.
        self.value = value if isinstance(value, str) else _strftime(value)

    def make_comparable(self, other):
        """Return a ``(mine, theirs)`` pair suitable for comparison.

        Supports DateTime, datetime, str and any object with a
        ``timetuple()`` method; raises TypeError for anything else.
        """
        if isinstance(other, DateTime):
            return self.value, other.value
        if isinstance(other, datetime):
            return self.value, _iso8601_format(other)
        if isinstance(other, str):
            return self.value, other
        if hasattr(other, "timetuple"):
            return self.timetuple(), other.timetuple()

        # Prefer the class name in the message, falling back to the type.
        class_name = hasattr(other, "__class__") and other.__class__.__name__
        otype = class_name or type(other)
        raise TypeError("Can't compare %s and %s" %
                        (self.__class__.__name__, otype))

    def __lt__(self, other):
        mine, theirs = self.make_comparable(other)
        return mine < theirs

    def __le__(self, other):
        mine, theirs = self.make_comparable(other)
        return mine <= theirs

    def __gt__(self, other):
        mine, theirs = self.make_comparable(other)
        return mine > theirs

    def __ge__(self, other):
        mine, theirs = self.make_comparable(other)
        return mine >= theirs

    def __eq__(self, other):
        mine, theirs = self.make_comparable(other)
        return mine == theirs

    def __ne__(self, other):
        mine, theirs = self.make_comparable(other)
        return mine != theirs

    def timetuple(self):
        """Parse the stored ISO 8601 string into a ``time.struct_time``."""
        return time.strptime(self.value, "%Y%m%dT%H:%M:%S")

    ##
    # Get date/time value.
    #
    # @return Date/time value, as an ISO 8601 string.

    def __str__(self):
        return self.value

    def __repr__(self):
        shown = ensure_new_type(self.value)
        return "<DateTime %r at %x>" % (shown, id(self))

    def decode(self, data):
        """Replace the stored value with the stripped text of *data*."""
        self.value = str(data).strip()

    def encode(self, out):
        """Write the value as a ``<dateTime.iso8601>`` element to *out*."""
        out.write("<value><dateTime.iso8601>")
        out.write(self.value)
        out.write("</dateTime.iso8601></value>\n")
+
def _datetime(data):
    """Decode XML element contents into a ``DateTime`` wrapper."""
    wrapper = DateTime()
    wrapper.decode(data)
    return wrapper
+
def _datetime_type(data):
    """Parse an XML-RPC ISO 8601 timestamp string into a ``datetime``."""
    iso_format = "%Y%m%dT%H:%M:%S"
    return datetime.strptime(data, iso_format)
+
+##
+# Wrapper for binary data. This can be used to transport any kind
+# of binary data over XML-RPC, using BASE64 encoding.
+#
+# @param data An 8-bit string containing arbitrary data.
+
class Binary(object):
    """Wrapper for binary data.

    The payload lives in ``self.data`` as ``bytes`` and is transported
    over XML-RPC as a ``<base64>`` element.
    """

    def __init__(self, data=None):
        if data is None:
            payload = b""
        elif isinstance(data, (bytes, bytearray)):
            payload = bytes(data)  # Make a copy of the bytes!
        else:
            raise TypeError("expected bytes or bytearray, not %s" %
                            data.__class__.__name__)
        self.data = payload

    ##
    # Get buffer contents.
    #
    # @return Buffer contents, as an 8-bit string.

    def __str__(self):
        return str(self.data, "latin-1")  # XXX encoding?!

    def __eq__(self, other):
        # Compare payloads; another Binary is unwrapped first.
        theirs = other.data if isinstance(other, Binary) else other
        return self.data == theirs

    def __ne__(self, other):
        theirs = other.data if isinstance(other, Binary) else other
        return self.data != theirs

    def decode(self, data):
        """Replace the payload with the base64-decoded form of *data*."""
        self.data = base64.decodebytes(data)

    def encode(self, out):
        """Write the payload as a ``<base64>`` element to *out*."""
        out.write("<value><base64>\n")
        as_text = base64.encodebytes(self.data).decode('ascii')
        out.write(as_text)
        out.write("</base64></value>\n")
+
def _binary(data):
    """Decode XML element contents into a ``Binary`` wrapper."""
    wrapper = Binary()
    wrapper.decode(data)
    return wrapper
+
# Wrapper classes that Marshaller.dump_instance serialises by calling their
# own encode() method instead of dumping their __dict__ as a struct.
WRAPPERS = (DateTime, Binary)
+
+# --------------------------------------------------------------------
+# XML parsers
+
class ExpatParser(object):
    """Expat-backed XML parser that forwards events to *target*.

    *target* must provide ``start``, ``end``, ``data`` and ``xml`` methods.
    """
    # fast expat parser for Python 2.0 and later.

    def __init__(self, target):
        expat_parser = expat.ParserCreate(None, None)
        self._parser = expat_parser
        self._target = target
        expat_parser.StartElementHandler = target.start
        expat_parser.EndElementHandler = target.end
        expat_parser.CharacterDataHandler = target.data
        # No encoding is reported to the target, and no standalone flag.
        target.xml(None, None)

    def feed(self, data):
        """Parse another chunk of the document."""
        self._parser.Parse(data, 0)

    def close(self):
        """Signal end of data and drop the parser/target references."""
        self._parser.Parse("", 1)  # end of data
        del self._target, self._parser  # get rid of circular references
+
+# --------------------------------------------------------------------
+# XML-RPC marshalling and unmarshalling code
+
+##
+# XML-RPC marshaller.
+#
+# @param encoding Default encoding for 8-bit strings. The default
+# value is None (interpreted as UTF-8).
+# @see dumps
+
class Marshaller(object):
    """Generate an XML-RPC params chunk from a Python data structure.

    Create one Marshaller per set of parameters and call "dumps" with the
    data (a tuple) to obtain the XML-RPC params chunk.  Pass a Fault
    instance instead to produce a fault response.  The module-level
    "dumps" function is usually the more convenient entry point.
    """

    # by the way, if you don't understand what's going on in here,
    # that's perfectly ok.

    def __init__(self, encoding=None, allow_none=False):
        self.memo = {}          # ids of containers currently being dumped
        self.data = None
        self.encoding = encoding
        self.allow_none = allow_none

    # Maps a Python type (or the "_arbitrary_instance" key) to the
    # unbound dump_* function that serialises it.
    dispatch = {}

    def dumps(self, values):
        """Return the XML text for *values* (a tuple or a Fault)."""
        chunks = []
        emit = chunks.append
        if isinstance(values, Fault):
            # fault instance
            emit("<fault>\n")
            self.__dump({'faultCode': values.faultCode,
                         'faultString': values.faultString},
                        emit)
            emit("</fault>\n")
        else:
            # parameter block
            # FIXME: the xml-rpc specification allows us to leave out
            # the entire <params> block if there are no parameters.
            # however, changing this may break older code (including
            # old versions of xmlrpclib.py), so this is better left as
            # is for now.  See @XMLRPC3 for more information. /F
            emit("<params>\n")
            for item in values:
                emit("<param>\n")
                self.__dump(item, emit)
                emit("</param>\n")
            emit("</params>\n")
        return str("".join(chunks))

    def __dump(self, value, write):
        """Dispatch *value* to the dump_* function registered for its type."""
        try:
            handler = self.dispatch[type(ensure_new_type(value))]
        except KeyError:
            # check if this object can be marshalled as a structure
            if not hasattr(value, '__dict__'):
                raise TypeError("cannot marshal %s objects" % type(value))
            # check if this class is a sub-class of a basic type,
            # because we don't know how to marshal these types
            # (e.g. a string sub-class)
            if any(base in self.dispatch for base in type(value).__mro__):
                raise TypeError("cannot marshal %s objects" % type(value))
            # XXX(twouters): using "_arbitrary_instance" as key as a quick-fix
            # for the p3yk merge, this should probably be fixed more neatly.
            handler = self.dispatch["_arbitrary_instance"]
        handler(self, value, write)

    def dump_nil(self, value, write):
        if not self.allow_none:
            raise TypeError("cannot marshal None unless allow_none is enabled")
        write("<value><nil/></value>")
    dispatch[type(None)] = dump_nil

    def dump_bool(self, value, write):
        write("<value><boolean>")
        write("1" if value else "0")
        write("</boolean></value>\n")
    dispatch[bool] = dump_bool

    def dump_long(self, value, write):
        # XML-RPC integers are limited to the MININT..MAXINT range.
        if value > MAXINT or value < MININT:
            raise OverflowError("long int exceeds XML-RPC limits")
        digits = str(int(value))
        write("<value><int>")
        write(digits)
        write("</int></value>\n")
    dispatch[int] = dump_long

    # backward compatible
    dump_int = dump_long

    def dump_double(self, value, write):
        write("<value><double>")
        write(repr(ensure_new_type(value)))
        write("</double></value>\n")
    dispatch[float] = dump_double

    def dump_unicode(self, value, write, escape=escape):
        write("<value><string>")
        write(escape(value))
        write("</string></value>\n")
    dispatch[str] = dump_unicode

    def dump_bytes(self, value, write):
        write("<value><base64>\n")
        as_text = base64.encodebytes(value).decode('ascii')
        write(as_text)
        write("</base64></value>\n")
    dispatch[bytes] = dump_bytes
    dispatch[bytearray] = dump_bytes

    def dump_array(self, value, write):
        # The memo holds ids of containers on the current dump path so
        # that self-referencing sequences are rejected.
        key = id(value)
        if key in self.memo:
            raise TypeError("cannot marshal recursive sequences")
        self.memo[key] = None
        write("<value><array><data>\n")
        for element in value:
            self.__dump(element, write)
        write("</data></array></value>\n")
        del self.memo[key]
    dispatch[tuple] = dump_array
    dispatch[list] = dump_array

    def dump_struct(self, value, write, escape=escape):
        key = id(value)
        if key in self.memo:
            raise TypeError("cannot marshal recursive dictionaries")
        self.memo[key] = None
        write("<value><struct>\n")
        for name, member in value.items():
            write("<member>\n")
            if not isinstance(name, str):
                raise TypeError("dictionary key must be string")
            write("<name>%s</name>\n" % escape(name))
            self.__dump(member, write)
            write("</member>\n")
        write("</struct></value>\n")
        del self.memo[key]
    dispatch[dict] = dump_struct

    def dump_datetime(self, value, write):
        write("<value><dateTime.iso8601>")
        write(_strftime(value))
        write("</dateTime.iso8601></value>\n")
    dispatch[datetime] = dump_datetime

    def dump_instance(self, value, write):
        # check for special wrappers
        if value.__class__ not in WRAPPERS:
            # store instance attributes as a struct (really?)
            self.dump_struct(value.__dict__, write)
            return
        # Wrappers call out.write(); expose our writer under that name
        # for the duration of the encode() call.
        self.write = write
        value.encode(self)
        del self.write
    dispatch[DateTime] = dump_instance
    dispatch[Binary] = dump_instance
    # XXX(twouters): using "_arbitrary_instance" as key as a quick-fix
    # for the p3yk merge, this should probably be fixed more neatly.
    dispatch["_arbitrary_instance"] = dump_instance
+
+##
+# XML-RPC unmarshaller.
+#
+# @see loads
+
class Unmarshaller(object):
    """Unmarshal an XML-RPC response, based on incoming XML event
    messages (start, data, end).  Call close() to get the resulting
    data structure.

    Note that this reader is fairly tolerant, and gladly accepts bogus
    XML-RPC data without complaining (but not bogus XML).

    Decoded values are collected on an internal stack; ``<array>`` and
    ``<struct>`` elements record a mark on entry and collapse everything
    above that mark into a list / dict on exit.
    """

    # and again, if you don't understand what's going on in here,
    # that's perfectly ok.

    def __init__(self, use_datetime=False, use_builtin_types=False):
        self._type = None        # "params", "fault" or "methodName"
        self._stack = []         # decoded values
        self._marks = []         # stack depths of open arrays/structs
        self._data = []          # character data of the current element
        self._value = False      # true while inside a bare <value>
        self._methodname = None
        self._encoding = "utf-8"
        self.append = self._stack.append
        self._use_datetime = use_builtin_types or use_datetime
        self._use_bytes = use_builtin_types

    def close(self):
        """Return the unmarshalled values as a tuple.

        Raises ResponseError if nothing complete was parsed, and Fault if
        the packet was a fault response.
        """
        # return response tuple and target method
        if self._type is None or self._marks:
            raise ResponseError()
        if self._type == "fault":
            raise Fault(**self._stack[0])
        return tuple(self._stack)

    def getmethodname(self):
        return self._methodname

    #
    # event handlers

    def xml(self, encoding, standalone):
        self._encoding = encoding
        # FIXME: assert standalone == 1 ???

    def start(self, tag, attrs):
        # prepare to handle this element
        if tag == "array" or tag == "struct":
            self._marks.append(len(self._stack))
        self._data = []
        self._value = (tag == "value")

    def data(self, text):
        self._data.append(text)

    def end(self, tag):
        # call the appropriate end tag handler
        try:
            f = self.dispatch[tag]
        except KeyError:
            pass # unknown tag ?
        else:
            return f(self, "".join(self._data))

    #
    # accelerator support

    def end_dispatch(self, tag, data):
        # dispatch data
        try:
            f = self.dispatch[tag]
        except KeyError:
            pass # unknown tag ?
        else:
            return f(self, data)

    def _decode_text(self, data):
        # Only byte strings need decoding.  Text delivered by the parser
        # is already decoded, and calling .decode() on it would raise
        # AttributeError on Python 3.
        if self._encoding and isinstance(data, bytes):
            return data.decode(self._encoding)
        return data

    #
    # element decoders

    # Maps an element name to the unbound end_* function that decodes it.
    dispatch = {}

    def end_nil(self, data):
        self.append(None)
        self._value = 0
    dispatch["nil"] = end_nil

    def end_boolean(self, data):
        if data == "0":
            self.append(False)
        elif data == "1":
            self.append(True)
        else:
            raise TypeError("bad boolean value")
        self._value = 0
    dispatch["boolean"] = end_boolean

    def end_int(self, data):
        self.append(int(data))
        self._value = 0
    dispatch["i4"] = end_int
    dispatch["i8"] = end_int
    dispatch["int"] = end_int

    def end_double(self, data):
        self.append(float(data))
        self._value = 0
    dispatch["double"] = end_double

    def end_string(self, data):
        self.append(self._decode_text(data))
        self._value = 0
    dispatch["string"] = end_string
    dispatch["name"] = end_string # struct keys are always strings

    def end_array(self, data):
        mark = self._marks.pop()
        # map arrays to Python lists
        self._stack[mark:] = [self._stack[mark:]]
        self._value = 0
    dispatch["array"] = end_array

    def end_struct(self, data):
        mark = self._marks.pop()
        # map structs to Python dictionaries; the stack above the mark
        # alternates member name, member value
        members = {}
        items = self._stack[mark:]
        for i in range(0, len(items), 2):
            members[items[i]] = items[i+1]
        self._stack[mark:] = [members]
        self._value = 0
    dispatch["struct"] = end_struct

    def end_base64(self, data):
        value = Binary()
        value.decode(data.encode("ascii"))
        if self._use_bytes:
            value = value.data
        self.append(value)
        self._value = 0
    dispatch["base64"] = end_base64

    def end_dateTime(self, data):
        value = DateTime()
        value.decode(data)
        if self._use_datetime:
            value = _datetime_type(data)
        self.append(value)
    dispatch["dateTime.iso8601"] = end_dateTime

    def end_value(self, data):
        # if we stumble upon a value element with no internal
        # elements, treat it as a string element
        if self._value:
            self.end_string(data)
    dispatch["value"] = end_value

    def end_params(self, data):
        self._type = "params"
    dispatch["params"] = end_params

    def end_fault(self, data):
        self._type = "fault"
    dispatch["fault"] = end_fault

    def end_methodName(self, data):
        self._methodname = self._decode_text(data)
        self._type = "methodName" # no params
    dispatch["methodName"] = end_methodName
+
+## Multicall support
+#
+
class _MultiCallMethod(object):
    """Record calls made on a MultiCall object for later batch execution.

    Attribute access builds a dotted method name; calling the object
    appends ``(name, args)`` to the shared call list.
    """
    # some lesser magic to store calls made to a MultiCall object
    # for batch execution

    def __init__(self, call_list, name):
        self.__call_list = call_list
        self.__name = name

    def __getattr__(self, name):
        dotted = "%s.%s" % (self.__name, name)
        return _MultiCallMethod(self.__call_list, dotted)

    def __call__(self, *args):
        entry = (self.__name, args)
        self.__call_list.append(entry)
+
class MultiCallIterator(object):
    """Iterates over the results of a multicall. Exceptions are
    raised in response to xmlrpc faults.

    Each entry of *results* is either a one-element list holding the
    return value of a call, or a fault struct (a dict with 'faultCode'
    and 'faultString').
    """

    def __init__(self, results):
        self.results = results

    def __getitem__(self, i):
        """Return the value of call *i*.

        Raises Fault if that call failed, ValueError if the entry is
        neither a fault struct nor a list, and IndexError past the end
        (which is what ends iteration).
        """
        item = self.results[i]
        # The type of the item is what matters here.  Testing
        # isinstance(type(item), dict) was always false and turned every
        # fault into a ValueError.
        if isinstance(item, dict):
            raise Fault(item['faultCode'], item['faultString'])
        elif isinstance(item, list):
            return item[0]
        else:
            raise ValueError("unexpected type in multicall result")
+
class MultiCall(object):
    """server -> an object used to boxcar method calls

    server should be a ServerProxy object.

    Methods can be added to the MultiCall using normal
    method call syntax e.g.:

    multicall = MultiCall(server_proxy)
    multicall.add(2,3)
    multicall.get_address("Guido")

    To execute the multicall, call the MultiCall object e.g.:

    add_result, address = multicall()
    """

    def __init__(self, server):
        self.__server = server
        self.__call_list = []

    def __repr__(self):
        return "<MultiCall at %x>" % id(self)

    __str__ = __repr__

    def __getattr__(self, name):
        # Any unknown attribute becomes a recorder for that method name.
        return _MultiCallMethod(self.__call_list, name)

    def __call__(self):
        """Send all recorded calls in one system.multicall request."""
        marshalled_list = [
            {'methodName' : name, 'params' : args}
            for name, args in self.__call_list
        ]
        raw_results = self.__server.system.multicall(marshalled_list)
        return MultiCallIterator(raw_results)
+
+# --------------------------------------------------------------------
+# convenience functions
+
# Optional accelerated implementations.  All three are None here, so
# getparser() and dumps() fall back to the pure-Python classes.
FastMarshaller = FastParser = FastUnmarshaller = None
+
+##
+# Create a parser object, and connect it to an unmarshalling instance.
+# This function picks the fastest available XML parser.
+#
+# return A (parser, unmarshaller) tuple.
+
def getparser(use_datetime=False, use_builtin_types=False):
    """getparser() -> parser, unmarshaller

    Build the fastest parser available, connect it to an unmarshalling
    object, and return the pair.
    """
    if FastParser and FastUnmarshaller:
        # Choose the factories the accelerated unmarshaller should use
        # for dateTime and base64 elements.
        if use_builtin_types or use_datetime:
            mkdatetime = _datetime_type
        else:
            mkdatetime = _datetime
        mkbytes = base64.decodebytes if use_builtin_types else _binary
        target = FastUnmarshaller(True, False, mkbytes, mkdatetime, Fault)
        parser = FastParser(target)
    else:
        target = Unmarshaller(use_datetime=use_datetime, use_builtin_types=use_builtin_types)
        parser_factory = FastParser if FastParser else ExpatParser
        parser = parser_factory(target)
    return parser, target
+
+##
+# Convert a Python tuple or a Fault instance to an XML-RPC packet.
+#
+# @def dumps(params, **options)
+# @param params A tuple or Fault instance.
+# @keyparam methodname If given, create a methodCall request for
+# this method name.
+# @keyparam methodresponse If given, create a methodResponse packet.
+# If used with a tuple, the tuple must be a singleton (that is,
+# it must contain exactly one element).
+# @keyparam encoding The packet encoding.
+# @return A string containing marshalled data.
+
def dumps(params, methodname=None, methodresponse=None, encoding=None,
          allow_none=False):
    """data [,options] -> marshalled data

    Turn an argument tuple or a Fault instance into an XML-RPC request,
    or into a response when the methodresponse option is set.

    Besides the data object, these keyword options are accepted:

    methodname: the method name for a methodCall packet

    methodresponse: true to create a methodResponse packet.
    When combined with a tuple, that tuple must be a singleton
    (i.e. hold exactly one element).

    encoding: the packet encoding (default is UTF-8)

    allow_none: passed to the Marshaller; permits None values.

    With neither methodname nor methodresponse given, only the bare
    marshalled params chunk is returned.
    """

    assert isinstance(params, (tuple, Fault)), "argument must be tuple or Fault instance"
    if isinstance(params, Fault):
        methodresponse = 1
    elif methodresponse and isinstance(params, tuple):
        assert len(params) == 1, "response tuple must be a singleton"

    encoding = encoding or "utf-8"

    marshaller = FastMarshaller(encoding) if FastMarshaller else Marshaller(encoding, allow_none)
    body = marshaller.dumps(params)

    if encoding == "utf-8":
        xmlheader = "<?xml version='1.0'?>\n" # utf-8 is default
    else:
        xmlheader = "<?xml version='1.0' encoding='%s'?>\n" % str(encoding)

    # standard XML-RPC wrappings
    if methodname:
        # a method call
        if not isinstance(methodname, str):
            methodname = methodname.encode(encoding)
        pieces = (
            xmlheader,
            "<methodCall>\n"
            "<methodName>", methodname, "</methodName>\n",
            body,
            "</methodCall>\n"
            )
    elif methodresponse:
        # a method response, or a fault structure
        pieces = (
            xmlheader,
            "<methodResponse>\n",
            body,
            "</methodResponse>\n"
            )
    else:
        return body # return as is
    return str("").join(pieces)
+
+##
+# Convert an XML-RPC packet to a Python object. If the XML-RPC packet
+# represents a fault condition, this function raises a Fault exception.
+#
+# @param data An XML-RPC packet, given as an 8-bit string.
+# @return A tuple containing the unpacked data, and the method name
+# (None if not present).
+# @see Fault
+
def loads(data, use_datetime=False, use_builtin_types=False):
    """data -> unmarshalled data, method name

    Parse an XML-RPC packet and return a 2-tuple of the unmarshalled
    values and the method name (None when the packet carries none).

    A packet describing a fault condition makes this function raise a
    Fault exception.
    """
    parser, unmarshaller = getparser(use_datetime=use_datetime,
                                     use_builtin_types=use_builtin_types)
    parser.feed(data)
    parser.close()
    values = unmarshaller.close()
    return values, unmarshaller.getmethodname()
+
+##
+# Encode a string using the gzip content encoding such as specified by the
+# Content-Encoding: gzip
+# in the HTTP header, as described in RFC 1952
+#
+# @param data the unencoded data
+# @return the encoded data
+
def gzip_encode(data):
    """data -> gzip encoded data

    Compress *data* with the gzip content encoding of RFC 1952 and return
    the compressed bytes.  Raises NotImplementedError when the gzip
    module is unavailable.
    """
    if not gzip:
        raise NotImplementedError
    buffer = BytesIO()
    compressor = gzip.GzipFile(mode="wb", fileobj=buffer, compresslevel=1)
    compressor.write(data)
    # Closing the GzipFile flushes the trailer into the buffer.
    compressor.close()
    encoded = buffer.getvalue()
    buffer.close()
    return encoded
+
+##
+# Decode a string using the gzip content encoding such as specified by the
+# Content-Encoding: gzip
+# in the HTTP header, as described in RFC 1952
+#
+# @param data The encoded data
+# @return the unencoded data
+# @raises ValueError if data is not correctly coded.
+
def gzip_decode(data):
    """gzip encoded data -> unencoded data

    Decode data using the gzip content encoding as described in RFC 1952.

    Raises NotImplementedError when the gzip module is unavailable, and
    ValueError when *data* is not valid gzip data, including data that
    was truncated before the end-of-stream marker.
    """
    if not gzip:
        raise NotImplementedError
    f = BytesIO(data)
    try:
        gzf = gzip.GzipFile(mode="rb", fileobj=f)
        try:
            decoded = gzf.read()
        except (IOError, EOFError):
            # EOFError is what GzipFile raises for a truncated stream;
            # report it like any other malformed input.
            raise ValueError("invalid data")
        finally:
            gzf.close()
    finally:
        # Release the buffer on the error path too.
        f.close()
    return decoded
+
+##
+# Return a decoded file-like object for the gzip encoding
+# as described in RFC 1952.
+#
+# @param response A stream supporting a read() method
+# @return a file-like object that the decoded data can be read() from
+
class GzipDecodedResponse(gzip.GzipFile if gzip else object):
    """a file-like object to decode a response encoded with the gzip
    method, as described in RFC 1952.

    The whole response body is read into an in-memory buffer
    (``self.io``) that the GzipFile base class then decompresses from.
    """
    def __init__(self, response):
        # The original note says the response lacks the tell()/read()
        # support GzipFile requires, hence the intermediate buffer.
        # NOTE(review): presumably tell()/seek() was meant - confirm.
        if not gzip:
            raise NotImplementedError
        self.io = BytesIO(response.read())
        gzip.GzipFile.__init__(self, mode="rb", fileobj=self.io)

    def close(self):
        """Close the decompressor, then the buffer, even on failure."""
        try:
            gzip.GzipFile.close(self)
        finally:
            self.io.close()
+
+
+# --------------------------------------------------------------------
+# request dispatcher
+
+class _Method(object):
+ # some magic to bind an XML-RPC method to an RPC server.
+ # supports "nested" methods (e.g. examples.getStateName)
+ def __init__(self, send, name):
+ self.__send = send
+ self.__name = name
def __repr__(self):
return "<_Method for %s>" % self.__name
__str__ = __repr__
- def __getattr__(self, name):
- return _Method(self.__send, "%s.%s" % (self.__name, name))
- def __call__(self, *args):
- return self.__send(self.__name, args)
-
-##
-# Standard transport class for XML-RPC over HTTP.
-# <p>
-# You can create custom transports by subclassing this method, and
-# overriding selected methods.
-
-class Transport(object):
- """Handles an HTTP transaction to an XML-RPC server."""
-
- # client identifier (may be overridden)
- user_agent = "Python-xmlrpc/%s" % __version__
-
- #if true, we'll request gzip encoding
- accept_gzip_encoding = True
-
- # if positive, encode request using gzip if it exceeds this threshold
- # note that many server will get confused, so only use it if you know
- # that they can decode such a request
- encode_threshold = None #None = don't encode
-
- def __init__(self, use_datetime=False, use_builtin_types=False):
- self._use_datetime = use_datetime
- self._use_builtin_types = use_builtin_types
- self._connection = (None, None)
- self._extra_headers = []
-
- ##
- # Send a complete request, and parse the response.
- # Retry request if a cached connection has disconnected.
- #
- # @param host Target host.
- # @param handler Target PRC handler.
- # @param request_body XML-RPC request body.
- # @param verbose Debugging flag.
- # @return Parsed response.
-
- def request(self, host, handler, request_body, verbose=False):
- #retry request once if cached connection has gone cold
- for i in (0, 1):
- try:
- return self.single_request(host, handler, request_body, verbose)
- except socket.error as e:
- if i or e.errno not in (errno.ECONNRESET, errno.ECONNABORTED, errno.EPIPE):
- raise
- except http_client.BadStatusLine: #close after we sent request
- if i:
- raise
-
- def single_request(self, host, handler, request_body, verbose=False):
- # issue XML-RPC request
- try:
- http_conn = self.send_request(host, handler, request_body, verbose)
- resp = http_conn.getresponse()
- if resp.status == 200:
- self.verbose = verbose
- return self.parse_response(resp)
-
- except Fault:
- raise
- except Exception:
- #All unexpected errors leave connection in
- # a strange state, so we clear it.
- self.close()
- raise
-
- #We got an error response.
- #Discard any response data and raise exception
- if resp.getheader("content-length", ""):
- resp.read()
- raise ProtocolError(
- host + handler,
- resp.status, resp.reason,
- dict(resp.getheaders())
- )
-
-
- ##
- # Create parser.
- #
- # @return A 2-tuple containing a parser and a unmarshaller.
-
- def getparser(self):
- # get parser and unmarshaller
- return getparser(use_datetime=self._use_datetime,
- use_builtin_types=self._use_builtin_types)
-
- ##
- # Get authorization info from host parameter
- # Host may be a string, or a (host, x509-dict) tuple; if a string,
- # it is checked for a "user:pw@host" format, and a "Basic
- # Authentication" header is added if appropriate.
- #
- # @param host Host descriptor (URL or (URL, x509 info) tuple).
- # @return A 3-tuple containing (actual host, extra headers,
- # x509 info). The header and x509 fields may be None.
-
- def get_host_info(self, host):
-
- x509 = {}
- if isinstance(host, tuple):
- host, x509 = host
-
- auth, host = urllib_parse.splituser(host)
-
- if auth:
- auth = urllib_parse.unquote_to_bytes(auth)
- auth = base64.encodebytes(auth).decode("utf-8")
- auth = "".join(auth.split()) # get rid of whitespace
- extra_headers = [
- ("Authorization", "Basic " + auth)
- ]
- else:
- extra_headers = []
-
- return host, extra_headers, x509
-
- ##
- # Connect to server.
- #
- # @param host Target host.
- # @return An HTTPConnection object
-
- def make_connection(self, host):
- #return an existing connection if possible. This allows
- #HTTP/1.1 keep-alive.
- if self._connection and host == self._connection[0]:
- return self._connection[1]
- # create a HTTP connection object from a host descriptor
- chost, self._extra_headers, x509 = self.get_host_info(host)
- self._connection = host, http_client.HTTPConnection(chost)
- return self._connection[1]
-
- ##
- # Clear any cached connection object.
- # Used in the event of socket errors.
- #
- def close(self):
- if self._connection[1]:
- self._connection[1].close()
- self._connection = (None, None)
-
- ##
- # Send HTTP request.
- #
- # @param host Host descriptor (URL or (URL, x509 info) tuple).
- # @param handler Targer RPC handler (a path relative to host)
- # @param request_body The XML-RPC request body
- # @param debug Enable debugging if debug is true.
- # @return An HTTPConnection.
-
- def send_request(self, host, handler, request_body, debug):
- connection = self.make_connection(host)
- headers = self._extra_headers[:]
- if debug:
- connection.set_debuglevel(1)
- if self.accept_gzip_encoding and gzip:
- connection.putrequest("POST", handler, skip_accept_encoding=True)
- headers.append(("Accept-Encoding", "gzip"))
- else:
- connection.putrequest("POST", handler)
- headers.append(("Content-Type", "text/xml"))
- headers.append(("User-Agent", self.user_agent))
- self.send_headers(connection, headers)
- self.send_content(connection, request_body)
- return connection
-
- ##
- # Send request headers.
- # This function provides a useful hook for subclassing
- #
- # @param connection httpConnection.
- # @param headers list of key,value pairs for HTTP headers
-
- def send_headers(self, connection, headers):
- for key, val in headers:
- connection.putheader(key, val)
-
- ##
- # Send request body.
- # This function provides a useful hook for subclassing
- #
- # @param connection httpConnection.
- # @param request_body XML-RPC request body.
-
- def send_content(self, connection, request_body):
- #optionally encode the request
- if (self.encode_threshold is not None and
- self.encode_threshold < len(request_body) and
- gzip):
- connection.putheader("Content-Encoding", "gzip")
- request_body = gzip_encode(request_body)
-
- connection.putheader("Content-Length", str(len(request_body)))
- connection.endheaders(request_body)
-
- ##
- # Parse response.
- #
- # @param file Stream.
- # @return Response tuple and target method.
-
- def parse_response(self, response):
- # read response data from httpresponse, and parse it
- # Check for new http response object, otherwise it is a file object.
- if hasattr(response, 'getheader'):
- if response.getheader("Content-Encoding", "") == "gzip":
- stream = GzipDecodedResponse(response)
- else:
- stream = response
- else:
- stream = response
-
- p, u = self.getparser()
-
- while 1:
- data = stream.read(1024)
- if not data:
- break
- if self.verbose:
- print("body:", repr(data))
- p.feed(data)
-
- if stream is not response:
- stream.close()
- p.close()
-
- return u.close()
-
-##
-# Standard transport class for XML-RPC over HTTPS.
-
-class SafeTransport(Transport):
- """Handles an HTTPS transaction to an XML-RPC server."""
-
- # FIXME: mostly untested
-
- def make_connection(self, host):
- if self._connection and host == self._connection[0]:
- return self._connection[1]
-
- if not hasattr(http_client, "HTTPSConnection"):
- raise NotImplementedError(
- "your version of http.client doesn't support HTTPS")
- # create a HTTPS connection object from a host descriptor
- # host may be a string, or a (host, x509-dict) tuple
- chost, self._extra_headers, x509 = self.get_host_info(host)
- self._connection = host, http_client.HTTPSConnection(chost,
- None, **(x509 or {}))
- return self._connection[1]
-
-##
-# Standard server proxy. This class establishes a virtual connection
-# to an XML-RPC server.
-# <p>
-# This class is available as ServerProxy and Server. New code should
-# use ServerProxy, to avoid confusion.
-#
-# @def ServerProxy(uri, **options)
-# @param uri The connection point on the server.
-# @keyparam transport A transport factory, compatible with the
-# standard transport class.
-# @keyparam encoding The default encoding used for 8-bit strings
-# (default is UTF-8).
-# @keyparam verbose Use a true value to enable debugging output.
-# (printed to standard output).
-# @see Transport
-
-class ServerProxy(object):
- """uri [,options] -> a logical connection to an XML-RPC server
-
- uri is the connection point on the server, given as
- scheme://host/target.
-
- The standard implementation always supports the "http" scheme. If
- SSL socket support is available (Python 2.0), it also supports
- "https".
-
- If the target part and the slash preceding it are both omitted,
- "/RPC2" is assumed.
-
- The following options can be given as keyword arguments:
-
- transport: a transport factory
- encoding: the request encoding (default is UTF-8)
-
- All 8-bit strings passed to the server proxy are assumed to use
- the given encoding.
- """
-
- def __init__(self, uri, transport=None, encoding=None, verbose=False,
- allow_none=False, use_datetime=False, use_builtin_types=False):
- # establish a "logical" server connection
-
- # get the url
- type, uri = urllib_parse.splittype(uri)
- if type not in ("http", "https"):
- raise IOError("unsupported XML-RPC protocol")
- self.__host, self.__handler = urllib_parse.splithost(uri)
- if not self.__handler:
- self.__handler = "/RPC2"
-
- if transport is None:
- if type == "https":
- handler = SafeTransport
- else:
- handler = Transport
- transport = handler(use_datetime=use_datetime,
- use_builtin_types=use_builtin_types)
- self.__transport = transport
-
- self.__encoding = encoding or 'utf-8'
- self.__verbose = verbose
- self.__allow_none = allow_none
-
- def __close(self):
- self.__transport.close()
-
- def __request(self, methodname, params):
- # call a method on the remote server
-
- request = dumps(params, methodname, encoding=self.__encoding,
- allow_none=self.__allow_none).encode(self.__encoding)
-
- response = self.__transport.request(
- self.__host,
- self.__handler,
- request,
- verbose=self.__verbose
- )
-
- if len(response) == 1:
- response = response[0]
-
- return response
-
- def __repr__(self):
- return (
- "<ServerProxy for %s%s>" %
- (self.__host, self.__handler)
- )
-
- __str__ = __repr__
-
- def __getattr__(self, name):
- # magic method dispatcher
- return _Method(self.__request, name)
-
- # note: to call a remote object with an non-standard name, use
- # result getattr(server, "strange-python-name")(args)
-
- def __call__(self, attr):
- """A workaround to get special attributes on the ServerProxy
- without interfering with the magic __getattr__
- """
- if attr == "close":
- return self.__close
- elif attr == "transport":
- return self.__transport
- raise AttributeError("Attribute %r not found" % (attr,))
-
-# compatibility
-
-Server = ServerProxy
-
-# --------------------------------------------------------------------
-# test code
-
-if __name__ == "__main__":
-
- # simple test program (from the XML-RPC specification)
-
- # local server, available from Lib/xmlrpc/server.py
- server = ServerProxy("http://localhost:8000")
-
- try:
- print(server.currentTime.getCurrentTime())
- except Error as v:
- print("ERROR", v)
-
- multi = MultiCall(server)
- multi.getData()
- multi.pow(2,9)
- multi.add(1,2)
- try:
- for response in multi():
- print(response)
- except Error as v:
- print("ERROR", v)
+ def __getattr__(self, name):
+ return _Method(self.__send, "%s.%s" % (self.__name, name))
+ def __call__(self, *args):
+ return self.__send(self.__name, args)
+
+##
+# Standard transport class for XML-RPC over HTTP.
+# <p>
+# You can create custom transports by subclassing this method, and
+# overriding selected methods.
+
+class Transport(object):
+ """Handles an HTTP transaction to an XML-RPC server."""
+
+ # client identifier (may be overridden)
+ user_agent = "Python-xmlrpc/%s" % __version__
+
+ #if true, we'll request gzip encoding
+ accept_gzip_encoding = True
+
+ # if positive, encode request using gzip if it exceeds this threshold
+ # note that many server will get confused, so only use it if you know
+ # that they can decode such a request
+ encode_threshold = None #None = don't encode
+
+ def __init__(self, use_datetime=False, use_builtin_types=False):
+ self._use_datetime = use_datetime
+ self._use_builtin_types = use_builtin_types
+ self._connection = (None, None)
+ self._extra_headers = []
+
+ ##
+ # Send a complete request, and parse the response.
+ # Retry request if a cached connection has disconnected.
+ #
+ # @param host Target host.
+ # @param handler Target PRC handler.
+ # @param request_body XML-RPC request body.
+ # @param verbose Debugging flag.
+ # @return Parsed response.
+
+ def request(self, host, handler, request_body, verbose=False):
+ #retry request once if cached connection has gone cold
+ for i in (0, 1):
+ try:
+ return self.single_request(host, handler, request_body, verbose)
+ except socket.error as e:
+ if i or e.errno not in (errno.ECONNRESET, errno.ECONNABORTED, errno.EPIPE):
+ raise
+ except http_client.BadStatusLine: #close after we sent request
+ if i:
+ raise
+
+ def single_request(self, host, handler, request_body, verbose=False):
+ # issue XML-RPC request
+ try:
+ http_conn = self.send_request(host, handler, request_body, verbose)
+ resp = http_conn.getresponse()
+ if resp.status == 200:
+ self.verbose = verbose
+ return self.parse_response(resp)
+
+ except Fault:
+ raise
+ except Exception:
+ #All unexpected errors leave connection in
+ # a strange state, so we clear it.
+ self.close()
+ raise
+
+ #We got an error response.
+ #Discard any response data and raise exception
+ if resp.getheader("content-length", ""):
+ resp.read()
+ raise ProtocolError(
+ host + handler,
+ resp.status, resp.reason,
+ dict(resp.getheaders())
+ )
+
+
+ ##
+ # Create parser.
+ #
+ # @return A 2-tuple containing a parser and a unmarshaller.
+
+ def getparser(self):
+ # get parser and unmarshaller
+ return getparser(use_datetime=self._use_datetime,
+ use_builtin_types=self._use_builtin_types)
+
+ ##
+ # Get authorization info from host parameter
+ # Host may be a string, or a (host, x509-dict) tuple; if a string,
+ # it is checked for a "user:pw@host" format, and a "Basic
+ # Authentication" header is added if appropriate.
+ #
+ # @param host Host descriptor (URL or (URL, x509 info) tuple).
+ # @return A 3-tuple containing (actual host, extra headers,
+ # x509 info). The header and x509 fields may be None.
+
+ def get_host_info(self, host):
+
+ x509 = {}
+ if isinstance(host, tuple):
+ host, x509 = host
+
+ auth, host = urllib_parse.splituser(host)
+
+ if auth:
+ auth = urllib_parse.unquote_to_bytes(auth)
+ auth = base64.encodebytes(auth).decode("utf-8")
+ auth = "".join(auth.split()) # get rid of whitespace
+ extra_headers = [
+ ("Authorization", "Basic " + auth)
+ ]
+ else:
+ extra_headers = []
+
+ return host, extra_headers, x509
+
+ ##
+ # Connect to server.
+ #
+ # @param host Target host.
+ # @return An HTTPConnection object
+
+ def make_connection(self, host):
+ #return an existing connection if possible. This allows
+ #HTTP/1.1 keep-alive.
+ if self._connection and host == self._connection[0]:
+ return self._connection[1]
+ # create a HTTP connection object from a host descriptor
+ chost, self._extra_headers, x509 = self.get_host_info(host)
+ self._connection = host, http_client.HTTPConnection(chost)
+ return self._connection[1]
+
+ ##
+ # Clear any cached connection object.
+ # Used in the event of socket errors.
+ #
+ def close(self):
+ if self._connection[1]:
+ self._connection[1].close()
+ self._connection = (None, None)
+
+ ##
+ # Send HTTP request.
+ #
+ # @param host Host descriptor (URL or (URL, x509 info) tuple).
+ # @param handler Targer RPC handler (a path relative to host)
+ # @param request_body The XML-RPC request body
+ # @param debug Enable debugging if debug is true.
+ # @return An HTTPConnection.
+
+ def send_request(self, host, handler, request_body, debug):
+ connection = self.make_connection(host)
+ headers = self._extra_headers[:]
+ if debug:
+ connection.set_debuglevel(1)
+ if self.accept_gzip_encoding and gzip:
+ connection.putrequest("POST", handler, skip_accept_encoding=True)
+ headers.append(("Accept-Encoding", "gzip"))
+ else:
+ connection.putrequest("POST", handler)
+ headers.append(("Content-Type", "text/xml"))
+ headers.append(("User-Agent", self.user_agent))
+ self.send_headers(connection, headers)
+ self.send_content(connection, request_body)
+ return connection
+
+ ##
+ # Send request headers.
+ # This function provides a useful hook for subclassing
+ #
+ # @param connection httpConnection.
+ # @param headers list of key,value pairs for HTTP headers
+
+ def send_headers(self, connection, headers):
+ for key, val in headers:
+ connection.putheader(key, val)
+
+ ##
+ # Send request body.
+ # This function provides a useful hook for subclassing
+ #
+ # @param connection httpConnection.
+ # @param request_body XML-RPC request body.
+
+ def send_content(self, connection, request_body):
+ #optionally encode the request
+ if (self.encode_threshold is not None and
+ self.encode_threshold < len(request_body) and
+ gzip):
+ connection.putheader("Content-Encoding", "gzip")
+ request_body = gzip_encode(request_body)
+
+ connection.putheader("Content-Length", str(len(request_body)))
+ connection.endheaders(request_body)
+
+ ##
+ # Parse response.
+ #
+ # @param file Stream.
+ # @return Response tuple and target method.
+
+ def parse_response(self, response):
+ # read response data from httpresponse, and parse it
+ # Check for new http response object, otherwise it is a file object.
+ if hasattr(response, 'getheader'):
+ if response.getheader("Content-Encoding", "") == "gzip":
+ stream = GzipDecodedResponse(response)
+ else:
+ stream = response
+ else:
+ stream = response
+
+ p, u = self.getparser()
+
+ while 1:
+ data = stream.read(1024)
+ if not data:
+ break
+ if self.verbose:
+ print("body:", repr(data))
+ p.feed(data)
+
+ if stream is not response:
+ stream.close()
+ p.close()
+
+ return u.close()
+
+##
+# Standard transport class for XML-RPC over HTTPS.
+
+class SafeTransport(Transport):
+ """Handles an HTTPS transaction to an XML-RPC server."""
+
+ # FIXME: mostly untested
+
+ def make_connection(self, host):
+ if self._connection and host == self._connection[0]:
+ return self._connection[1]
+
+ if not hasattr(http_client, "HTTPSConnection"):
+ raise NotImplementedError(
+ "your version of http.client doesn't support HTTPS")
+ # create a HTTPS connection object from a host descriptor
+ # host may be a string, or a (host, x509-dict) tuple
+ chost, self._extra_headers, x509 = self.get_host_info(host)
+ self._connection = host, http_client.HTTPSConnection(chost,
+ None, **(x509 or {}))
+ return self._connection[1]
+
+##
+# Standard server proxy. This class establishes a virtual connection
+# to an XML-RPC server.
+# <p>
+# This class is available as ServerProxy and Server. New code should
+# use ServerProxy, to avoid confusion.
+#
+# @def ServerProxy(uri, **options)
+# @param uri The connection point on the server.
+# @keyparam transport A transport factory, compatible with the
+# standard transport class.
+# @keyparam encoding The default encoding used for 8-bit strings
+# (default is UTF-8).
+# @keyparam verbose Use a true value to enable debugging output.
+# (printed to standard output).
+# @see Transport
+
+class ServerProxy(object):
+ """uri [,options] -> a logical connection to an XML-RPC server
+
+ uri is the connection point on the server, given as
+ scheme://host/target.
+
+ The standard implementation always supports the "http" scheme. If
+ SSL socket support is available (Python 2.0), it also supports
+ "https".
+
+ If the target part and the slash preceding it are both omitted,
+ "/RPC2" is assumed.
+
+ The following options can be given as keyword arguments:
+
+ transport: a transport factory
+ encoding: the request encoding (default is UTF-8)
+
+ All 8-bit strings passed to the server proxy are assumed to use
+ the given encoding.
+ """
+
+ def __init__(self, uri, transport=None, encoding=None, verbose=False,
+ allow_none=False, use_datetime=False, use_builtin_types=False):
+ # establish a "logical" server connection
+
+ # get the url
+ type, uri = urllib_parse.splittype(uri)
+ if type not in ("http", "https"):
+ raise IOError("unsupported XML-RPC protocol")
+ self.__host, self.__handler = urllib_parse.splithost(uri)
+ if not self.__handler:
+ self.__handler = "/RPC2"
+
+ if transport is None:
+ if type == "https":
+ handler = SafeTransport
+ else:
+ handler = Transport
+ transport = handler(use_datetime=use_datetime,
+ use_builtin_types=use_builtin_types)
+ self.__transport = transport
+
+ self.__encoding = encoding or 'utf-8'
+ self.__verbose = verbose
+ self.__allow_none = allow_none
+
+ def __close(self):
+ self.__transport.close()
+
+ def __request(self, methodname, params):
+ # call a method on the remote server
+
+ request = dumps(params, methodname, encoding=self.__encoding,
+ allow_none=self.__allow_none).encode(self.__encoding)
+
+ response = self.__transport.request(
+ self.__host,
+ self.__handler,
+ request,
+ verbose=self.__verbose
+ )
+
+ if len(response) == 1:
+ response = response[0]
+
+ return response
+
+ def __repr__(self):
+ return (
+ "<ServerProxy for %s%s>" %
+ (self.__host, self.__handler)
+ )
+
+ __str__ = __repr__
+
+ def __getattr__(self, name):
+ # magic method dispatcher
+ return _Method(self.__request, name)
+
+ # note: to call a remote object with an non-standard name, use
+ # result getattr(server, "strange-python-name")(args)
+
+ def __call__(self, attr):
+ """A workaround to get special attributes on the ServerProxy
+ without interfering with the magic __getattr__
+ """
+ if attr == "close":
+ return self.__close
+ elif attr == "transport":
+ return self.__transport
+ raise AttributeError("Attribute %r not found" % (attr,))
+
+# compatibility
+
+Server = ServerProxy
+
+# --------------------------------------------------------------------
+# test code
+
+if __name__ == "__main__":
+
+ # simple test program (from the XML-RPC specification)
+
+ # local server, available from Lib/xmlrpc/server.py
+ server = ServerProxy("http://localhost:8000")
+
+ try:
+ print(server.currentTime.getCurrentTime())
+ except Error as v:
+ print("ERROR", v)
+
+ multi = MultiCall(server)
+ multi.getData()
+ multi.pow(2,9)
+ multi.add(1,2)
+ try:
+ for response in multi():
+ print(response)
+ except Error as v:
+ print("ERROR", v)
diff --git a/contrib/python/future/future/backports/xmlrpc/server.py b/contrib/python/future/future/backports/xmlrpc/server.py
index 57281ec144..28072bfecd 100644
--- a/contrib/python/future/future/backports/xmlrpc/server.py
+++ b/contrib/python/future/future/backports/xmlrpc/server.py
@@ -1,999 +1,999 @@
-r"""
-Ported using Python-Future from the Python 3.3 standard library.
-
-XML-RPC Servers.
-
-This module can be used to create simple XML-RPC servers
-by creating a server and either installing functions, a
-class instance, or by extending the SimpleXMLRPCServer
-class.
-
-It can also be used to handle XML-RPC requests in a CGI
-environment using CGIXMLRPCRequestHandler.
-
-The Doc* classes can be used to create XML-RPC servers that
-serve pydoc-style documentation in response to HTTP
-GET requests. This documentation is dynamically generated
-based on the functions and methods registered with the
-server.
-
-A list of possible usage patterns follows:
-
-1. Install functions:
-
-server = SimpleXMLRPCServer(("localhost", 8000))
-server.register_function(pow)
-server.register_function(lambda x,y: x+y, 'add')
-server.serve_forever()
-
-2. Install an instance:
-
-class MyFuncs:
- def __init__(self):
- # make all of the sys functions available through sys.func_name
- import sys
- self.sys = sys
- def _listMethods(self):
- # implement this method so that system.listMethods
- # knows to advertise the sys methods
- return list_public_methods(self) + \
- ['sys.' + method for method in list_public_methods(self.sys)]
- def pow(self, x, y): return pow(x, y)
- def add(self, x, y) : return x + y
-
-server = SimpleXMLRPCServer(("localhost", 8000))
-server.register_introspection_functions()
-server.register_instance(MyFuncs())
-server.serve_forever()
-
-3. Install an instance with custom dispatch method:
-
-class Math:
- def _listMethods(self):
- # this method must be present for system.listMethods
- # to work
- return ['add', 'pow']
- def _methodHelp(self, method):
- # this method must be present for system.methodHelp
- # to work
- if method == 'add':
- return "add(2,3) => 5"
- elif method == 'pow':
- return "pow(x, y[, z]) => number"
- else:
- # By convention, return empty
- # string if no help is available
- return ""
- def _dispatch(self, method, params):
- if method == 'pow':
- return pow(*params)
- elif method == 'add':
- return params[0] + params[1]
- else:
- raise ValueError('bad method')
-
-server = SimpleXMLRPCServer(("localhost", 8000))
-server.register_introspection_functions()
-server.register_instance(Math())
-server.serve_forever()
-
-4. Subclass SimpleXMLRPCServer:
-
-class MathServer(SimpleXMLRPCServer):
- def _dispatch(self, method, params):
- try:
- # We are forcing the 'export_' prefix on methods that are
- # callable through XML-RPC to prevent potential security
- # problems
- func = getattr(self, 'export_' + method)
- except AttributeError:
- raise Exception('method "%s" is not supported' % method)
- else:
- return func(*params)
-
- def export_add(self, x, y):
- return x + y
-
-server = MathServer(("localhost", 8000))
-server.serve_forever()
-
-5. CGI script:
-
-server = CGIXMLRPCRequestHandler()
-server.register_function(pow)
-server.handle_request()
-"""
-
-from __future__ import absolute_import, division, print_function, unicode_literals
-from future.builtins import int, str
-
-# Written by Brian Quinlan (brian@sweetapp.com).
-# Based on code written by Fredrik Lundh.
-
-from future.backports.xmlrpc.client import Fault, dumps, loads, gzip_encode, gzip_decode
-from future.backports.http.server import BaseHTTPRequestHandler
-import future.backports.http.server as http_server
-from future.backports import socketserver
-import sys
-import os
-import re
-import pydoc
-import inspect
-import traceback
-try:
- import fcntl
-except ImportError:
- fcntl = None
-
-def resolve_dotted_attribute(obj, attr, allow_dotted_names=True):
- """resolve_dotted_attribute(a, 'b.c.d') => a.b.c.d
-
- Resolves a dotted attribute name to an object. Raises
- an AttributeError if any attribute in the chain starts with a '_'.
-
- If the optional allow_dotted_names argument is false, dots are not
- supported and this function operates similar to getattr(obj, attr).
- """
-
- if allow_dotted_names:
- attrs = attr.split('.')
- else:
- attrs = [attr]
-
- for i in attrs:
- if i.startswith('_'):
- raise AttributeError(
- 'attempt to access private attribute "%s"' % i
- )
- else:
- obj = getattr(obj,i)
- return obj
-
-def list_public_methods(obj):
- """Returns a list of attribute strings, found in the specified
- object, which represent callable attributes"""
-
- return [member for member in dir(obj)
- if not member.startswith('_') and
- callable(getattr(obj, member))]
-
-class SimpleXMLRPCDispatcher(object):
- """Mix-in class that dispatches XML-RPC requests.
-
- This class is used to register XML-RPC method handlers
- and then to dispatch them. This class doesn't need to be
- instanced directly when used by SimpleXMLRPCServer but it
- can be instanced when used by the MultiPathXMLRPCServer
- """
-
- def __init__(self, allow_none=False, encoding=None,
- use_builtin_types=False):
- self.funcs = {}
- self.instance = None
- self.allow_none = allow_none
- self.encoding = encoding or 'utf-8'
- self.use_builtin_types = use_builtin_types
-
- def register_instance(self, instance, allow_dotted_names=False):
- """Registers an instance to respond to XML-RPC requests.
-
- Only one instance can be installed at a time.
-
- If the registered instance has a _dispatch method then that
- method will be called with the name of the XML-RPC method and
- its parameters as a tuple
- e.g. instance._dispatch('add',(2,3))
-
- If the registered instance does not have a _dispatch method
- then the instance will be searched to find a matching method
- and, if found, will be called. Methods beginning with an '_'
- are considered private and will not be called by
- SimpleXMLRPCServer.
-
- If a registered function matches a XML-RPC request, then it
- will be called instead of the registered instance.
-
- If the optional allow_dotted_names argument is true and the
- instance does not have a _dispatch method, method names
- containing dots are supported and resolved, as long as none of
- the name segments start with an '_'.
-
- *** SECURITY WARNING: ***
-
- Enabling the allow_dotted_names options allows intruders
- to access your module's global variables and may allow
- intruders to execute arbitrary code on your machine. Only
- use this option on a secure, closed network.
-
- """
-
- self.instance = instance
- self.allow_dotted_names = allow_dotted_names
-
- def register_function(self, function, name=None):
- """Registers a function to respond to XML-RPC requests.
-
- The optional name argument can be used to set a Unicode name
- for the function.
- """
-
- if name is None:
- name = function.__name__
- self.funcs[name] = function
-
- def register_introspection_functions(self):
- """Registers the XML-RPC introspection methods in the system
- namespace.
-
- see http://xmlrpc.usefulinc.com/doc/reserved.html
- """
-
- self.funcs.update({'system.listMethods' : self.system_listMethods,
- 'system.methodSignature' : self.system_methodSignature,
- 'system.methodHelp' : self.system_methodHelp})
-
- def register_multicall_functions(self):
- """Registers the XML-RPC multicall method in the system
- namespace.
-
- see http://www.xmlrpc.com/discuss/msgReader$1208"""
-
- self.funcs.update({'system.multicall' : self.system_multicall})
-
- def _marshaled_dispatch(self, data, dispatch_method = None, path = None):
- """Dispatches an XML-RPC method from marshalled (XML) data.
-
- XML-RPC methods are dispatched from the marshalled (XML) data
- using the _dispatch method and the result is returned as
- marshalled data. For backwards compatibility, a dispatch
- function can be provided as an argument (see comment in
- SimpleXMLRPCRequestHandler.do_POST) but overriding the
- existing method through subclassing is the preferred means
- of changing method dispatch behavior.
- """
-
- try:
- params, method = loads(data, use_builtin_types=self.use_builtin_types)
-
- # generate response
- if dispatch_method is not None:
- response = dispatch_method(method, params)
- else:
- response = self._dispatch(method, params)
- # wrap response in a singleton tuple
- response = (response,)
- response = dumps(response, methodresponse=1,
- allow_none=self.allow_none, encoding=self.encoding)
- except Fault as fault:
- response = dumps(fault, allow_none=self.allow_none,
- encoding=self.encoding)
- except:
- # report exception back to server
- exc_type, exc_value, exc_tb = sys.exc_info()
- response = dumps(
- Fault(1, "%s:%s" % (exc_type, exc_value)),
- encoding=self.encoding, allow_none=self.allow_none,
- )
-
- return response.encode(self.encoding)
-
- def system_listMethods(self):
- """system.listMethods() => ['add', 'subtract', 'multiple']
-
- Returns a list of the methods supported by the server."""
-
- methods = set(self.funcs.keys())
- if self.instance is not None:
- # Instance can implement _listMethod to return a list of
- # methods
- if hasattr(self.instance, '_listMethods'):
- methods |= set(self.instance._listMethods())
- # if the instance has a _dispatch method then we
- # don't have enough information to provide a list
- # of methods
- elif not hasattr(self.instance, '_dispatch'):
- methods |= set(list_public_methods(self.instance))
- return sorted(methods)
-
- def system_methodSignature(self, method_name):
- """system.methodSignature('add') => [double, int, int]
-
- Returns a list describing the signature of the method. In the
- above example, the add method takes two integers as arguments
- and returns a double result.
-
- This server does NOT support system.methodSignature."""
-
- # See http://xmlrpc.usefulinc.com/doc/sysmethodsig.html
-
- return 'signatures not supported'
-
- def system_methodHelp(self, method_name):
- """system.methodHelp('add') => "Adds two integers together"
-
- Returns a string containing documentation for the specified method."""
-
- method = None
- if method_name in self.funcs:
- method = self.funcs[method_name]
- elif self.instance is not None:
- # Instance can implement _methodHelp to return help for a method
- if hasattr(self.instance, '_methodHelp'):
- return self.instance._methodHelp(method_name)
- # if the instance has a _dispatch method then we
- # don't have enough information to provide help
- elif not hasattr(self.instance, '_dispatch'):
- try:
- method = resolve_dotted_attribute(
- self.instance,
- method_name,
- self.allow_dotted_names
- )
- except AttributeError:
- pass
-
- # Note that we aren't checking that the method actually
- # be a callable object of some kind
- if method is None:
- return ""
- else:
- return pydoc.getdoc(method)
-
- def system_multicall(self, call_list):
- """system.multicall([{'methodName': 'add', 'params': [2, 2]}, ...]) => \
-[[4], ...]
-
- Allows the caller to package multiple XML-RPC calls into a single
- request.
-
- See http://www.xmlrpc.com/discuss/msgReader$1208
- """
-
- results = []
- for call in call_list:
- method_name = call['methodName']
- params = call['params']
-
- try:
- # XXX A marshalling error in any response will fail the entire
- # multicall. If someone cares they should fix this.
- results.append([self._dispatch(method_name, params)])
- except Fault as fault:
- results.append(
- {'faultCode' : fault.faultCode,
- 'faultString' : fault.faultString}
- )
- except:
- exc_type, exc_value, exc_tb = sys.exc_info()
- results.append(
- {'faultCode' : 1,
- 'faultString' : "%s:%s" % (exc_type, exc_value)}
- )
- return results
-
- def _dispatch(self, method, params):
- """Dispatches the XML-RPC method.
-
- XML-RPC calls are forwarded to a registered function that
- matches the called XML-RPC method name. If no such function
- exists then the call is forwarded to the registered instance,
- if available.
-
- If the registered instance has a _dispatch method then that
- method will be called with the name of the XML-RPC method and
- its parameters as a tuple
- e.g. instance._dispatch('add',(2,3))
-
- If the registered instance does not have a _dispatch method
- then the instance will be searched to find a matching method
- and, if found, will be called.
-
- Methods beginning with an '_' are considered private and will
- not be called.
- """
-
- func = None
- try:
- # check to see if a matching function has been registered
- func = self.funcs[method]
- except KeyError:
- if self.instance is not None:
- # check for a _dispatch method
- if hasattr(self.instance, '_dispatch'):
- return self.instance._dispatch(method, params)
- else:
- # call instance method directly
- try:
- func = resolve_dotted_attribute(
- self.instance,
- method,
- self.allow_dotted_names
- )
- except AttributeError:
- pass
-
- if func is not None:
- return func(*params)
- else:
- raise Exception('method "%s" is not supported' % method)
-
-class SimpleXMLRPCRequestHandler(BaseHTTPRequestHandler):
- """Simple XML-RPC request handler class.
-
- Handles all HTTP POST requests and attempts to decode them as
- XML-RPC requests.
- """
-
- # Class attribute listing the accessible path components;
- # paths not on this list will result in a 404 error.
- rpc_paths = ('/', '/RPC2')
-
- #if not None, encode responses larger than this, if possible
- encode_threshold = 1400 #a common MTU
-
- #Override form StreamRequestHandler: full buffering of output
- #and no Nagle.
- wbufsize = -1
- disable_nagle_algorithm = True
-
- # a re to match a gzip Accept-Encoding
- aepattern = re.compile(r"""
- \s* ([^\s;]+) \s* #content-coding
- (;\s* q \s*=\s* ([0-9\.]+))? #q
- """, re.VERBOSE | re.IGNORECASE)
-
- def accept_encodings(self):
- r = {}
- ae = self.headers.get("Accept-Encoding", "")
- for e in ae.split(","):
- match = self.aepattern.match(e)
- if match:
- v = match.group(3)
- v = float(v) if v else 1.0
- r[match.group(1)] = v
- return r
-
- def is_rpc_path_valid(self):
- if self.rpc_paths:
- return self.path in self.rpc_paths
- else:
- # If .rpc_paths is empty, just assume all paths are legal
- return True
-
- def do_POST(self):
- """Handles the HTTP POST request.
-
- Attempts to interpret all HTTP POST requests as XML-RPC calls,
- which are forwarded to the server's _dispatch method for handling.
- """
-
- # Check that the path is legal
- if not self.is_rpc_path_valid():
- self.report_404()
- return
-
- try:
- # Get arguments by reading body of request.
- # We read this in chunks to avoid straining
- # socket.read(); around the 10 or 15Mb mark, some platforms
- # begin to have problems (bug #792570).
- max_chunk_size = 10*1024*1024
- size_remaining = int(self.headers["content-length"])
- L = []
- while size_remaining:
- chunk_size = min(size_remaining, max_chunk_size)
- chunk = self.rfile.read(chunk_size)
- if not chunk:
- break
- L.append(chunk)
- size_remaining -= len(L[-1])
- data = b''.join(L)
-
- data = self.decode_request_content(data)
- if data is None:
- return #response has been sent
-
- # In previous versions of SimpleXMLRPCServer, _dispatch
- # could be overridden in this class, instead of in
- # SimpleXMLRPCDispatcher. To maintain backwards compatibility,
- # check to see if a subclass implements _dispatch and dispatch
- # using that method if present.
- response = self.server._marshaled_dispatch(
- data, getattr(self, '_dispatch', None), self.path
- )
- except Exception as e: # This should only happen if the module is buggy
- # internal error, report as HTTP server error
- self.send_response(500)
-
- # Send information about the exception if requested
- if hasattr(self.server, '_send_traceback_header') and \
- self.server._send_traceback_header:
- self.send_header("X-exception", str(e))
- trace = traceback.format_exc()
- trace = str(trace.encode('ASCII', 'backslashreplace'), 'ASCII')
- self.send_header("X-traceback", trace)
-
- self.send_header("Content-length", "0")
- self.end_headers()
- else:
- self.send_response(200)
- self.send_header("Content-type", "text/xml")
- if self.encode_threshold is not None:
- if len(response) > self.encode_threshold:
- q = self.accept_encodings().get("gzip", 0)
- if q:
- try:
- response = gzip_encode(response)
- self.send_header("Content-Encoding", "gzip")
- except NotImplementedError:
- pass
- self.send_header("Content-length", str(len(response)))
- self.end_headers()
- self.wfile.write(response)
-
- def decode_request_content(self, data):
- #support gzip encoding of request
- encoding = self.headers.get("content-encoding", "identity").lower()
- if encoding == "identity":
- return data
- if encoding == "gzip":
- try:
- return gzip_decode(data)
- except NotImplementedError:
- self.send_response(501, "encoding %r not supported" % encoding)
- except ValueError:
- self.send_response(400, "error decoding gzip content")
- else:
- self.send_response(501, "encoding %r not supported" % encoding)
- self.send_header("Content-length", "0")
- self.end_headers()
-
- def report_404 (self):
- # Report a 404 error
- self.send_response(404)
- response = b'No such page'
- self.send_header("Content-type", "text/plain")
- self.send_header("Content-length", str(len(response)))
- self.end_headers()
- self.wfile.write(response)
-
- def log_request(self, code='-', size='-'):
- """Selectively log an accepted request."""
-
- if self.server.logRequests:
- BaseHTTPRequestHandler.log_request(self, code, size)
-
-class SimpleXMLRPCServer(socketserver.TCPServer,
- SimpleXMLRPCDispatcher):
- """Simple XML-RPC server.
-
- Simple XML-RPC server that allows functions and a single instance
- to be installed to handle requests. The default implementation
- attempts to dispatch XML-RPC calls to the functions or instance
- installed in the server. Override the _dispatch method inherited
- from SimpleXMLRPCDispatcher to change this behavior.
- """
-
- allow_reuse_address = True
-
- # Warning: this is for debugging purposes only! Never set this to True in
- # production code, as will be sending out sensitive information (exception
- # and stack trace details) when exceptions are raised inside
- # SimpleXMLRPCRequestHandler.do_POST
- _send_traceback_header = False
-
- def __init__(self, addr, requestHandler=SimpleXMLRPCRequestHandler,
- logRequests=True, allow_none=False, encoding=None,
- bind_and_activate=True, use_builtin_types=False):
- self.logRequests = logRequests
-
- SimpleXMLRPCDispatcher.__init__(self, allow_none, encoding, use_builtin_types)
- socketserver.TCPServer.__init__(self, addr, requestHandler, bind_and_activate)
-
- # [Bug #1222790] If possible, set close-on-exec flag; if a
- # method spawns a subprocess, the subprocess shouldn't have
- # the listening socket open.
- if fcntl is not None and hasattr(fcntl, 'FD_CLOEXEC'):
- flags = fcntl.fcntl(self.fileno(), fcntl.F_GETFD)
- flags |= fcntl.FD_CLOEXEC
- fcntl.fcntl(self.fileno(), fcntl.F_SETFD, flags)
-
-class MultiPathXMLRPCServer(SimpleXMLRPCServer):
- """Multipath XML-RPC Server
- This specialization of SimpleXMLRPCServer allows the user to create
- multiple Dispatcher instances and assign them to different
- HTTP request paths. This makes it possible to run two or more
- 'virtual XML-RPC servers' at the same port.
- Make sure that the requestHandler accepts the paths in question.
- """
- def __init__(self, addr, requestHandler=SimpleXMLRPCRequestHandler,
- logRequests=True, allow_none=False, encoding=None,
- bind_and_activate=True, use_builtin_types=False):
-
- SimpleXMLRPCServer.__init__(self, addr, requestHandler, logRequests, allow_none,
- encoding, bind_and_activate, use_builtin_types)
- self.dispatchers = {}
- self.allow_none = allow_none
- self.encoding = encoding or 'utf-8'
-
- def add_dispatcher(self, path, dispatcher):
- self.dispatchers[path] = dispatcher
- return dispatcher
-
- def get_dispatcher(self, path):
- return self.dispatchers[path]
-
- def _marshaled_dispatch(self, data, dispatch_method = None, path = None):
- try:
- response = self.dispatchers[path]._marshaled_dispatch(
- data, dispatch_method, path)
- except:
- # report low level exception back to server
- # (each dispatcher should have handled their own
- # exceptions)
- exc_type, exc_value = sys.exc_info()[:2]
- response = dumps(
- Fault(1, "%s:%s" % (exc_type, exc_value)),
- encoding=self.encoding, allow_none=self.allow_none)
- response = response.encode(self.encoding)
- return response
-
class CGIXMLRPCRequestHandler(SimpleXMLRPCDispatcher):
    """Dispatcher that answers XML-RPC requests delivered through CGI.

    Responses (headers followed by the body) are written to sys.stdout.
    """

    def __init__(self, allow_none=False, encoding=None, use_builtin_types=False):
        SimpleXMLRPCDispatcher.__init__(self, allow_none, encoding, use_builtin_types)

    def handle_xmlrpc(self, request_text):
        """Dispatch one XML-RPC request and print the response."""
        payload = self._marshaled_dispatch(request_text)

        print('Content-Type: text/xml')
        print('Content-Length: %d' % len(payload))
        print()
        # Flush the text layer before writing bytes to the underlying
        # buffer so the headers cannot end up after the body.
        sys.stdout.flush()
        sys.stdout.buffer.write(payload)
        sys.stdout.buffer.flush()

    def handle_get(self):
        """Answer an HTTP GET with a 400 error page.

        XML-RPC is carried over POST, so a GET is reported as a bad
        request by the default implementation.
        """
        status = 400
        short_msg, long_msg = BaseHTTPRequestHandler.responses[status]
        fields = {
            'code' : status,
            'message' : short_msg,
            'explain' : long_msg
        }
        payload = (http_server.DEFAULT_ERROR_MESSAGE % fields).encode('utf-8')

        print('Status: %d %s' % (status, short_msg))
        print('Content-Type: %s' % http_server.DEFAULT_ERROR_CONTENT_TYPE)
        print('Content-Length: %d' % len(payload))
        print()
        sys.stdout.flush()
        sys.stdout.buffer.write(payload)
        sys.stdout.buffer.flush()

    def handle_request(self, request_text=None):
        """Handle one XML-RPC request received through a CGI POST.

        When *request_text* is None the XML is read from stdin (unless the
        REQUEST_METHOD environment variable says GET, in which case
        handle_get() answers).  The response and its HTTP headers are
        printed to stdout.
        """
        if request_text is None and \
                os.environ.get('REQUEST_METHOD', None) == 'GET':
            self.handle_get()
            return

        # POST data is normally available through stdin; a missing or
        # non-numeric CONTENT_LENGTH means "read everything".
        try:
            length = int(os.environ.get('CONTENT_LENGTH', None))
        except (ValueError, TypeError):
            length = -1
        if request_text is None:
            request_text = sys.stdin.read(length)

        self.handle_xmlrpc(request_text)
-
-
-# -----------------------------------------------------------------------------
-# Self documenting XML-RPC Server.
-
class ServerHTMLDoc(pydoc.HTMLDoc):
    """Class used to generate pydoc HTML document for a server"""

    # NOTE: the {} defaults mirror pydoc.HTMLDoc.markup's signature; the
    # dictionaries are only read here, never mutated.
    def markup(self, text, escape=None, funcs={}, classes={}, methods={}):
        """Mark up some plain text, given a context of symbols to look for.

        Each context dictionary maps object names to anchor names.  URLs,
        "RFC n" and "PEP n" references become hyperlinks, names followed by
        "(" are linked through methods/funcs/classes, and "self.name" is
        rendered in bold.  Text between matches is passed through *escape*
        (default: self.escape).
        """
        escape = escape or self.escape
        results = []
        here = 0

        # XXX Note that this regular expression does not allow for the
        # hyperlinking of arbitrary strings being used as method
        # names. Only methods with names consisting of word characters
        # and '.'s are hyperlinked.
        pattern = re.compile(r'\b((http|ftp)://\S+[\w/]|'
                             r'RFC[- ]?(\d+)|'
                             r'PEP[- ]?(\d+)|'
                             r'(self\.)?((?:\w|\.)+))\b')
        while True:
            match = pattern.search(text, here)
            if not match:
                break
            start, end = match.span()
            results.append(escape(text[here:start]))

            # "whole" is the complete matched text (avoids shadowing all()).
            whole, scheme, rfc, pep, selfdot, name = match.groups()
            if scheme:
                url = escape(whole).replace('"', '&quot;')
                results.append('<a href="%s">%s</a>' % (url, url))
            elif rfc:
                url = 'http://www.rfc-editor.org/rfc/rfc%d.txt' % int(rfc)
                results.append('<a href="%s">%s</a>' % (url, escape(whole)))
            elif pep:
                url = 'http://www.python.org/dev/peps/pep-%04d/' % int(pep)
                results.append('<a href="%s">%s</a>' % (url, escape(whole)))
            elif text[end:end+1] == '(':
                results.append(self.namelink(name, methods, funcs, classes))
            elif selfdot:
                results.append('self.<strong>%s</strong>' % name)
            else:
                results.append(self.namelink(name, classes))
            here = end
        results.append(escape(text[here:]))
        return ''.join(results)

    def _format_argspec(self, routine):
        """Return the HTML-safe argument list of *routine*, or '(...)'."""
        if inspect.ismethod(routine) or inspect.isfunction(routine):
            try:
                # inspect.signature() already omits the argument bound to
                # the instance of a bound method, which would only confuse
                # the non-Python user.  (inspect.formatargspec, used here
                # previously, no longer exists in Python 3.11+.)
                return self.escape(str(inspect.signature(routine)))
            except (TypeError, ValueError):
                pass
        return '(...)'

    def docroutine(self, object, name, mod=None,
                   funcs={}, classes={}, methods={}, cl=None):
        """Produce HTML documentation for a function or method object.

        *object* is either a callable or an (argspec, docstring) tuple; in
        the tuple form a false item falls back to '(...)' / "".
        """

        anchor = (cl and cl.__name__ or '') + '-' + name
        note = ''

        title = '<a name="%s"><strong>%s</strong></a>' % (
            self.escape(anchor), self.escape(name))

        argspec = self._format_argspec(object)

        if isinstance(object, tuple):
            argspec = object[0] or argspec
            docstring = object[1] or ""
        else:
            docstring = pydoc.getdoc(object)

        decl = title + argspec + (note and self.grey(
            '<font face="helvetica, arial">%s</font>' % note))

        doc = self.markup(
            docstring, self.preformat, funcs, classes, methods)
        doc = doc and '<dd><tt>%s</tt></dd>' % doc
        return '<dl><dt>%s</dt>%s</dl>\n' % (decl, doc)

    def docserver(self, server_name, package_documentation, methods):
        """Produce HTML documentation for an XML-RPC server.

        *methods* maps each exported method name to the object (or
        (argspec, docstring) tuple) that docroutine() should document.
        """
        # NOTE(review): heading()/bigsection() are called with explicit
        # colour arguments -- confirm this matches the pydoc.HTMLDoc
        # version in use.

        # Anchor table: both the method name and the method object map to
        # the "#-name" anchor emitted by docroutine().
        fdict = {}
        for key, value in methods.items():
            fdict[key] = '#-' + key
            fdict[value] = fdict[key]

        server_name = self.escape(server_name)
        head = '<big><big><strong>%s</strong></big></big>' % server_name
        result = self.heading(head, '#ffffff', '#7799ee')

        doc = self.markup(package_documentation, self.preformat, fdict)
        doc = doc and '<tt>%s</tt>' % doc
        result = result + '<p>%s</p>\n' % doc

        contents = [self.docroutine(value, key, funcs=fdict)
                    for key, value in sorted(methods.items())]
        result = result + self.bigsection(
            'Methods', '#ffffff', '#eeaa77', ''.join(contents))

        return result
-
class XMLRPCDocGenerator(object):
    """Generates documentation for an XML-RPC server.

    This class is designed as mix-in and should not
    be constructed directly.  It reads self.funcs, self.instance and
    self.system_listMethods() from the class it is mixed into.
    """

    def __init__(self):
        # setup variables used for HTML documentation
        self.server_name = 'XML-RPC Server Documentation'
        self.server_documentation = \
            "This server exports the following methods through the XML-RPC "\
            "protocol."
        self.server_title = 'XML-RPC Server Documentation'

    def set_server_title(self, server_title):
        """Set the HTML title of the generated server documentation"""

        self.server_title = server_title

    def set_server_name(self, server_name):
        """Set the name of the generated HTML server documentation"""

        self.server_name = server_name

    def set_server_documentation(self, server_documentation):
        """Set the documentation string for the entire server."""

        self.server_documentation = server_documentation

    def _documented_method(self, method_name):
        """Return what should be documented for *method_name*.

        The result is the registered function, an (argspec, documentation)
        tuple supplied by the instance, or the attribute resolved on the
        instance.  Raises AssertionError when the name is not a registered
        function and no instance is installed.
        """
        if method_name in self.funcs:
            return self.funcs[method_name]

        if self.instance is None:
            # Raised explicitly (not via "assert") so the check survives
            # python -O instead of silently reusing a stale value.
            raise AssertionError(
                "Could not find method in self.functions and no "
                "instance installed")

        method_info = [None, None] # argspec, documentation
        if hasattr(self.instance, '_get_method_argstring'):
            method_info[0] = self.instance._get_method_argstring(method_name)
        if hasattr(self.instance, '_methodHelp'):
            method_info[1] = self.instance._methodHelp(method_name)

        method_info = tuple(method_info)
        if method_info != (None, None):
            return method_info

        # Without a custom _dispatch the name maps onto a real attribute
        # that can be introspected directly.
        if not hasattr(self.instance, '_dispatch'):
            try:
                return resolve_dotted_attribute(
                    self.instance,
                    method_name
                )
            except AttributeError:
                pass
        return method_info

    def generate_html_documentation(self):
        """generate_html_documentation() => html documentation for the server

        Generates HTML documentation for the server using introspection for
        installed functions and instances that do not implement the
        _dispatch method. Alternatively, instances can choose to implement
        the _get_method_argstring(method_name) method to provide the
        argument string used in the documentation and the
        _methodHelp(method_name) method to provide the help text used
        in the documentation."""

        methods = {}
        for method_name in self.system_listMethods():
            methods[method_name] = self._documented_method(method_name)

        documenter = ServerHTMLDoc()
        documentation = documenter.docserver(
            self.server_name,
            self.server_documentation,
            methods
        )

        return documenter.page(self.server_title, documentation)
-
class DocXMLRPCRequestHandler(SimpleXMLRPCRequestHandler):
    """Request handler serving both XML-RPC calls and documentation.

    HTTP POST requests are decoded as XML-RPC requests (inherited
    behaviour); HTTP GET requests are answered with the server's
    generated HTML documentation.
    """

    def do_GET(self):
        """Answer an HTTP GET request with the server documentation.

        Paths rejected by is_rpc_path_valid() get a 404 response.
        """
        # Check that the path is legal
        if not self.is_rpc_path_valid():
            self.report_404()
            return

        page = self.server.generate_html_documentation()
        body = page.encode('utf-8')

        self.send_response(200)
        self.send_header("Content-type", "text/html")
        self.send_header("Content-length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)
-
class DocXMLRPCServer(SimpleXMLRPCServer, XMLRPCDocGenerator):
    """XML-RPC server that also serves HTML documentation.

    Combines SimpleXMLRPCServer with the XMLRPCDocGenerator mix-in; the
    default request handler is DocXMLRPCRequestHandler.
    """

    def __init__(self, addr, requestHandler=DocXMLRPCRequestHandler,
                 logRequests=True, allow_none=False, encoding=None,
                 bind_and_activate=True, use_builtin_types=False):
        # Both bases are initialised explicitly, server first.
        SimpleXMLRPCServer.__init__(
            self, addr, requestHandler, logRequests,
            allow_none, encoding, bind_and_activate,
            use_builtin_types)
        XMLRPCDocGenerator.__init__(self)
-
class DocCGIXMLRPCRequestHandler(CGIXMLRPCRequestHandler,
                                 XMLRPCDocGenerator):
    """CGI handler for XML-RPC data that also answers documentation
    requests."""

    def __init__(self):
        CGIXMLRPCRequestHandler.__init__(self)
        XMLRPCDocGenerator.__init__(self)

    def handle_get(self):
        """Answer an HTTP GET request with the server documentation.

        The generated HTML page and its headers are printed to stdout.
        """
        page = self.generate_html_documentation()
        body = page.encode('utf-8')

        print('Content-Type: text/html')
        print('Content-Length: %d' % len(body))
        print()
        # Flush the text layer first so the headers precede the raw body.
        sys.stdout.flush()
        sys.stdout.buffer.write(body)
        sys.stdout.buffer.flush()
-
-
if __name__ == '__main__':
    # Demo: serve a few functions and one instance on localhost:8000.
    import datetime

    class ExampleService:
        def getData(self):
            return '42'

        # Nested class reachable as "currentTime.getCurrentTime" because
        # the instance is registered with allow_dotted_names=True.
        class currentTime:
            @staticmethod
            def getCurrentTime():
                return datetime.datetime.now()

    address = ("localhost", 8000)
    server = SimpleXMLRPCServer(address)
    server.register_function(pow)
    server.register_function(lambda x,y: x+y, 'add')
    server.register_instance(ExampleService(), allow_dotted_names=True)
    server.register_multicall_functions()

    print('Serving XML-RPC on localhost port 8000')
    print('It is advisable to run this example server within a secure, closed network.')
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("\nKeyboard interrupt received, exiting.")
        server.server_close()
        sys.exit(0)
+r"""
+Ported using Python-Future from the Python 3.3 standard library.
+
+XML-RPC Servers.
+
+This module can be used to create simple XML-RPC servers
+by creating a server and either installing functions, a
+class instance, or by extending the SimpleXMLRPCServer
+class.
+
+It can also be used to handle XML-RPC requests in a CGI
+environment using CGIXMLRPCRequestHandler.
+
+The Doc* classes can be used to create XML-RPC servers that
+serve pydoc-style documentation in response to HTTP
+GET requests. This documentation is dynamically generated
+based on the functions and methods registered with the
+server.
+
+A list of possible usage patterns follows:
+
+1. Install functions:
+
+server = SimpleXMLRPCServer(("localhost", 8000))
+server.register_function(pow)
+server.register_function(lambda x,y: x+y, 'add')
+server.serve_forever()
+
+2. Install an instance:
+
+class MyFuncs:
+ def __init__(self):
+ # make all of the sys functions available through sys.func_name
+ import sys
+ self.sys = sys
+ def _listMethods(self):
+ # implement this method so that system.listMethods
+ # knows to advertise the sys methods
+ return list_public_methods(self) + \
+ ['sys.' + method for method in list_public_methods(self.sys)]
+ def pow(self, x, y): return pow(x, y)
+ def add(self, x, y) : return x + y
+
+server = SimpleXMLRPCServer(("localhost", 8000))
+server.register_introspection_functions()
+server.register_instance(MyFuncs())
+server.serve_forever()
+
+3. Install an instance with custom dispatch method:
+
+class Math:
+ def _listMethods(self):
+ # this method must be present for system.listMethods
+ # to work
+ return ['add', 'pow']
+ def _methodHelp(self, method):
+ # this method must be present for system.methodHelp
+ # to work
+ if method == 'add':
+ return "add(2,3) => 5"
+ elif method == 'pow':
+ return "pow(x, y[, z]) => number"
+ else:
+ # By convention, return empty
+ # string if no help is available
+ return ""
+ def _dispatch(self, method, params):
+ if method == 'pow':
+ return pow(*params)
+ elif method == 'add':
+ return params[0] + params[1]
+ else:
+ raise ValueError('bad method')
+
+server = SimpleXMLRPCServer(("localhost", 8000))
+server.register_introspection_functions()
+server.register_instance(Math())
+server.serve_forever()
+
+4. Subclass SimpleXMLRPCServer:
+
+class MathServer(SimpleXMLRPCServer):
+ def _dispatch(self, method, params):
+ try:
+ # We are forcing the 'export_' prefix on methods that are
+ # callable through XML-RPC to prevent potential security
+ # problems
+ func = getattr(self, 'export_' + method)
+ except AttributeError:
+ raise Exception('method "%s" is not supported' % method)
+ else:
+ return func(*params)
+
+ def export_add(self, x, y):
+ return x + y
+
+server = MathServer(("localhost", 8000))
+server.serve_forever()
+
+5. CGI script:
+
+server = CGIXMLRPCRequestHandler()
+server.register_function(pow)
+server.handle_request()
+"""
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+from future.builtins import int, str
+
+# Written by Brian Quinlan (brian@sweetapp.com).
+# Based on code written by Fredrik Lundh.
+
+from future.backports.xmlrpc.client import Fault, dumps, loads, gzip_encode, gzip_decode
+from future.backports.http.server import BaseHTTPRequestHandler
+import future.backports.http.server as http_server
+from future.backports import socketserver
+import sys
+import os
+import re
+import pydoc
+import inspect
+import traceback
+try:
+ import fcntl
+except ImportError:
+ fcntl = None
+
def resolve_dotted_attribute(obj, attr, allow_dotted_names=True):
    """resolve_dotted_attribute(a, 'b.c.d') => a.b.c.d

    Walk the dotted name *attr* starting from *obj* and return the object
    found at the end of the chain.  AttributeError is raised as soon as a
    segment of the chain starts with a '_'.

    With a false *allow_dotted_names* the name is not split on dots and
    the function behaves much like getattr(obj, attr).
    """
    segments = attr.split('.') if allow_dotted_names else [attr]

    target = obj
    for segment in segments:
        if segment.startswith('_'):
            raise AttributeError(
                'attempt to access private attribute "%s"' % segment
                )
        target = getattr(target, segment)
    return target
+
def list_public_methods(obj):
    """Return the names of the callable attributes of *obj* that do not
    start with an underscore, in dir() order."""
    public = []
    for name in dir(obj):
        if name.startswith('_'):
            continue
        if callable(getattr(obj, name)):
            public.append(name)
    return public
+
class SimpleXMLRPCDispatcher(object):
    """Mix-in class that dispatches XML-RPC requests.

    This class is used to register XML-RPC method handlers
    and then to dispatch them. This class doesn't need to be
    instanced directly when used by SimpleXMLRPCServer but it
    can be instanced when used by the MultiPathXMLRPCServer
    """

    def __init__(self, allow_none=False, encoding=None,
                 use_builtin_types=False):
        self.funcs = {}          # method name -> registered callable
        self.instance = None     # set by register_instance()
        self.allow_none = allow_none
        self.encoding = encoding or 'utf-8'
        self.use_builtin_types = use_builtin_types

    def register_instance(self, instance, allow_dotted_names=False):
        """Registers an instance to respond to XML-RPC requests.

        Only one instance can be installed at a time.

        If the registered instance has a _dispatch method then that
        method will be called with the name of the XML-RPC method and
        its parameters as a tuple
        e.g. instance._dispatch('add',(2,3))

        If the registered instance does not have a _dispatch method
        then the instance will be searched to find a matching method
        and, if found, will be called. Methods beginning with an '_'
        are considered private and will not be called by
        SimpleXMLRPCServer.

        If a registered function matches a XML-RPC request, then it
        will be called instead of the registered instance.

        If the optional allow_dotted_names argument is true and the
        instance does not have a _dispatch method, method names
        containing dots are supported and resolved, as long as none of
        the name segments start with an '_'.

            *** SECURITY WARNING: ***

            Enabling the allow_dotted_names options allows intruders
            to access your module's global variables and may allow
            intruders to execute arbitrary code on your machine.  Only
            use this option on a secure, closed network.

        """

        self.instance = instance
        self.allow_dotted_names = allow_dotted_names

    def register_function(self, function, name=None):
        """Registers a function to respond to XML-RPC requests.

        The optional name argument can be used to set a Unicode name
        for the function; it defaults to function.__name__.
        """

        if name is None:
            name = function.__name__
        self.funcs[name] = function

    def register_introspection_functions(self):
        """Registers the XML-RPC introspection methods in the system
        namespace.

        see http://xmlrpc.usefulinc.com/doc/reserved.html
        """

        self.funcs.update({'system.listMethods' : self.system_listMethods,
                      'system.methodSignature' : self.system_methodSignature,
                      'system.methodHelp' : self.system_methodHelp})

    def register_multicall_functions(self):
        """Registers the XML-RPC multicall method in the system
        namespace.

        see http://www.xmlrpc.com/discuss/msgReader$1208"""

        self.funcs.update({'system.multicall' : self.system_multicall})

    def _marshaled_dispatch(self, data, dispatch_method = None, path = None):
        """Dispatches an XML-RPC method from marshalled (XML) data.

        XML-RPC methods are dispatched from the marshalled (XML) data
        using the _dispatch method and the result is returned as
        marshalled data. For backwards compatibility, a dispatch
        function can be provided as an argument (see comment in
        SimpleXMLRPCRequestHandler.do_POST) but overriding the
        existing method through subclassing is the preferred means
        of changing method dispatch behavior.

        Any exception raised while handling the call is reported to the
        caller as an XML-RPC fault; the return value is always the
        response encoded to bytes with self.encoding.
        """

        try:
            params, method = loads(data, use_builtin_types=self.use_builtin_types)

            # generate response
            if dispatch_method is not None:
                response = dispatch_method(method, params)
            else:
                response = self._dispatch(method, params)
            # wrap response in a singleton tuple
            response = (response,)
            response = dumps(response, methodresponse=1,
                             allow_none=self.allow_none, encoding=self.encoding)
        except Fault as fault:
            response = dumps(fault, allow_none=self.allow_none,
                             encoding=self.encoding)
        except:
            # Deliberately broad: report any exception back to the caller.
            exc_type, exc_value = sys.exc_info()[:2]
            try:
                response = dumps(
                    Fault(1, "%s:%s" % (exc_type, exc_value)),
                    encoding=self.encoding, allow_none=self.allow_none,
                    )
            finally:
                # Break the frame -> exception -> traceback -> frame cycle.
                exc_type = exc_value = None

        # Characters the configured encoding cannot express are emitted as
        # XML character references instead of raising UnicodeEncodeError
        # after the response has already been built.
        return response.encode(self.encoding, 'xmlcharrefreplace')

    def system_listMethods(self):
        """system.listMethods() => ['add', 'subtract', 'multiple']

        Returns a list of the methods supported by the server."""

        methods = set(self.funcs.keys())
        if self.instance is not None:
            # Instance can implement _listMethods to return a list of
            # methods
            if hasattr(self.instance, '_listMethods'):
                methods |= set(self.instance._listMethods())
            # if the instance has a _dispatch method then we
            # don't have enough information to provide a list
            # of methods
            elif not hasattr(self.instance, '_dispatch'):
                methods |= set(list_public_methods(self.instance))
        return sorted(methods)

    def system_methodSignature(self, method_name):
        """system.methodSignature('add') => [double, int, int]

        Returns a list describing the signature of the method. In the
        above example, the add method takes two integers as arguments
        and returns a double result.

        This server does NOT support system.methodSignature."""

        # See http://xmlrpc.usefulinc.com/doc/sysmethodsig.html

        return 'signatures not supported'

    def system_methodHelp(self, method_name):
        """system.methodHelp('add') => "Adds two integers together"

        Returns a string containing documentation for the specified method."""

        method = None
        if method_name in self.funcs:
            method = self.funcs[method_name]
        elif self.instance is not None:
            # Instance can implement _methodHelp to return help for a method
            if hasattr(self.instance, '_methodHelp'):
                return self.instance._methodHelp(method_name)
            # if the instance has a _dispatch method then we
            # don't have enough information to provide help
            elif not hasattr(self.instance, '_dispatch'):
                try:
                    method = resolve_dotted_attribute(
                                self.instance,
                                method_name,
                                self.allow_dotted_names
                                )
                except AttributeError:
                    pass

        # Note that we aren't checking that the method actually
        # be a callable object of some kind
        if method is None:
            return ""
        else:
            return pydoc.getdoc(method)

    def system_multicall(self, call_list):
        """system.multicall([{'methodName': 'add', 'params': [2, 2]}, ...]) => \
[[4], ...]

        Allows the caller to package multiple XML-RPC calls into a single
        request.  Each successful call contributes a one-element list; a
        failing call contributes a fault struct instead.

        See http://www.xmlrpc.com/discuss/msgReader$1208
        """

        results = []
        for call in call_list:
            method_name = call['methodName']
            params = call['params']

            try:
                # XXX A marshalling error in any response will fail the entire
                # multicall. If someone cares they should fix this.
                results.append([self._dispatch(method_name, params)])
            except Fault as fault:
                results.append(
                    {'faultCode' : fault.faultCode,
                     'faultString' : fault.faultString}
                    )
            except:
                # Deliberately broad: one failing call must not abort the
                # remaining calls of the batch.
                exc_type, exc_value = sys.exc_info()[:2]
                try:
                    results.append(
                        {'faultCode' : 1,
                         'faultString' : "%s:%s" % (exc_type, exc_value)}
                        )
                finally:
                    # Break the reference cycle through the traceback.
                    exc_type = exc_value = None
        return results

    def _dispatch(self, method, params):
        """Dispatches the XML-RPC method.

        XML-RPC calls are forwarded to a registered function that
        matches the called XML-RPC method name. If no such function
        exists then the call is forwarded to the registered instance,
        if available.

        If the registered instance has a _dispatch method then that
        method will be called with the name of the XML-RPC method and
        its parameters as a tuple
        e.g. instance._dispatch('add',(2,3))

        If the registered instance does not have a _dispatch method
        then the instance will be searched to find a matching method
        and, if found, will be called.

        Methods beginning with an '_' are considered private and will
        not be called.

        Raises Exception('method "<name>" is not supported') when nothing
        matches.
        """

        # The lookup failure is handled with "pass" and the real work is
        # done outside the except block, so exceptions raised by the user
        # code are not chained to an unrelated KeyError.
        try:
            # check to see if a matching function has been registered
            func = self.funcs[method]
        except KeyError:
            pass
        else:
            if func is not None:
                return func(*params)
            raise Exception('method "%s" is not supported' % method)

        if self.instance is not None:
            # check for a _dispatch method
            if hasattr(self.instance, '_dispatch'):
                return self.instance._dispatch(method, params)

            # call instance method directly
            try:
                func = resolve_dotted_attribute(
                    self.instance,
                    method,
                    self.allow_dotted_names
                    )
            except AttributeError:
                pass
            else:
                if func is not None:
                    return func(*params)

        raise Exception('method "%s" is not supported' % method)
+
class SimpleXMLRPCRequestHandler(BaseHTTPRequestHandler):
    """Simple XML-RPC request handler class.

    Handles all HTTP POST requests and attempts to decode them as
    XML-RPC requests.
    """

    # Class attribute listing the accessible path components;
    # paths not on this list will result in a 404 error.
    rpc_paths = ('/', '/RPC2')

    #if not None, encode responses larger than this, if possible
    encode_threshold = 1400 #a common MTU

    #Override from StreamRequestHandler: full buffering of output
    #and no Nagle.
    wbufsize = -1
    disable_nagle_algorithm = True

    # a re to match a gzip Accept-Encoding
    aepattern = re.compile(r"""
                            \s* ([^\s;]+) \s*            #content-coding
                            (;\s* q \s*=\s* ([0-9\.]+))? #q
                            """, re.VERBOSE | re.IGNORECASE)

    def accept_encodings(self):
        """Return {content-coding: q-value} parsed from Accept-Encoding.

        Codings without an explicit q-value get 1.0.  Entries whose
        q-value is not a valid number (e.g. "gzip;q=1.2.3") are skipped:
        the pattern accepts any run of digits and dots, and letting
        float() raise here would abort a response whose status line has
        already been sent.
        """
        r = {}
        ae = self.headers.get("Accept-Encoding", "")
        for e in ae.split(","):
            match = self.aepattern.match(e)
            if not match:
                continue
            v = match.group(3)
            try:
                r[match.group(1)] = float(v) if v else 1.0
            except ValueError:
                continue
        return r

    def is_rpc_path_valid(self):
        """Return True when self.path may be served by this handler."""
        if self.rpc_paths:
            return self.path in self.rpc_paths
        else:
            # If .rpc_paths is empty, just assume all paths are legal
            return True

    def do_POST(self):
        """Handles the HTTP POST request.

        Attempts to interpret all HTTP POST requests as XML-RPC calls,
        which are forwarded to the server's _dispatch method for handling.
        """

        # Check that the path is legal
        if not self.is_rpc_path_valid():
            self.report_404()
            return

        try:
            # Get arguments by reading body of request.
            # We read this in chunks to avoid straining
            # socket.read(); around the 10 or 15Mb mark, some platforms
            # begin to have problems (bug #792570).
            max_chunk_size = 10*1024*1024
            size_remaining = int(self.headers["content-length"])
            L = []
            while size_remaining:
                chunk_size = min(size_remaining, max_chunk_size)
                chunk = self.rfile.read(chunk_size)
                if not chunk:
                    break
                L.append(chunk)
                size_remaining -= len(L[-1])
            data = b''.join(L)

            data = self.decode_request_content(data)
            if data is None:
                return #response has been sent

            # In previous versions of SimpleXMLRPCServer, _dispatch
            # could be overridden in this class, instead of in
            # SimpleXMLRPCDispatcher. To maintain backwards compatibility,
            # check to see if a subclass implements _dispatch and dispatch
            # using that method if present.
            response = self.server._marshaled_dispatch(
                    data, getattr(self, '_dispatch', None), self.path
                )
        except Exception as e: # This should only happen if the module is buggy
            # internal error, report as HTTP server error
            self.send_response(500)

            # Send information about the exception if requested
            if hasattr(self.server, '_send_traceback_header') and \
                    self.server._send_traceback_header:
                self.send_header("X-exception", str(e))
                trace = traceback.format_exc()
                # Header values must be ASCII; escape everything else.
                trace = str(trace.encode('ASCII', 'backslashreplace'), 'ASCII')
                self.send_header("X-traceback", trace)

            self.send_header("Content-length", "0")
            self.end_headers()
        else:
            self.send_response(200)
            self.send_header("Content-type", "text/xml")
            if self.encode_threshold is not None:
                if len(response) > self.encode_threshold:
                    q = self.accept_encodings().get("gzip", 0)
                    if q:
                        try:
                            response = gzip_encode(response)
                            self.send_header("Content-Encoding", "gzip")
                        except NotImplementedError:
                            pass
            self.send_header("Content-length", str(len(response)))
            self.end_headers()
            self.wfile.write(response)

    def decode_request_content(self, data):
        """Undo the request's Content-Encoding.

        Returns the decoded body for "identity" and "gzip".  For anything
        else, or when decoding fails, an error response is sent and None
        is returned.
        """
        #support gzip encoding of request
        encoding = self.headers.get("content-encoding", "identity").lower()
        if encoding == "identity":
            return data
        if encoding == "gzip":
            try:
                return gzip_decode(data)
            except NotImplementedError:
                self.send_response(501, "encoding %r not supported" % encoding)
            except ValueError:
                self.send_response(400, "error decoding gzip content")
        else:
            self.send_response(501, "encoding %r not supported" % encoding)
        self.send_header("Content-length", "0")
        self.end_headers()

    def report_404 (self):
        """Send a plain-text 404 response."""
        # Report a 404 error
        self.send_response(404)
        response = b'No such page'
        self.send_header("Content-type", "text/plain")
        self.send_header("Content-length", str(len(response)))
        self.end_headers()
        self.wfile.write(response)

    def log_request(self, code='-', size='-'):
        """Selectively log an accepted request."""

        if self.server.logRequests:
            BaseHTTPRequestHandler.log_request(self, code, size)
+
class SimpleXMLRPCServer(socketserver.TCPServer,
                         SimpleXMLRPCDispatcher):
    """Simple XML-RPC server.

    A TCP server onto which functions and a single instance can be
    installed to handle requests.  By default XML-RPC calls are
    dispatched to those functions or that instance; override the
    _dispatch method inherited from SimpleXMLRPCDispatcher to change
    this behavior.
    """

    allow_reuse_address = True

    # Warning: this is for debugging purposes only! Never set this to True in
    # production code, as will be sending out sensitive information (exception
    # and stack trace details) when exceptions are raised inside
    # SimpleXMLRPCRequestHandler.do_POST
    _send_traceback_header = False

    def __init__(self, addr, requestHandler=SimpleXMLRPCRequestHandler,
                 logRequests=True, allow_none=False, encoding=None,
                 bind_and_activate=True, use_builtin_types=False):
        self.logRequests = logRequests

        SimpleXMLRPCDispatcher.__init__(self, allow_none, encoding, use_builtin_types)
        socketserver.TCPServer.__init__(self, addr, requestHandler, bind_and_activate)

        # [Bug #1222790] If possible, set close-on-exec flag; if a
        # method spawns a subprocess, the subprocess shouldn't have
        # the listening socket open.
        if fcntl is None or not hasattr(fcntl, 'FD_CLOEXEC'):
            return
        fd_flags = fcntl.fcntl(self.fileno(), fcntl.F_GETFD)
        fd_flags |= fcntl.FD_CLOEXEC
        fcntl.fcntl(self.fileno(), fcntl.F_SETFD, fd_flags)
+
class MultiPathXMLRPCServer(SimpleXMLRPCServer):
    """Multipath XML-RPC Server
    This specialization of SimpleXMLRPCServer allows the user to create
    multiple Dispatcher instances and assign them to different
    HTTP request paths.  This makes it possible to run two or more
    'virtual XML-RPC servers' at the same port.
    Make sure that the requestHandler accepts the paths in question.
    """
    def __init__(self, addr, requestHandler=SimpleXMLRPCRequestHandler,
                 logRequests=True, allow_none=False, encoding=None,
                 bind_and_activate=True, use_builtin_types=False):

        SimpleXMLRPCServer.__init__(self, addr, requestHandler, logRequests, allow_none,
                                    encoding, bind_and_activate, use_builtin_types)
        # Request path -> dispatcher; filled through add_dispatcher().
        self.dispatchers = {}
        self.allow_none = allow_none
        self.encoding = encoding or 'utf-8'

    def add_dispatcher(self, path, dispatcher):
        """Bind *dispatcher* to the request *path* and return it."""
        self.dispatchers[path] = dispatcher
        return dispatcher

    def get_dispatcher(self, path):
        """Return the dispatcher bound to *path* (KeyError if none)."""
        return self.dispatchers[path]

    def _marshaled_dispatch(self, data, dispatch_method = None, path = None):
        """Forward a marshalled request to the dispatcher bound to *path*.

        Returns the dispatcher's response.  If the lookup or the dispatcher
        itself raises, an encoded XML-RPC fault (code 1) is returned instead.
        """
        try:
            response = self.dispatchers[path]._marshaled_dispatch(
               data, dispatch_method, path)
        except:
            # report low level exception back to server
            # (each dispatcher should have handled their own
            # exceptions)
            exc_type, exc_value = sys.exc_info()[:2]
            try:
                response = dumps(
                    Fault(1, "%s:%s" % (exc_type, exc_value)),
                    encoding=self.encoding, allow_none=self.allow_none)
                # The fault text embeds the exception message, which may
                # hold characters the configured encoding cannot express;
                # emit those as XML character references rather than
                # raising UnicodeEncodeError out of the error handler.
                response = response.encode(self.encoding, 'xmlcharrefreplace')
            finally:
                # Break the reference cycle through the traceback.
                exc_type = exc_value = None
        return response
+
class CGIXMLRPCRequestHandler(SimpleXMLRPCDispatcher):
    """Dispatcher that answers XML-RPC requests delivered through CGI.

    Responses (headers followed by the body) are written to sys.stdout.
    """

    def __init__(self, allow_none=False, encoding=None, use_builtin_types=False):
        SimpleXMLRPCDispatcher.__init__(self, allow_none, encoding, use_builtin_types)

    def handle_xmlrpc(self, request_text):
        """Dispatch one XML-RPC request and print the response."""
        payload = self._marshaled_dispatch(request_text)

        print('Content-Type: text/xml')
        print('Content-Length: %d' % len(payload))
        print()
        # Flush the text layer before writing bytes to the underlying
        # buffer so the headers cannot end up after the body.
        sys.stdout.flush()
        sys.stdout.buffer.write(payload)
        sys.stdout.buffer.flush()

    def handle_get(self):
        """Answer an HTTP GET with a 400 error page.

        XML-RPC is carried over POST, so a GET is reported as a bad
        request by the default implementation.
        """
        status = 400
        short_msg, long_msg = BaseHTTPRequestHandler.responses[status]
        fields = {
            'code' : status,
            'message' : short_msg,
            'explain' : long_msg
        }
        payload = (http_server.DEFAULT_ERROR_MESSAGE % fields).encode('utf-8')

        print('Status: %d %s' % (status, short_msg))
        print('Content-Type: %s' % http_server.DEFAULT_ERROR_CONTENT_TYPE)
        print('Content-Length: %d' % len(payload))
        print()
        sys.stdout.flush()
        sys.stdout.buffer.write(payload)
        sys.stdout.buffer.flush()

    def handle_request(self, request_text=None):
        """Handle one XML-RPC request received through a CGI POST.

        When *request_text* is None the XML is read from stdin (unless the
        REQUEST_METHOD environment variable says GET, in which case
        handle_get() answers).  The response and its HTTP headers are
        printed to stdout.
        """
        if request_text is None and \
                os.environ.get('REQUEST_METHOD', None) == 'GET':
            self.handle_get()
            return

        # POST data is normally available through stdin; a missing or
        # non-numeric CONTENT_LENGTH means "read everything".
        try:
            length = int(os.environ.get('CONTENT_LENGTH', None))
        except (ValueError, TypeError):
            length = -1
        if request_text is None:
            request_text = sys.stdin.read(length)

        self.handle_xmlrpc(request_text)
+
+
+# -----------------------------------------------------------------------------
+# Self documenting XML-RPC Server.
+
+class ServerHTMLDoc(pydoc.HTMLDoc):
+ """Class used to generate pydoc HTML document for a server"""
+
+ def markup(self, text, escape=None, funcs={}, classes={}, methods={}):
+ """Mark up some plain text, given a context of symbols to look for.
+ Each context dictionary maps object names to anchor names."""
+ escape = escape or self.escape
+ results = []
+ here = 0
+
+ # XXX Note that this regular expression does not allow for the
+ # hyperlinking of arbitrary strings being used as method
+ # names. Only methods with names consisting of word characters
+ # and '.'s are hyperlinked.
+ pattern = re.compile(r'\b((http|ftp)://\S+[\w/]|'
+ r'RFC[- ]?(\d+)|'
+ r'PEP[- ]?(\d+)|'
+ r'(self\.)?((?:\w|\.)+))\b')
+ while 1:
+ match = pattern.search(text, here)
+ if not match: break
+ start, end = match.span()
+ results.append(escape(text[here:start]))
+
+ all, scheme, rfc, pep, selfdot, name = match.groups()
+ if scheme:
+ url = escape(all).replace('"', '&quot;')
+ results.append('<a href="%s">%s</a>' % (url, url))
+ elif rfc:
+ url = 'http://www.rfc-editor.org/rfc/rfc%d.txt' % int(rfc)
+ results.append('<a href="%s">%s</a>' % (url, escape(all)))
+ elif pep:
+ url = 'http://www.python.org/dev/peps/pep-%04d/' % int(pep)
+ results.append('<a href="%s">%s</a>' % (url, escape(all)))
+ elif text[end:end+1] == '(':
+ results.append(self.namelink(name, methods, funcs, classes))
+ elif selfdot:
+ results.append('self.<strong>%s</strong>' % name)
+ else:
+ results.append(self.namelink(name, classes))
+ here = end
+ results.append(escape(text[here:]))
+ return ''.join(results)
+
+ def docroutine(self, object, name, mod=None,
+ funcs={}, classes={}, methods={}, cl=None):
+ """Produce HTML documentation for a function or method object."""
+
+ anchor = (cl and cl.__name__ or '') + '-' + name
+ note = ''
+
+ title = '<a name="%s"><strong>%s</strong></a>' % (
+ self.escape(anchor), self.escape(name))
+
+ if inspect.ismethod(object):
+ args = inspect.getfullargspec(object)
+ # exclude the argument bound to the instance, it will be
+ # confusing to the non-Python user
+ argspec = inspect.formatargspec (
+ args.args[1:],
+ args.varargs,
+ args.varkw,
+ args.defaults,
+ annotations=args.annotations,
+ formatvalue=self.formatvalue
+ )
+ elif inspect.isfunction(object):
+ args = inspect.getfullargspec(object)
+ argspec = inspect.formatargspec(
+ args.args, args.varargs, args.varkw, args.defaults,
+ annotations=args.annotations,
+ formatvalue=self.formatvalue)
+ else:
+ argspec = '(...)'
+
+ if isinstance(object, tuple):
+ argspec = object[0] or argspec
+ docstring = object[1] or ""
+ else:
+ docstring = pydoc.getdoc(object)
+
+ decl = title + argspec + (note and self.grey(
+ '<font face="helvetica, arial">%s</font>' % note))
+
+ doc = self.markup(
+ docstring, self.preformat, funcs, classes, methods)
+ doc = doc and '<dd><tt>%s</tt></dd>' % doc
+ return '<dl><dt>%s</dt>%s</dl>\n' % (decl, doc)
+
+ def docserver(self, server_name, package_documentation, methods):
+ """Produce HTML documentation for an XML-RPC server."""
+
+ fdict = {}
+ for key, value in methods.items():
+ fdict[key] = '#-' + key
+ fdict[value] = fdict[key]
+
+ server_name = self.escape(server_name)
+ head = '<big><big><strong>%s</strong></big></big>' % server_name
+ result = self.heading(head, '#ffffff', '#7799ee')
+
+ doc = self.markup(package_documentation, self.preformat, fdict)
+ doc = doc and '<tt>%s</tt>' % doc
+ result = result + '<p>%s</p>\n' % doc
+
+ contents = []
+ method_items = sorted(methods.items())
+ for key, value in method_items:
+ contents.append(self.docroutine(value, key, funcs=fdict))
+ result = result + self.bigsection(
+ 'Methods', '#ffffff', '#eeaa77', ''.join(contents))
+
+ return result
+
+class XMLRPCDocGenerator(object):
+ """Generates documentation for an XML-RPC server.
+
+ This class is designed as mix-in and should not
+ be constructed directly.
+ """
+
+ def __init__(self):
+ # setup variables used for HTML documentation
+ self.server_name = 'XML-RPC Server Documentation'
+ self.server_documentation = \
+ "This server exports the following methods through the XML-RPC "\
+ "protocol."
+ self.server_title = 'XML-RPC Server Documentation'
+
+ def set_server_title(self, server_title):
+ """Set the HTML title of the generated server documentation"""
+
+ self.server_title = server_title
+
+ def set_server_name(self, server_name):
+ """Set the name of the generated HTML server documentation"""
+
+ self.server_name = server_name
+
+ def set_server_documentation(self, server_documentation):
+ """Set the documentation string for the entire server."""
+
+ self.server_documentation = server_documentation
+
+ def generate_html_documentation(self):
+ """generate_html_documentation() => html documentation for the server
+
+ Generates HTML documentation for the server using introspection for
+ installed functions and instances that do not implement the
+ _dispatch method. Alternatively, instances can choose to implement
+ the _get_method_argstring(method_name) method to provide the
+ argument string used in the documentation and the
+ _methodHelp(method_name) method to provide the help text used
+ in the documentation."""
+
+ methods = {}
+
+ for method_name in self.system_listMethods():
+ if method_name in self.funcs:
+ method = self.funcs[method_name]
+ elif self.instance is not None:
+ method_info = [None, None] # argspec, documentation
+ if hasattr(self.instance, '_get_method_argstring'):
+ method_info[0] = self.instance._get_method_argstring(method_name)
+ if hasattr(self.instance, '_methodHelp'):
+ method_info[1] = self.instance._methodHelp(method_name)
+
+ method_info = tuple(method_info)
+ if method_info != (None, None):
+ method = method_info
+ elif not hasattr(self.instance, '_dispatch'):
+ try:
+ method = resolve_dotted_attribute(
+ self.instance,
+ method_name
+ )
+ except AttributeError:
+ method = method_info
+ else:
+ method = method_info
+ else:
+ assert 0, "Could not find method in self.functions and no "\
+ "instance installed"
+
+ methods[method_name] = method
+
+ documenter = ServerHTMLDoc()
+ documentation = documenter.docserver(
+ self.server_name,
+ self.server_documentation,
+ methods
+ )
+
+ return documenter.page(self.server_title, documentation)
+
+class DocXMLRPCRequestHandler(SimpleXMLRPCRequestHandler):
+ """XML-RPC and documentation request handler class.
+
+ Handles all HTTP POST requests and attempts to decode them as
+ XML-RPC requests.
+
+ Handles all HTTP GET requests and interprets them as requests
+ for documentation.
+ """
+
+ def do_GET(self):
+ """Handles the HTTP GET request.
+
+ Interpret all HTTP GET requests as requests for server
+ documentation.
+ """
+ # Check that the path is legal
+ if not self.is_rpc_path_valid():
+ self.report_404()
+ return
+
+ response = self.server.generate_html_documentation().encode('utf-8')
+ self.send_response(200)
+ self.send_header("Content-type", "text/html")
+ self.send_header("Content-length", str(len(response)))
+ self.end_headers()
+ self.wfile.write(response)
+
+class DocXMLRPCServer( SimpleXMLRPCServer,
+ XMLRPCDocGenerator):
+ """XML-RPC and HTML documentation server.
+
+ Adds the ability to serve server documentation to the capabilities
+ of SimpleXMLRPCServer.
+ """
+
+ def __init__(self, addr, requestHandler=DocXMLRPCRequestHandler,
+ logRequests=True, allow_none=False, encoding=None,
+ bind_and_activate=True, use_builtin_types=False):
+ SimpleXMLRPCServer.__init__(self, addr, requestHandler, logRequests,
+ allow_none, encoding, bind_and_activate,
+ use_builtin_types)
+ XMLRPCDocGenerator.__init__(self)
+
+class DocCGIXMLRPCRequestHandler( CGIXMLRPCRequestHandler,
+ XMLRPCDocGenerator):
+ """Handler for XML-RPC data and documentation requests passed through
+ CGI"""
+
+ def handle_get(self):
+ """Handles the HTTP GET request.
+
+ Interpret all HTTP GET requests as requests for server
+ documentation.
+ """
+
+ response = self.generate_html_documentation().encode('utf-8')
+
+ print('Content-Type: text/html')
+ print('Content-Length: %d' % len(response))
+ print()
+ sys.stdout.flush()
+ sys.stdout.buffer.write(response)
+ sys.stdout.buffer.flush()
+
+ def __init__(self):
+ CGIXMLRPCRequestHandler.__init__(self)
+ XMLRPCDocGenerator.__init__(self)
+
+
+if __name__ == '__main__':
+ import datetime
+
+ class ExampleService:
+ def getData(self):
+ return '42'
+
+ class currentTime:
+ @staticmethod
+ def getCurrentTime():
+ return datetime.datetime.now()
+
+ server = SimpleXMLRPCServer(("localhost", 8000))
+ server.register_function(pow)
+ server.register_function(lambda x,y: x+y, 'add')
+ server.register_instance(ExampleService(), allow_dotted_names=True)
+ server.register_multicall_functions()
+ print('Serving XML-RPC on localhost port 8000')
+ print('It is advisable to run this example server within a secure, closed network.')
+ try:
+ server.serve_forever()
+ except KeyboardInterrupt:
+ print("\nKeyboard interrupt received, exiting.")
+ server.server_close()
+ sys.exit(0)
diff --git a/contrib/python/future/future/builtins/__init__.py b/contrib/python/future/future/builtins/__init__.py
index 5485a2a616..8bc1649d2f 100644
--- a/contrib/python/future/future/builtins/__init__.py
+++ b/contrib/python/future/future/builtins/__init__.py
@@ -1,51 +1,51 @@
-"""
-A module that brings in equivalents of the new and modified Python 3
-builtins into Py2. Has no effect on Py3.
-
-See the docs `here <http://python-future.org/what-else.html>`_
-(``docs/what-else.rst``) for more information.
-
-"""
-
-from future.builtins.iterators import (filter, map, zip)
-# The isinstance import is no longer needed. We provide it only for
-# backward-compatibility with future v0.8.2. It will be removed in future v1.0.
-from future.builtins.misc import (ascii, chr, hex, input, isinstance, next,
+"""
+A module that brings in equivalents of the new and modified Python 3
+builtins into Py2. Has no effect on Py3.
+
+See the docs `here <http://python-future.org/what-else.html>`_
+(``docs/what-else.rst``) for more information.
+
+"""
+
+from future.builtins.iterators import (filter, map, zip)
+# The isinstance import is no longer needed. We provide it only for
+# backward-compatibility with future v0.8.2. It will be removed in future v1.0.
+from future.builtins.misc import (ascii, chr, hex, input, isinstance, next,
oct, open, pow, round, super, max, min)
-from future.utils import PY3
-
-if PY3:
- import builtins
- bytes = builtins.bytes
- dict = builtins.dict
- int = builtins.int
- list = builtins.list
- object = builtins.object
- range = builtins.range
- str = builtins.str
- __all__ = []
-else:
- from future.types import (newbytes as bytes,
- newdict as dict,
- newint as int,
- newlist as list,
- newobject as object,
- newrange as range,
- newstr as str)
-from future import utils
-
-
-if not utils.PY3:
- # We only import names that shadow the builtins on Py2. No other namespace
- # pollution on Py2.
-
- # Only shadow builtins on Py2; no new names
+from future.utils import PY3
+
+if PY3:
+ import builtins
+ bytes = builtins.bytes
+ dict = builtins.dict
+ int = builtins.int
+ list = builtins.list
+ object = builtins.object
+ range = builtins.range
+ str = builtins.str
+ __all__ = []
+else:
+ from future.types import (newbytes as bytes,
+ newdict as dict,
+ newint as int,
+ newlist as list,
+ newobject as object,
+ newrange as range,
+ newstr as str)
+from future import utils
+
+
+if not utils.PY3:
+ # We only import names that shadow the builtins on Py2. No other namespace
+ # pollution on Py2.
+
+ # Only shadow builtins on Py2; no new names
__all__ = ['filter', 'map', 'zip',
- 'ascii', 'chr', 'hex', 'input', 'next', 'oct', 'open', 'pow',
- 'round', 'super',
+ 'ascii', 'chr', 'hex', 'input', 'next', 'oct', 'open', 'pow',
+ 'round', 'super',
'bytes', 'dict', 'int', 'list', 'object', 'range', 'str', 'max', 'min'
- ]
-
-else:
- # No namespace pollution on Py3
- __all__ = []
+ ]
+
+else:
+ # No namespace pollution on Py3
+ __all__ = []
diff --git a/contrib/python/future/future/builtins/disabled.py b/contrib/python/future/future/builtins/disabled.py
index 77bb7796bf..f6d6ea9b80 100644
--- a/contrib/python/future/future/builtins/disabled.py
+++ b/contrib/python/future/future/builtins/disabled.py
@@ -1,66 +1,66 @@
-"""
-This disables builtin functions (and one exception class) which are
-removed from Python 3.3.
-
-This module is designed to be used like this::
-
- from future.builtins.disabled import *
-
-This disables the following obsolete Py2 builtin functions::
-
- apply, cmp, coerce, execfile, file, input, long,
- raw_input, reduce, reload, unicode, xrange
-
-We don't hack __builtin__, which is very fragile because it contaminates
-imported modules too. Instead, we just create new functions with
-the same names as the obsolete builtins from Python 2 which raise
-NameError exceptions when called.
-
-Note that both ``input()`` and ``raw_input()`` are among the disabled
-functions (in this module). Although ``input()`` exists as a builtin in
-Python 3, the Python 2 ``input()`` builtin is unsafe to use because it
-can lead to shell injection. Therefore we shadow it by default upon ``from
-future.builtins.disabled import *``, in case someone forgets to import our
-replacement ``input()`` somehow and expects Python 3 semantics.
-
-See the ``future.builtins.misc`` module for a working version of
-``input`` with Python 3 semantics.
-
-(Note that callable() is not among the functions disabled; this was
-reintroduced into Python 3.2.)
-
-This exception class is also disabled:
-
- StandardError
-
-"""
-
-from __future__ import division, absolute_import, print_function
-
-from future import utils
-
-
-OBSOLETE_BUILTINS = ['apply', 'chr', 'cmp', 'coerce', 'execfile', 'file',
- 'input', 'long', 'raw_input', 'reduce', 'reload',
- 'unicode', 'xrange', 'StandardError']
-
-
-def disabled_function(name):
- '''
- Returns a function that cannot be called
- '''
- def disabled(*args, **kwargs):
- '''
- A function disabled by the ``future`` module. This function is
- no longer a builtin in Python 3.
- '''
- raise NameError('obsolete Python 2 builtin {0} is disabled'.format(name))
- return disabled
-
-
-if not utils.PY3:
- for fname in OBSOLETE_BUILTINS:
- locals()[fname] = disabled_function(fname)
- __all__ = OBSOLETE_BUILTINS
-else:
- __all__ = []
+"""
+This disables builtin functions (and one exception class) which are
+removed from Python 3.3.
+
+This module is designed to be used like this::
+
+ from future.builtins.disabled import *
+
+This disables the following obsolete Py2 builtin functions::
+
+ apply, cmp, coerce, execfile, file, input, long,
+ raw_input, reduce, reload, unicode, xrange
+
+We don't hack __builtin__, which is very fragile because it contaminates
+imported modules too. Instead, we just create new functions with
+the same names as the obsolete builtins from Python 2 which raise
+NameError exceptions when called.
+
+Note that both ``input()`` and ``raw_input()`` are among the disabled
+functions (in this module). Although ``input()`` exists as a builtin in
+Python 3, the Python 2 ``input()`` builtin is unsafe to use because it
+can lead to shell injection. Therefore we shadow it by default upon ``from
+future.builtins.disabled import *``, in case someone forgets to import our
+replacement ``input()`` somehow and expects Python 3 semantics.
+
+See the ``future.builtins.misc`` module for a working version of
+``input`` with Python 3 semantics.
+
+(Note that callable() is not among the functions disabled; this was
+reintroduced into Python 3.2.)
+
+This exception class is also disabled:
+
+ StandardError
+
+"""
+
+from __future__ import division, absolute_import, print_function
+
+from future import utils
+
+
+OBSOLETE_BUILTINS = ['apply', 'chr', 'cmp', 'coerce', 'execfile', 'file',
+ 'input', 'long', 'raw_input', 'reduce', 'reload',
+ 'unicode', 'xrange', 'StandardError']
+
+
+def disabled_function(name):
+ '''
+ Returns a function that cannot be called
+ '''
+ def disabled(*args, **kwargs):
+ '''
+ A function disabled by the ``future`` module. This function is
+ no longer a builtin in Python 3.
+ '''
+ raise NameError('obsolete Python 2 builtin {0} is disabled'.format(name))
+ return disabled
+
+
+if not utils.PY3:
+ for fname in OBSOLETE_BUILTINS:
+ locals()[fname] = disabled_function(fname)
+ __all__ = OBSOLETE_BUILTINS
+else:
+ __all__ = []
diff --git a/contrib/python/future/future/builtins/iterators.py b/contrib/python/future/future/builtins/iterators.py
index 362d8529b6..dff651e0f4 100644
--- a/contrib/python/future/future/builtins/iterators.py
+++ b/contrib/python/future/future/builtins/iterators.py
@@ -1,52 +1,52 @@
-"""
-This module is designed to be used as follows::
-
- from future.builtins.iterators import *
-
-And then, for example::
-
- for i in range(10**15):
- pass
-
- for (a, b) in zip(range(10**15), range(-10**15, 0)):
- pass
-
-Note that this is standard Python 3 code, plus some imports that do
-nothing on Python 3.
-
-The iterators this brings in are::
-
-- ``range``
-- ``filter``
-- ``map``
-- ``zip``
-
-On Python 2, ``range`` is a pure-Python backport of Python 3's ``range``
-iterator with slicing support. The other iterators (``filter``, ``map``,
-``zip``) are from the ``itertools`` module on Python 2. On Python 3 these
-are available in the module namespace but not exported for * imports via
-__all__ (zero no namespace pollution).
-
-Note that these are also available in the standard library
-``future_builtins`` module on Python 2 -- but not Python 3, so using
-the standard library version is not portable, nor anywhere near complete.
-"""
-
-from __future__ import division, absolute_import, print_function
-
-import itertools
-from future import utils
-
-if not utils.PY3:
- filter = itertools.ifilter
- map = itertools.imap
- from future.types import newrange as range
- zip = itertools.izip
- __all__ = ['filter', 'map', 'range', 'zip']
-else:
- import builtins
- filter = builtins.filter
- map = builtins.map
- range = builtins.range
- zip = builtins.zip
- __all__ = []
+"""
+This module is designed to be used as follows::
+
+ from future.builtins.iterators import *
+
+And then, for example::
+
+ for i in range(10**15):
+ pass
+
+ for (a, b) in zip(range(10**15), range(-10**15, 0)):
+ pass
+
+Note that this is standard Python 3 code, plus some imports that do
+nothing on Python 3.
+
+The iterators this brings in are::
+
+- ``range``
+- ``filter``
+- ``map``
+- ``zip``
+
+On Python 2, ``range`` is a pure-Python backport of Python 3's ``range``
+iterator with slicing support. The other iterators (``filter``, ``map``,
+``zip``) are from the ``itertools`` module on Python 2. On Python 3 these
+are available in the module namespace but not exported for * imports via
+__all__ (zero no namespace pollution).
+
+Note that these are also available in the standard library
+``future_builtins`` module on Python 2 -- but not Python 3, so using
+the standard library version is not portable, nor anywhere near complete.
+"""
+
+from __future__ import division, absolute_import, print_function
+
+import itertools
+from future import utils
+
+if not utils.PY3:
+ filter = itertools.ifilter
+ map = itertools.imap
+ from future.types import newrange as range
+ zip = itertools.izip
+ __all__ = ['filter', 'map', 'range', 'zip']
+else:
+ import builtins
+ filter = builtins.filter
+ map = builtins.map
+ range = builtins.range
+ zip = builtins.zip
+ __all__ = []
diff --git a/contrib/python/future/future/builtins/misc.py b/contrib/python/future/future/builtins/misc.py
index 089839ebee..f86ce5f342 100644
--- a/contrib/python/future/future/builtins/misc.py
+++ b/contrib/python/future/future/builtins/misc.py
@@ -1,119 +1,119 @@
-"""
-A module that brings in equivalents of various modified Python 3 builtins
-into Py2. Has no effect on Py3.
-
-The builtin functions are:
-
-- ``ascii`` (from Py2's future_builtins module)
-- ``hex`` (from Py2's future_builtins module)
-- ``oct`` (from Py2's future_builtins module)
-- ``chr`` (equivalent to ``unichr`` on Py2)
-- ``input`` (equivalent to ``raw_input`` on Py2)
-- ``next`` (calls ``__next__`` if it exists, else ``next`` method)
-- ``open`` (equivalent to io.open on Py2)
-- ``super`` (backport of Py3's magic zero-argument super() function
-- ``round`` (new "Banker's Rounding" behaviour from Py3)
+"""
+A module that brings in equivalents of various modified Python 3 builtins
+into Py2. Has no effect on Py3.
+
+The builtin functions are:
+
+- ``ascii`` (from Py2's future_builtins module)
+- ``hex`` (from Py2's future_builtins module)
+- ``oct`` (from Py2's future_builtins module)
+- ``chr`` (equivalent to ``unichr`` on Py2)
+- ``input`` (equivalent to ``raw_input`` on Py2)
+- ``next`` (calls ``__next__`` if it exists, else ``next`` method)
+- ``open`` (equivalent to io.open on Py2)
+- ``super`` (backport of Py3's magic zero-argument super() function
+- ``round`` (new "Banker's Rounding" behaviour from Py3)
- ``max`` (new default option from Py3.4)
- ``min`` (new default option from Py3.4)
-
-``isinstance`` is also currently exported for backwards compatibility
-with v0.8.2, although this has been deprecated since v0.9.
-
-
-input()
--------
-Like the new ``input()`` function from Python 3 (without eval()), except
-that it returns bytes. Equivalent to Python 2's ``raw_input()``.
-
-Warning: By default, importing this module *removes* the old Python 2
-input() function entirely from ``__builtin__`` for safety. This is
-because forgetting to import the new ``input`` from ``future`` might
-otherwise lead to a security vulnerability (shell injection) on Python 2.
-
-To restore it, you can retrieve it yourself from
-``__builtin__._old_input``.
-
-Fortunately, ``input()`` seems to be seldom used in the wild in Python
-2...
-
-"""
-
-from future import utils
-
-
-if utils.PY2:
- from io import open
- from future_builtins import ascii, oct, hex
- from __builtin__ import unichr as chr, pow as _builtin_pow
- import __builtin__
-
- # Only for backward compatibility with future v0.8.2:
- isinstance = __builtin__.isinstance
-
- # Warning: Python 2's input() is unsafe and MUST not be able to be used
- # accidentally by someone who expects Python 3 semantics but forgets
- # to import it on Python 2. Versions of ``future`` prior to 0.11
- # deleted it from __builtin__. Now we keep in __builtin__ but shadow
- # the name like all others. Just be sure to import ``input``.
-
- input = raw_input
-
- from future.builtins.newnext import newnext as next
- from future.builtins.newround import newround as round
- from future.builtins.newsuper import newsuper as super
+
+``isinstance`` is also currently exported for backwards compatibility
+with v0.8.2, although this has been deprecated since v0.9.
+
+
+input()
+-------
+Like the new ``input()`` function from Python 3 (without eval()), except
+that it returns bytes. Equivalent to Python 2's ``raw_input()``.
+
+Warning: By default, importing this module *removes* the old Python 2
+input() function entirely from ``__builtin__`` for safety. This is
+because forgetting to import the new ``input`` from ``future`` might
+otherwise lead to a security vulnerability (shell injection) on Python 2.
+
+To restore it, you can retrieve it yourself from
+``__builtin__._old_input``.
+
+Fortunately, ``input()`` seems to be seldom used in the wild in Python
+2...
+
+"""
+
+from future import utils
+
+
+if utils.PY2:
+ from io import open
+ from future_builtins import ascii, oct, hex
+ from __builtin__ import unichr as chr, pow as _builtin_pow
+ import __builtin__
+
+ # Only for backward compatibility with future v0.8.2:
+ isinstance = __builtin__.isinstance
+
+ # Warning: Python 2's input() is unsafe and MUST not be able to be used
+ # accidentally by someone who expects Python 3 semantics but forgets
+ # to import it on Python 2. Versions of ``future`` prior to 0.11
+ # deleted it from __builtin__. Now we keep in __builtin__ but shadow
+ # the name like all others. Just be sure to import ``input``.
+
+ input = raw_input
+
+ from future.builtins.newnext import newnext as next
+ from future.builtins.newround import newround as round
+ from future.builtins.newsuper import newsuper as super
from future.builtins.new_min_max import newmax as max
from future.builtins.new_min_max import newmin as min
- from future.types.newint import newint
-
- _SENTINEL = object()
-
- def pow(x, y, z=_SENTINEL):
- """
- pow(x, y[, z]) -> number
-
- With two arguments, equivalent to x**y. With three arguments,
- equivalent to (x**y) % z, but may be more efficient (e.g. for ints).
- """
- # Handle newints
- if isinstance(x, newint):
- x = long(x)
- if isinstance(y, newint):
- y = long(y)
- if isinstance(z, newint):
- z = long(z)
-
- try:
- if z == _SENTINEL:
- return _builtin_pow(x, y)
- else:
- return _builtin_pow(x, y, z)
- except ValueError:
- if z == _SENTINEL:
- return _builtin_pow(x+0j, y)
- else:
- return _builtin_pow(x+0j, y, z)
-
-
- # ``future`` doesn't support Py3.0/3.1. If we ever did, we'd add this:
- # callable = __builtin__.callable
-
- __all__ = ['ascii', 'chr', 'hex', 'input', 'isinstance', 'next', 'oct',
+ from future.types.newint import newint
+
+ _SENTINEL = object()
+
+ def pow(x, y, z=_SENTINEL):
+ """
+ pow(x, y[, z]) -> number
+
+ With two arguments, equivalent to x**y. With three arguments,
+ equivalent to (x**y) % z, but may be more efficient (e.g. for ints).
+ """
+ # Handle newints
+ if isinstance(x, newint):
+ x = long(x)
+ if isinstance(y, newint):
+ y = long(y)
+ if isinstance(z, newint):
+ z = long(z)
+
+ try:
+ if z == _SENTINEL:
+ return _builtin_pow(x, y)
+ else:
+ return _builtin_pow(x, y, z)
+ except ValueError:
+ if z == _SENTINEL:
+ return _builtin_pow(x+0j, y)
+ else:
+ return _builtin_pow(x+0j, y, z)
+
+
+ # ``future`` doesn't support Py3.0/3.1. If we ever did, we'd add this:
+ # callable = __builtin__.callable
+
+ __all__ = ['ascii', 'chr', 'hex', 'input', 'isinstance', 'next', 'oct',
'open', 'pow', 'round', 'super', 'max', 'min']
-
-else:
- import builtins
- ascii = builtins.ascii
- chr = builtins.chr
- hex = builtins.hex
- input = builtins.input
- next = builtins.next
- # Only for backward compatibility with future v0.8.2:
- isinstance = builtins.isinstance
- oct = builtins.oct
- open = builtins.open
- pow = builtins.pow
- round = builtins.round
- super = builtins.super
+
+else:
+ import builtins
+ ascii = builtins.ascii
+ chr = builtins.chr
+ hex = builtins.hex
+ input = builtins.input
+ next = builtins.next
+ # Only for backward compatibility with future v0.8.2:
+ isinstance = builtins.isinstance
+ oct = builtins.oct
+ open = builtins.open
+ pow = builtins.pow
+ round = builtins.round
+ super = builtins.super
if utils.PY34_PLUS:
max = builtins.max
min = builtins.min
@@ -122,14 +122,14 @@ else:
from future.builtins.new_min_max import newmax as max
from future.builtins.new_min_max import newmin as min
__all__ = ['min', 'max']
-
- # The callable() function was removed from Py3.0 and 3.1 and
- # reintroduced into Py3.2+. ``future`` doesn't support Py3.0/3.1. If we ever
- # did, we'd add this:
- # try:
- # callable = builtins.callable
- # except AttributeError:
- # # Definition from Pandas
- # def callable(obj):
- # return any("__call__" in klass.__dict__ for klass in type(obj).__mro__)
- # __all__.append('callable')
+
+ # The callable() function was removed from Py3.0 and 3.1 and
+ # reintroduced into Py3.2+. ``future`` doesn't support Py3.0/3.1. If we ever
+ # did, we'd add this:
+ # try:
+ # callable = builtins.callable
+ # except AttributeError:
+ # # Definition from Pandas
+ # def callable(obj):
+ # return any("__call__" in klass.__dict__ for klass in type(obj).__mro__)
+ # __all__.append('callable')
diff --git a/contrib/python/future/future/builtins/newnext.py b/contrib/python/future/future/builtins/newnext.py
index 5ff9267d6c..097638ac11 100644
--- a/contrib/python/future/future/builtins/newnext.py
+++ b/contrib/python/future/future/builtins/newnext.py
@@ -1,70 +1,70 @@
-'''
-This module provides a newnext() function in Python 2 that mimics the
-behaviour of ``next()`` in Python 3, falling back to Python 2's behaviour for
-compatibility if this fails.
-
-``newnext(iterator)`` calls the iterator's ``__next__()`` method if it exists. If this
-doesn't exist, it falls back to calling a ``next()`` method.
-
-For example:
-
- >>> class Odds(object):
- ... def __init__(self, start=1):
- ... self.value = start - 2
- ... def __next__(self): # note the Py3 interface
- ... self.value += 2
- ... return self.value
- ... def __iter__(self):
- ... return self
- ...
- >>> iterator = Odds()
- >>> next(iterator)
- 1
- >>> next(iterator)
- 3
-
-If you are defining your own custom iterator class as above, it is preferable
-to explicitly decorate the class with the @implements_iterator decorator from
-``future.utils`` as follows:
-
- >>> @implements_iterator
- ... class Odds(object):
- ... # etc
- ... pass
-
-This next() function is primarily for consuming iterators defined in Python 3
-code elsewhere that we would like to run on Python 2 or 3.
-'''
-
-_builtin_next = next
-
-_SENTINEL = object()
-
-def newnext(iterator, default=_SENTINEL):
- """
- next(iterator[, default])
+'''
+This module provides a newnext() function in Python 2 that mimics the
+behaviour of ``next()`` in Python 3, falling back to Python 2's behaviour for
+compatibility if this fails.
- Return the next item from the iterator. If default is given and the iterator
- is exhausted, it is returned instead of raising StopIteration.
- """
-
- # args = []
- # if default is not _SENTINEL:
- # args.append(default)
- try:
- try:
- return iterator.__next__()
- except AttributeError:
- try:
- return iterator.next()
- except AttributeError:
- raise TypeError("'{0}' object is not an iterator".format(
- iterator.__class__.__name__))
- except StopIteration as e:
- if default is _SENTINEL:
- raise e
- else:
- return default
-
-
-__all__ = ['newnext']
+``newnext(iterator)`` calls the iterator's ``__next__()`` method if it exists. If this
+doesn't exist, it falls back to calling a ``next()`` method.
+
+For example:
+
+ >>> class Odds(object):
+ ... def __init__(self, start=1):
+ ... self.value = start - 2
+ ... def __next__(self): # note the Py3 interface
+ ... self.value += 2
+ ... return self.value
+ ... def __iter__(self):
+ ... return self
+ ...
+ >>> iterator = Odds()
+ >>> next(iterator)
+ 1
+ >>> next(iterator)
+ 3
+
+If you are defining your own custom iterator class as above, it is preferable
+to explicitly decorate the class with the @implements_iterator decorator from
+``future.utils`` as follows:
+
+ >>> @implements_iterator
+ ... class Odds(object):
+ ... # etc
+ ... pass
+
+This next() function is primarily for consuming iterators defined in Python 3
+code elsewhere that we would like to run on Python 2 or 3.
+'''
+
+_builtin_next = next
+
+_SENTINEL = object()
+
+def newnext(iterator, default=_SENTINEL):
+ """
+ next(iterator[, default])
+
+ Return the next item from the iterator. If default is given and the iterator
+ is exhausted, it is returned instead of raising StopIteration.
+ """
+
+ # args = []
+ # if default is not _SENTINEL:
+ # args.append(default)
+ try:
+ try:
+ return iterator.__next__()
+ except AttributeError:
+ try:
+ return iterator.next()
+ except AttributeError:
+ raise TypeError("'{0}' object is not an iterator".format(
+ iterator.__class__.__name__))
+ except StopIteration as e:
+ if default is _SENTINEL:
+ raise e
+ else:
+ return default
+
+
+__all__ = ['newnext']
diff --git a/contrib/python/future/future/builtins/newround.py b/contrib/python/future/future/builtins/newround.py
index 4e933e71f0..394a2c63c4 100644
--- a/contrib/python/future/future/builtins/newround.py
+++ b/contrib/python/future/future/builtins/newround.py
@@ -1,102 +1,102 @@
-"""
-``python-future``: pure Python implementation of Python 3 round().
-"""
-
-from future.utils import PYPY, PY26, bind_method
-
-# Use the decimal module for simplicity of implementation (and
-# hopefully correctness).
-from decimal import Decimal, ROUND_HALF_EVEN
-
-
-def newround(number, ndigits=None):
- """
- See Python 3 documentation: uses Banker's Rounding.
-
- Delegates to the __round__ method if for some reason this exists.
-
- If not, rounds a number to a given precision in decimal digits (default
- 0 digits). This returns an int when called with one argument,
- otherwise the same type as the number. ndigits may be negative.
-
- See the test_round method in future/tests/test_builtins.py for
- examples.
- """
- return_int = False
- if ndigits is None:
- return_int = True
- ndigits = 0
- if hasattr(number, '__round__'):
- return number.__round__(ndigits)
-
- if ndigits < 0:
- raise NotImplementedError('negative ndigits not supported yet')
- exponent = Decimal('10') ** (-ndigits)
-
- if PYPY:
- # Work around issue #24: round() breaks on PyPy with NumPy's types
- if 'numpy' in repr(type(number)):
- number = float(number)
-
+"""
+``python-future``: pure Python implementation of Python 3 round().
+"""
+
+from future.utils import PYPY, PY26, bind_method
+
+# Use the decimal module for simplicity of implementation (and
+# hopefully correctness).
+from decimal import Decimal, ROUND_HALF_EVEN
+
+
+def newround(number, ndigits=None):
+ """
+ See Python 3 documentation: uses Banker's Rounding.
+
+ Delegates to the __round__ method if for some reason this exists.
+
+ If not, rounds a number to a given precision in decimal digits (default
+ 0 digits). This returns an int when called with one argument,
+ otherwise the same type as the number. ndigits may be negative.
+
+ See the test_round method in future/tests/test_builtins.py for
+ examples.
+ """
+ return_int = False
+ if ndigits is None:
+ return_int = True
+ ndigits = 0
+ if hasattr(number, '__round__'):
+ return number.__round__(ndigits)
+
+ if ndigits < 0:
+ raise NotImplementedError('negative ndigits not supported yet')
+ exponent = Decimal('10') ** (-ndigits)
+
+ if PYPY:
+ # Work around issue #24: round() breaks on PyPy with NumPy's types
+ if 'numpy' in repr(type(number)):
+ number = float(number)
+
if isinstance(number, Decimal):
d = number
- else:
+ else:
if not PY26:
d = Decimal.from_float(number).quantize(exponent,
rounding=ROUND_HALF_EVEN)
else:
d = from_float_26(number).quantize(exponent, rounding=ROUND_HALF_EVEN)
-
- if return_int:
- return int(d)
- else:
- return float(d)
-
-
-### From Python 2.7's decimal.py. Only needed to support Py2.6:
-
-def from_float_26(f):
- """Converts a float to a decimal number, exactly.
-
- Note that Decimal.from_float(0.1) is not the same as Decimal('0.1').
- Since 0.1 is not exactly representable in binary floating point, the
- value is stored as the nearest representable value which is
- 0x1.999999999999ap-4. The exact equivalent of the value in decimal
- is 0.1000000000000000055511151231257827021181583404541015625.
-
- >>> Decimal.from_float(0.1)
- Decimal('0.1000000000000000055511151231257827021181583404541015625')
- >>> Decimal.from_float(float('nan'))
- Decimal('NaN')
- >>> Decimal.from_float(float('inf'))
- Decimal('Infinity')
- >>> Decimal.from_float(-float('inf'))
- Decimal('-Infinity')
- >>> Decimal.from_float(-0.0)
- Decimal('-0')
-
- """
- import math as _math
- from decimal import _dec_from_triple # only available on Py2.6 and Py2.7 (not 3.3)
-
- if isinstance(f, (int, long)): # handle integer inputs
- return Decimal(f)
- if _math.isinf(f) or _math.isnan(f): # raises TypeError if not a float
- return Decimal(repr(f))
- if _math.copysign(1.0, f) == 1.0:
- sign = 0
- else:
- sign = 1
- n, d = abs(f).as_integer_ratio()
- # int.bit_length() method doesn't exist on Py2.6:
- def bit_length(d):
- if d != 0:
- return len(bin(abs(d))) - 2
- else:
- return 0
- k = bit_length(d) - 1
- result = _dec_from_triple(sign, str(n*5**k), -k)
- return result
-
-
-__all__ = ['newround']
+
+ if return_int:
+ return int(d)
+ else:
+ return float(d)
+
+
+### From Python 2.7's decimal.py. Only needed to support Py2.6:
+
+def from_float_26(f):
+ """Converts a float to a decimal number, exactly.
+
+ Note that Decimal.from_float(0.1) is not the same as Decimal('0.1').
+ Since 0.1 is not exactly representable in binary floating point, the
+ value is stored as the nearest representable value which is
+ 0x1.999999999999ap-4. The exact equivalent of the value in decimal
+ is 0.1000000000000000055511151231257827021181583404541015625.
+
+ >>> Decimal.from_float(0.1)
+ Decimal('0.1000000000000000055511151231257827021181583404541015625')
+ >>> Decimal.from_float(float('nan'))
+ Decimal('NaN')
+ >>> Decimal.from_float(float('inf'))
+ Decimal('Infinity')
+ >>> Decimal.from_float(-float('inf'))
+ Decimal('-Infinity')
+ >>> Decimal.from_float(-0.0)
+ Decimal('-0')
+
+ """
+ import math as _math
+ from decimal import _dec_from_triple # only available on Py2.6 and Py2.7 (not 3.3)
+
+ if isinstance(f, (int, long)): # handle integer inputs
+ return Decimal(f)
+ if _math.isinf(f) or _math.isnan(f): # raises TypeError if not a float
+ return Decimal(repr(f))
+ if _math.copysign(1.0, f) == 1.0:
+ sign = 0
+ else:
+ sign = 1
+ n, d = abs(f).as_integer_ratio()
+ # int.bit_length() method doesn't exist on Py2.6:
+ def bit_length(d):
+ if d != 0:
+ return len(bin(abs(d))) - 2
+ else:
+ return 0
+ k = bit_length(d) - 1
+ result = _dec_from_triple(sign, str(n*5**k), -k)
+ return result
+
+
+__all__ = ['newround']
diff --git a/contrib/python/future/future/builtins/newsuper.py b/contrib/python/future/future/builtins/newsuper.py
index c661c271b0..5d3402bd2f 100644
--- a/contrib/python/future/future/builtins/newsuper.py
+++ b/contrib/python/future/future/builtins/newsuper.py
@@ -1,114 +1,114 @@
-'''
-This module provides a newsuper() function in Python 2 that mimics the
-behaviour of super() in Python 3. It is designed to be used as follows:
-
- from __future__ import division, absolute_import, print_function
- from future.builtins import super
-
-And then, for example:
-
- class VerboseList(list):
- def append(self, item):
- print('Adding an item')
- super().append(item) # new simpler super() function
-
-Importing this module on Python 3 has no effect.
-
-This is based on (i.e. almost identical to) Ryan Kelly's magicsuper
-module here:
-
- https://github.com/rfk/magicsuper.git
-
-Excerpts from Ryan's docstring:
-
- "Of course, you can still explicitly pass in the arguments if you want
- to do something strange. Sometimes you really do want that, e.g. to
- skip over some classes in the method resolution order.
-
- "How does it work? By inspecting the calling frame to determine the
- function object being executed and the object on which it's being
- called, and then walking the object's __mro__ chain to find out where
- that function was defined. Yuck, but it seems to work..."
-'''
-
-from __future__ import absolute_import
-import sys
-from types import FunctionType
-
-from future.utils import PY3, PY26
-
-
-_builtin_super = super
-
-_SENTINEL = object()
-
-def newsuper(typ=_SENTINEL, type_or_obj=_SENTINEL, framedepth=1):
- '''Like builtin super(), but capable of magic.
-
- This acts just like the builtin super() function, but if called
- without any arguments it attempts to infer them at runtime.
- '''
- # Infer the correct call if used without arguments.
- if typ is _SENTINEL:
- # We'll need to do some frame hacking.
+'''
+This module provides a newsuper() function in Python 2 that mimics the
+behaviour of super() in Python 3. It is designed to be used as follows:
+
+ from __future__ import division, absolute_import, print_function
+ from future.builtins import super
+
+And then, for example:
+
+ class VerboseList(list):
+ def append(self, item):
+ print('Adding an item')
+ super().append(item) # new simpler super() function
+
+Importing this module on Python 3 has no effect.
+
+This is based on (i.e. almost identical to) Ryan Kelly's magicsuper
+module here:
+
+ https://github.com/rfk/magicsuper.git
+
+Excerpts from Ryan's docstring:
+
+ "Of course, you can still explicitly pass in the arguments if you want
+ to do something strange. Sometimes you really do want that, e.g. to
+ skip over some classes in the method resolution order.
+
+ "How does it work? By inspecting the calling frame to determine the
+ function object being executed and the object on which it's being
+ called, and then walking the object's __mro__ chain to find out where
+ that function was defined. Yuck, but it seems to work..."
+'''
+
+from __future__ import absolute_import
+import sys
+from types import FunctionType
+
+from future.utils import PY3, PY26
+
+
+_builtin_super = super
+
+_SENTINEL = object()
+
+def newsuper(typ=_SENTINEL, type_or_obj=_SENTINEL, framedepth=1):
+ '''Like builtin super(), but capable of magic.
+
+ This acts just like the builtin super() function, but if called
+ without any arguments it attempts to infer them at runtime.
+ '''
+ # Infer the correct call if used without arguments.
+ if typ is _SENTINEL:
+ # We'll need to do some frame hacking.
f = sys._getframe(framedepth)
-
- try:
- # Get the function's first positional argument.
- type_or_obj = f.f_locals[f.f_code.co_varnames[0]]
- except (IndexError, KeyError,):
- raise RuntimeError('super() used in a function with no args')
-
- try:
- # Get the MRO so we can crawl it.
- mro = type_or_obj.__mro__
- except (AttributeError, RuntimeError): # see issue #160
- try:
- mro = type_or_obj.__class__.__mro__
- except AttributeError:
- raise RuntimeError('super() used with a non-newstyle class')
-
- # A ``for...else`` block? Yes! It's odd, but useful.
+
+ try:
+ # Get the function's first positional argument.
+ type_or_obj = f.f_locals[f.f_code.co_varnames[0]]
+ except (IndexError, KeyError,):
+ raise RuntimeError('super() used in a function with no args')
+
+ try:
+ # Get the MRO so we can crawl it.
+ mro = type_or_obj.__mro__
+ except (AttributeError, RuntimeError): # see issue #160
+ try:
+ mro = type_or_obj.__class__.__mro__
+ except AttributeError:
+ raise RuntimeError('super() used with a non-newstyle class')
+
+ # A ``for...else`` block? Yes! It's odd, but useful.
# If unfamiliar with for...else, see:
- #
- # http://psung.blogspot.com/2007/12/for-else-in-python.html
- for typ in mro:
- # Find the class that owns the currently-executing method.
- for meth in typ.__dict__.values():
- # Drill down through any wrappers to the underlying func.
- # This handles e.g. classmethod() and staticmethod().
- try:
- while not isinstance(meth,FunctionType):
- if isinstance(meth, property):
- # Calling __get__ on the property will invoke
- # user code which might throw exceptions or have
- # side effects
- meth = meth.fget
- else:
- try:
- meth = meth.__func__
- except AttributeError:
+ #
+ # http://psung.blogspot.com/2007/12/for-else-in-python.html
+ for typ in mro:
+ # Find the class that owns the currently-executing method.
+ for meth in typ.__dict__.values():
+ # Drill down through any wrappers to the underlying func.
+ # This handles e.g. classmethod() and staticmethod().
+ try:
+ while not isinstance(meth,FunctionType):
+ if isinstance(meth, property):
+ # Calling __get__ on the property will invoke
+ # user code which might throw exceptions or have
+ # side effects
+ meth = meth.fget
+ else:
+ try:
+ meth = meth.__func__
+ except AttributeError:
meth = meth.__get__(type_or_obj, typ)
- except (AttributeError, TypeError):
- continue
- if meth.func_code is f.f_code:
- break # Aha! Found you.
- else:
- continue # Not found! Move onto the next class in MRO.
- break # Found! Break out of the search loop.
- else:
- raise RuntimeError('super() called outside a method')
-
- # Dispatch to builtin super().
- if type_or_obj is not _SENTINEL:
- return _builtin_super(typ, type_or_obj)
- return _builtin_super(typ)
-
-
-def superm(*args, **kwds):
- f = sys._getframe(1)
- nm = f.f_code.co_name
- return getattr(newsuper(framedepth=2),nm)(*args, **kwds)
-
-
-__all__ = ['newsuper']
+ except (AttributeError, TypeError):
+ continue
+ if meth.func_code is f.f_code:
+ break # Aha! Found you.
+ else:
+ continue # Not found! Move onto the next class in MRO.
+ break # Found! Break out of the search loop.
+ else:
+ raise RuntimeError('super() called outside a method')
+
+ # Dispatch to builtin super().
+ if type_or_obj is not _SENTINEL:
+ return _builtin_super(typ, type_or_obj)
+ return _builtin_super(typ)
+
+
+def superm(*args, **kwds):
+ f = sys._getframe(1)
+ nm = f.f_code.co_name
+ return getattr(newsuper(framedepth=2),nm)(*args, **kwds)
+
+
+__all__ = ['newsuper']
diff --git a/contrib/python/future/future/moves/__init__.py b/contrib/python/future/future/moves/__init__.py
index d81b0812b0..0cd60d3d5c 100644
--- a/contrib/python/future/future/moves/__init__.py
+++ b/contrib/python/future/future/moves/__init__.py
@@ -1,8 +1,8 @@
-# future.moves package
-from __future__ import absolute_import
-import sys
-__future_module__ = True
-from future.standard_library import import_top_level_modules
-
+# future.moves package
+from __future__ import absolute_import
+import sys
+__future_module__ = True
+from future.standard_library import import_top_level_modules
+
if sys.version_info[0] >= 3:
- import_top_level_modules()
+ import_top_level_modules()
diff --git a/contrib/python/future/future/moves/_dummy_thread.py b/contrib/python/future/future/moves/_dummy_thread.py
index e956c83eef..e5dca348fb 100644
--- a/contrib/python/future/future/moves/_dummy_thread.py
+++ b/contrib/python/future/future/moves/_dummy_thread.py
@@ -1,11 +1,11 @@
-from __future__ import absolute_import
-from future.utils import PY3
-
-if PY3:
+from __future__ import absolute_import
+from future.utils import PY3
+
+if PY3:
try:
from _dummy_thread import *
except ImportError:
from _thread import *
-else:
- __future_module__ = True
- from dummy_thread import *
+else:
+ __future_module__ = True
+ from dummy_thread import *
diff --git a/contrib/python/future/future/moves/_markupbase.py b/contrib/python/future/future/moves/_markupbase.py
index 4ace88ff8d..f9fb4bbf28 100644
--- a/contrib/python/future/future/moves/_markupbase.py
+++ b/contrib/python/future/future/moves/_markupbase.py
@@ -1,8 +1,8 @@
-from __future__ import absolute_import
-from future.utils import PY3
-
-if PY3:
- from _markupbase import *
-else:
- __future_module__ = True
- from markupbase import *
+from __future__ import absolute_import
+from future.utils import PY3
+
+if PY3:
+ from _markupbase import *
+else:
+ __future_module__ = True
+ from markupbase import *
diff --git a/contrib/python/future/future/moves/_thread.py b/contrib/python/future/future/moves/_thread.py
index 7af977a163..c68018bb11 100644
--- a/contrib/python/future/future/moves/_thread.py
+++ b/contrib/python/future/future/moves/_thread.py
@@ -1,8 +1,8 @@
-from __future__ import absolute_import
-from future.utils import PY3
-
-if PY3:
- from _thread import *
-else:
- __future_module__ = True
- from thread import *
+from __future__ import absolute_import
+from future.utils import PY3
+
+if PY3:
+ from _thread import *
+else:
+ __future_module__ = True
+ from thread import *
diff --git a/contrib/python/future/future/moves/builtins.py b/contrib/python/future/future/moves/builtins.py
index 8df2ff9ef3..e4b6221d59 100644
--- a/contrib/python/future/future/moves/builtins.py
+++ b/contrib/python/future/future/moves/builtins.py
@@ -1,10 +1,10 @@
-from __future__ import absolute_import
-from future.utils import PY3
-
-if PY3:
- from builtins import *
-else:
- __future_module__ = True
- from __builtin__ import *
- # Overwrite any old definitions with the equivalent future.builtins ones:
- from future.builtins import *
+from __future__ import absolute_import
+from future.utils import PY3
+
+if PY3:
+ from builtins import *
+else:
+ __future_module__ = True
+ from __builtin__ import *
+ # Overwrite any old definitions with the equivalent future.builtins ones:
+ from future.builtins import *
diff --git a/contrib/python/future/future/moves/collections.py b/contrib/python/future/future/moves/collections.py
index c551889f1f..664ee6a3d0 100644
--- a/contrib/python/future/future/moves/collections.py
+++ b/contrib/python/future/future/moves/collections.py
@@ -1,18 +1,18 @@
-from __future__ import absolute_import
-import sys
-
-from future.utils import PY2, PY26
-__future_module__ = True
-
-from collections import *
-
-if PY2:
- from UserDict import UserDict
- from UserList import UserList
- from UserString import UserString
-
-if PY26:
- from future.backports.misc import OrderedDict, Counter
-
-if sys.version_info < (3, 3):
- from future.backports.misc import ChainMap, _count_elements
+from __future__ import absolute_import
+import sys
+
+from future.utils import PY2, PY26
+__future_module__ = True
+
+from collections import *
+
+if PY2:
+ from UserDict import UserDict
+ from UserList import UserList
+ from UserString import UserString
+
+if PY26:
+ from future.backports.misc import OrderedDict, Counter
+
+if sys.version_info < (3, 3):
+ from future.backports.misc import ChainMap, _count_elements
diff --git a/contrib/python/future/future/moves/configparser.py b/contrib/python/future/future/moves/configparser.py
index 86f68b3aff..33d9cf9533 100644
--- a/contrib/python/future/future/moves/configparser.py
+++ b/contrib/python/future/future/moves/configparser.py
@@ -1,8 +1,8 @@
-from __future__ import absolute_import
-
-from future.utils import PY2
-
-if PY2:
- from ConfigParser import *
-else:
- from configparser import *
+from __future__ import absolute_import
+
+from future.utils import PY2
+
+if PY2:
+ from ConfigParser import *
+else:
+ from configparser import *
diff --git a/contrib/python/future/future/moves/copyreg.py b/contrib/python/future/future/moves/copyreg.py
index 1b64f2613c..9d08cdc5ed 100644
--- a/contrib/python/future/future/moves/copyreg.py
+++ b/contrib/python/future/future/moves/copyreg.py
@@ -1,12 +1,12 @@
-from __future__ import absolute_import
-from future.utils import PY3
-
-if PY3:
+from __future__ import absolute_import
+from future.utils import PY3
+
+if PY3:
import copyreg, sys
# A "*" import uses Python 3's copyreg.__all__ which does not include
# all public names in the API surface for copyreg, this avoids that
# problem by just making our module _be_ a reference to the actual module.
sys.modules['future.moves.copyreg'] = copyreg
-else:
- __future_module__ = True
- from copy_reg import *
+else:
+ __future_module__ = True
+ from copy_reg import *
diff --git a/contrib/python/future/future/moves/dbm/__init__.py b/contrib/python/future/future/moves/dbm/__init__.py
index 60f9e2ccbe..626b406f7f 100644
--- a/contrib/python/future/future/moves/dbm/__init__.py
+++ b/contrib/python/future/future/moves/dbm/__init__.py
@@ -1,20 +1,20 @@
-from __future__ import absolute_import
-from future.utils import PY3
-
-if PY3:
- from dbm import *
-else:
- __future_module__ = True
- from whichdb import *
- from anydbm import *
-
-# Py3.3's dbm/__init__.py imports ndbm but doesn't expose it via __all__.
-# In case some (badly written) code depends on dbm.ndbm after import dbm,
-# we simulate this:
-if PY3:
- from dbm import ndbm
-else:
- try:
- from future.moves.dbm import ndbm
- except ImportError:
- ndbm = None
+from __future__ import absolute_import
+from future.utils import PY3
+
+if PY3:
+ from dbm import *
+else:
+ __future_module__ = True
+ from whichdb import *
+ from anydbm import *
+
+# Py3.3's dbm/__init__.py imports ndbm but doesn't expose it via __all__.
+# In case some (badly written) code depends on dbm.ndbm after import dbm,
+# we simulate this:
+if PY3:
+ from dbm import ndbm
+else:
+ try:
+ from future.moves.dbm import ndbm
+ except ImportError:
+ ndbm = None
diff --git a/contrib/python/future/future/moves/dbm/dumb.py b/contrib/python/future/future/moves/dbm/dumb.py
index e5961d5e86..528383f6d8 100644
--- a/contrib/python/future/future/moves/dbm/dumb.py
+++ b/contrib/python/future/future/moves/dbm/dumb.py
@@ -1,9 +1,9 @@
-from __future__ import absolute_import
-
-from future.utils import PY3
-
-if PY3:
- from dbm.dumb import *
-else:
- __future_module__ = True
- from dumbdbm import *
+from __future__ import absolute_import
+
+from future.utils import PY3
+
+if PY3:
+ from dbm.dumb import *
+else:
+ __future_module__ = True
+ from dumbdbm import *
diff --git a/contrib/python/future/future/moves/dbm/ndbm.py b/contrib/python/future/future/moves/dbm/ndbm.py
index f3185da3d8..8c6fff8ab7 100644
--- a/contrib/python/future/future/moves/dbm/ndbm.py
+++ b/contrib/python/future/future/moves/dbm/ndbm.py
@@ -1,9 +1,9 @@
-from __future__ import absolute_import
-
-from future.utils import PY3
-
-if PY3:
- from dbm.ndbm import *
-else:
- __future_module__ = True
- from dbm import *
+from __future__ import absolute_import
+
+from future.utils import PY3
+
+if PY3:
+ from dbm.ndbm import *
+else:
+ __future_module__ = True
+ from dbm import *
diff --git a/contrib/python/future/future/moves/html/__init__.py b/contrib/python/future/future/moves/html/__init__.py
index 3905a83856..22ed6e7d2c 100644
--- a/contrib/python/future/future/moves/html/__init__.py
+++ b/contrib/python/future/future/moves/html/__init__.py
@@ -1,31 +1,31 @@
-from __future__ import absolute_import
-from future.utils import PY3
-__future_module__ = True
-
-if PY3:
- from html import *
-else:
- # cgi.escape isn't good enough for the single Py3.3 html test to pass.
- # Define it inline here instead. From the Py3.4 stdlib. Note that the
- # html.escape() function from the Py3.3 stdlib is not suitable for use on
- # Py2.x.
- """
- General functions for HTML manipulation.
- """
-
- def escape(s, quote=True):
- """
- Replace special characters "&", "<" and ">" to HTML-safe sequences.
- If the optional flag quote is true (the default), the quotation mark
- characters, both double quote (") and single quote (') characters are also
- translated.
- """
- s = s.replace("&", "&amp;") # Must be done first!
- s = s.replace("<", "&lt;")
- s = s.replace(">", "&gt;")
- if quote:
- s = s.replace('"', "&quot;")
- s = s.replace('\'', "&#x27;")
- return s
-
- __all__ = ['escape']
+from __future__ import absolute_import
+from future.utils import PY3
+__future_module__ = True
+
+if PY3:
+ from html import *
+else:
+ # cgi.escape isn't good enough for the single Py3.3 html test to pass.
+ # Define it inline here instead. From the Py3.4 stdlib. Note that the
+ # html.escape() function from the Py3.3 stdlib is not suitable for use on
+ # Py2.x.
+ """
+ General functions for HTML manipulation.
+ """
+
+ def escape(s, quote=True):
+ """
+ Replace special characters "&", "<" and ">" to HTML-safe sequences.
+ If the optional flag quote is true (the default), the quotation mark
+ characters, both double quote (") and single quote (') characters are also
+ translated.
+ """
+ s = s.replace("&", "&amp;") # Must be done first!
+ s = s.replace("<", "&lt;")
+ s = s.replace(">", "&gt;")
+ if quote:
+ s = s.replace('"', "&quot;")
+ s = s.replace('\'', "&#x27;")
+ return s
+
+ __all__ = ['escape']
diff --git a/contrib/python/future/future/moves/html/entities.py b/contrib/python/future/future/moves/html/entities.py
index 4f69df1c0b..56a8860911 100644
--- a/contrib/python/future/future/moves/html/entities.py
+++ b/contrib/python/future/future/moves/html/entities.py
@@ -1,8 +1,8 @@
-from __future__ import absolute_import
-from future.utils import PY3
-
-if PY3:
- from html.entities import *
-else:
- __future_module__ = True
- from htmlentitydefs import *
+from __future__ import absolute_import
+from future.utils import PY3
+
+if PY3:
+ from html.entities import *
+else:
+ __future_module__ = True
+ from htmlentitydefs import *
diff --git a/contrib/python/future/future/moves/html/parser.py b/contrib/python/future/future/moves/html/parser.py
index deafc10c56..a6115b59f0 100644
--- a/contrib/python/future/future/moves/html/parser.py
+++ b/contrib/python/future/future/moves/html/parser.py
@@ -1,8 +1,8 @@
-from __future__ import absolute_import
-from future.utils import PY3
-__future_module__ = True
-
-if PY3:
- from html.parser import *
-else:
- from HTMLParser import *
+from __future__ import absolute_import
+from future.utils import PY3
+__future_module__ = True
+
+if PY3:
+ from html.parser import *
+else:
+ from HTMLParser import *
diff --git a/contrib/python/future/future/moves/http/__init__.py b/contrib/python/future/future/moves/http/__init__.py
index debb273968..917b3d71ac 100644
--- a/contrib/python/future/future/moves/http/__init__.py
+++ b/contrib/python/future/future/moves/http/__init__.py
@@ -1,4 +1,4 @@
-from future.utils import PY3
-
-if not PY3:
- __future_module__ = True
+from future.utils import PY3
+
+if not PY3:
+ __future_module__ = True
diff --git a/contrib/python/future/future/moves/http/client.py b/contrib/python/future/future/moves/http/client.py
index 5ee10a6ec4..55f9c9c1ae 100644
--- a/contrib/python/future/future/moves/http/client.py
+++ b/contrib/python/future/future/moves/http/client.py
@@ -1,8 +1,8 @@
-from future.utils import PY3
-
-if PY3:
- from http.client import *
-else:
- from httplib import *
- from httplib import HTTPMessage
- __future_module__ = True
+from future.utils import PY3
+
+if PY3:
+ from http.client import *
+else:
+ from httplib import *
+ from httplib import HTTPMessage
+ __future_module__ = True
diff --git a/contrib/python/future/future/moves/http/cookiejar.py b/contrib/python/future/future/moves/http/cookiejar.py
index ee8b82ad08..ea00df7720 100644
--- a/contrib/python/future/future/moves/http/cookiejar.py
+++ b/contrib/python/future/future/moves/http/cookiejar.py
@@ -1,8 +1,8 @@
-from __future__ import absolute_import
-from future.utils import PY3
-
-if PY3:
- from http.cookiejar import *
-else:
- __future_module__ = True
- from cookielib import *
+from __future__ import absolute_import
+from future.utils import PY3
+
+if PY3:
+ from http.cookiejar import *
+else:
+ __future_module__ = True
+ from cookielib import *
diff --git a/contrib/python/future/future/moves/http/cookies.py b/contrib/python/future/future/moves/http/cookies.py
index c98a521c3a..1b74fe2dd7 100644
--- a/contrib/python/future/future/moves/http/cookies.py
+++ b/contrib/python/future/future/moves/http/cookies.py
@@ -1,9 +1,9 @@
-from __future__ import absolute_import
-from future.utils import PY3
-
-if PY3:
- from http.cookies import *
-else:
- __future_module__ = True
- from Cookie import *
- from Cookie import Morsel # left out of __all__ on Py2.7!
+from __future__ import absolute_import
+from future.utils import PY3
+
+if PY3:
+ from http.cookies import *
+else:
+ __future_module__ = True
+ from Cookie import *
+ from Cookie import Morsel # left out of __all__ on Py2.7!
diff --git a/contrib/python/future/future/moves/http/server.py b/contrib/python/future/future/moves/http/server.py
index a56ca8556e..4e75cc1dec 100644
--- a/contrib/python/future/future/moves/http/server.py
+++ b/contrib/python/future/future/moves/http/server.py
@@ -1,20 +1,20 @@
-from __future__ import absolute_import
-from future.utils import PY3
-
-if PY3:
- from http.server import *
-else:
- __future_module__ = True
- from BaseHTTPServer import *
- from CGIHTTPServer import *
- from SimpleHTTPServer import *
- try:
- from CGIHTTPServer import _url_collapse_path # needed for a test
- except ImportError:
- try:
- # Python 2.7.0 to 2.7.3
- from CGIHTTPServer import (
- _url_collapse_path_split as _url_collapse_path)
- except ImportError:
- # Doesn't exist on Python 2.6.x. Ignore it.
- pass
+from __future__ import absolute_import
+from future.utils import PY3
+
+if PY3:
+ from http.server import *
+else:
+ __future_module__ = True
+ from BaseHTTPServer import *
+ from CGIHTTPServer import *
+ from SimpleHTTPServer import *
+ try:
+ from CGIHTTPServer import _url_collapse_path # needed for a test
+ except ImportError:
+ try:
+ # Python 2.7.0 to 2.7.3
+ from CGIHTTPServer import (
+ _url_collapse_path_split as _url_collapse_path)
+ except ImportError:
+ # Doesn't exist on Python 2.6.x. Ignore it.
+ pass
diff --git a/contrib/python/future/future/moves/itertools.py b/contrib/python/future/future/moves/itertools.py
index adb7994d49..e5eb20d5d5 100644
--- a/contrib/python/future/future/moves/itertools.py
+++ b/contrib/python/future/future/moves/itertools.py
@@ -1,8 +1,8 @@
-from __future__ import absolute_import
-
-from itertools import *
-try:
- zip_longest = izip_longest
- filterfalse = ifilterfalse
-except NameError:
- pass
+from __future__ import absolute_import
+
+from itertools import *
+try:
+ zip_longest = izip_longest
+ filterfalse = ifilterfalse
+except NameError:
+ pass
diff --git a/contrib/python/future/future/moves/pickle.py b/contrib/python/future/future/moves/pickle.py
index f95fcc27d2..c53d693925 100644
--- a/contrib/python/future/future/moves/pickle.py
+++ b/contrib/python/future/future/moves/pickle.py
@@ -1,11 +1,11 @@
-from __future__ import absolute_import
-from future.utils import PY3
-
-if PY3:
- from pickle import *
-else:
- __future_module__ = True
- try:
- from cPickle import *
- except ImportError:
- from pickle import *
+from __future__ import absolute_import
+from future.utils import PY3
+
+if PY3:
+ from pickle import *
+else:
+ __future_module__ = True
+ try:
+ from cPickle import *
+ except ImportError:
+ from pickle import *
diff --git a/contrib/python/future/future/moves/queue.py b/contrib/python/future/future/moves/queue.py
index 5f60427fc4..1cb1437d74 100644
--- a/contrib/python/future/future/moves/queue.py
+++ b/contrib/python/future/future/moves/queue.py
@@ -1,8 +1,8 @@
-from __future__ import absolute_import
-from future.utils import PY3
-
-if PY3:
- from queue import *
-else:
- __future_module__ = True
- from Queue import *
+from __future__ import absolute_import
+from future.utils import PY3
+
+if PY3:
+ from queue import *
+else:
+ __future_module__ = True
+ from Queue import *
diff --git a/contrib/python/future/future/moves/reprlib.py b/contrib/python/future/future/moves/reprlib.py
index ef6f66a689..a313a13a49 100644
--- a/contrib/python/future/future/moves/reprlib.py
+++ b/contrib/python/future/future/moves/reprlib.py
@@ -1,8 +1,8 @@
-from __future__ import absolute_import
-from future.utils import PY3
-
-if PY3:
- from reprlib import *
-else:
- __future_module__ = True
- from repr import *
+from __future__ import absolute_import
+from future.utils import PY3
+
+if PY3:
+ from reprlib import *
+else:
+ __future_module__ = True
+ from repr import *
diff --git a/contrib/python/future/future/moves/socketserver.py b/contrib/python/future/future/moves/socketserver.py
index c74893469f..062e0848de 100644
--- a/contrib/python/future/future/moves/socketserver.py
+++ b/contrib/python/future/future/moves/socketserver.py
@@ -1,8 +1,8 @@
-from __future__ import absolute_import
-from future.utils import PY3
-
-if PY3:
- from socketserver import *
-else:
- __future_module__ = True
- from SocketServer import *
+from __future__ import absolute_import
+from future.utils import PY3
+
+if PY3:
+ from socketserver import *
+else:
+ __future_module__ = True
+ from SocketServer import *
diff --git a/contrib/python/future/future/moves/subprocess.py b/contrib/python/future/future/moves/subprocess.py
index 4ae8ad0f81..43ffd2ac23 100644
--- a/contrib/python/future/future/moves/subprocess.py
+++ b/contrib/python/future/future/moves/subprocess.py
@@ -1,11 +1,11 @@
-from __future__ import absolute_import
-from future.utils import PY2, PY26
-
-from subprocess import *
-
-if PY2:
- __future_module__ = True
- from commands import getoutput, getstatusoutput
-
-if PY26:
- from future.backports.misc import check_output
+from __future__ import absolute_import
+from future.utils import PY2, PY26
+
+from subprocess import *
+
+if PY2:
+ __future_module__ = True
+ from commands import getoutput, getstatusoutput
+
+if PY26:
+ from future.backports.misc import check_output
diff --git a/contrib/python/future/future/moves/sys.py b/contrib/python/future/future/moves/sys.py
index 9ee0c87ba6..1293bcb070 100644
--- a/contrib/python/future/future/moves/sys.py
+++ b/contrib/python/future/future/moves/sys.py
@@ -1,8 +1,8 @@
-from __future__ import absolute_import
-
-from future.utils import PY2
-
-from sys import *
-
-if PY2:
- from __builtin__ import intern
+from __future__ import absolute_import
+
+from future.utils import PY2
+
+from sys import *
+
+if PY2:
+ from __builtin__ import intern
diff --git a/contrib/python/future/future/moves/urllib/__init__.py b/contrib/python/future/future/moves/urllib/__init__.py
index e7a4dbd568..5cf428b6ec 100644
--- a/contrib/python/future/future/moves/urllib/__init__.py
+++ b/contrib/python/future/future/moves/urllib/__init__.py
@@ -1,5 +1,5 @@
-from __future__ import absolute_import
-from future.utils import PY3
-
-if not PY3:
- __future_module__ = True
+from __future__ import absolute_import
+from future.utils import PY3
+
+if not PY3:
+ __future_module__ = True
diff --git a/contrib/python/future/future/moves/urllib/error.py b/contrib/python/future/future/moves/urllib/error.py
index 71fe68a94e..7d8ada73f8 100644
--- a/contrib/python/future/future/moves/urllib/error.py
+++ b/contrib/python/future/future/moves/urllib/error.py
@@ -1,16 +1,16 @@
-from __future__ import absolute_import
-from future.standard_library import suspend_hooks
-
-from future.utils import PY3
-
-if PY3:
- from urllib.error import *
-else:
- __future_module__ = True
+from __future__ import absolute_import
+from future.standard_library import suspend_hooks
- # We use this method to get at the original Py2 urllib before any renaming magic
- # ContentTooShortError = sys.py2_modules['urllib'].ContentTooShortError
+from future.utils import PY3
- with suspend_hooks():
- from urllib import ContentTooShortError
- from urllib2 import URLError, HTTPError
+if PY3:
+ from urllib.error import *
+else:
+ __future_module__ = True
+
+ # We use this method to get at the original Py2 urllib before any renaming magic
+ # ContentTooShortError = sys.py2_modules['urllib'].ContentTooShortError
+
+ with suspend_hooks():
+ from urllib import ContentTooShortError
+ from urllib2 import URLError, HTTPError
diff --git a/contrib/python/future/future/moves/urllib/parse.py b/contrib/python/future/future/moves/urllib/parse.py
index 499abf991a..9074b8163f 100644
--- a/contrib/python/future/future/moves/urllib/parse.py
+++ b/contrib/python/future/future/moves/urllib/parse.py
@@ -1,28 +1,28 @@
-from __future__ import absolute_import
-from future.standard_library import suspend_hooks
-
-from future.utils import PY3
-
-if PY3:
- from urllib.parse import *
-else:
- __future_module__ = True
- from urlparse import (ParseResult, SplitResult, parse_qs, parse_qsl,
- urldefrag, urljoin, urlparse, urlsplit,
- urlunparse, urlunsplit)
+from __future__ import absolute_import
+from future.standard_library import suspend_hooks
- # we use this method to get at the original py2 urllib before any renaming
- # quote = sys.py2_modules['urllib'].quote
- # quote_plus = sys.py2_modules['urllib'].quote_plus
- # unquote = sys.py2_modules['urllib'].unquote
- # unquote_plus = sys.py2_modules['urllib'].unquote_plus
- # urlencode = sys.py2_modules['urllib'].urlencode
- # splitquery = sys.py2_modules['urllib'].splitquery
+from future.utils import PY3
- with suspend_hooks():
- from urllib import (quote,
- quote_plus,
- unquote,
- unquote_plus,
- urlencode,
- splitquery)
+if PY3:
+ from urllib.parse import *
+else:
+ __future_module__ = True
+ from urlparse import (ParseResult, SplitResult, parse_qs, parse_qsl,
+ urldefrag, urljoin, urlparse, urlsplit,
+ urlunparse, urlunsplit)
+
+ # we use this method to get at the original py2 urllib before any renaming
+ # quote = sys.py2_modules['urllib'].quote
+ # quote_plus = sys.py2_modules['urllib'].quote_plus
+ # unquote = sys.py2_modules['urllib'].unquote
+ # unquote_plus = sys.py2_modules['urllib'].unquote_plus
+ # urlencode = sys.py2_modules['urllib'].urlencode
+ # splitquery = sys.py2_modules['urllib'].splitquery
+
+ with suspend_hooks():
+ from urllib import (quote,
+ quote_plus,
+ unquote,
+ unquote_plus,
+ urlencode,
+ splitquery)
diff --git a/contrib/python/future/future/moves/urllib/request.py b/contrib/python/future/future/moves/urllib/request.py
index 5cd313dc93..972aa4ab5d 100644
--- a/contrib/python/future/future/moves/urllib/request.py
+++ b/contrib/python/future/future/moves/urllib/request.py
@@ -1,26 +1,26 @@
-from __future__ import absolute_import
-
-from future.standard_library import suspend_hooks
-from future.utils import PY3
-
-if PY3:
- from urllib.request import *
- # This aren't in __all__:
- from urllib.request import (getproxies,
- pathname2url,
- proxy_bypass,
- quote,
- request_host,
- thishost,
- unquote,
- url2pathname,
- urlcleanup,
- urljoin,
- urlopen,
- urlparse,
- urlretrieve,
- urlsplit,
- urlunparse)
+from __future__ import absolute_import
+
+from future.standard_library import suspend_hooks
+from future.utils import PY3
+
+if PY3:
+ from urllib.request import *
+ # This aren't in __all__:
+ from urllib.request import (getproxies,
+ pathname2url,
+ proxy_bypass,
+ quote,
+ request_host,
+ thishost,
+ unquote,
+ url2pathname,
+ urlcleanup,
+ urljoin,
+ urlopen,
+ urlparse,
+ urlretrieve,
+ urlsplit,
+ urlunparse)
from urllib.parse import (splitattr,
splithost,
@@ -33,62 +33,62 @@ if PY3:
splitvalue,
to_bytes,
unwrap)
-else:
- __future_module__ = True
- with suspend_hooks():
- from urllib import *
- from urllib2 import *
- from urlparse import *
-
- # Rename:
- from urllib import toBytes # missing from __all__ on Py2.6
- to_bytes = toBytes
-
- # from urllib import (pathname2url,
- # url2pathname,
- # getproxies,
- # urlretrieve,
- # urlcleanup,
- # URLopener,
- # FancyURLopener,
- # proxy_bypass)
+else:
+ __future_module__ = True
+ with suspend_hooks():
+ from urllib import *
+ from urllib2 import *
+ from urlparse import *
+
+ # Rename:
+ from urllib import toBytes # missing from __all__ on Py2.6
+ to_bytes = toBytes
+
+ # from urllib import (pathname2url,
+ # url2pathname,
+ # getproxies,
+ # urlretrieve,
+ # urlcleanup,
+ # URLopener,
+ # FancyURLopener,
+ # proxy_bypass)
- # from urllib2 import (
- # AbstractBasicAuthHandler,
- # AbstractDigestAuthHandler,
- # BaseHandler,
- # CacheFTPHandler,
- # FileHandler,
- # FTPHandler,
- # HTTPBasicAuthHandler,
- # HTTPCookieProcessor,
- # HTTPDefaultErrorHandler,
- # HTTPDigestAuthHandler,
- # HTTPErrorProcessor,
- # HTTPHandler,
- # HTTPPasswordMgr,
- # HTTPPasswordMgrWithDefaultRealm,
- # HTTPRedirectHandler,
- # HTTPSHandler,
- # URLError,
- # build_opener,
- # install_opener,
- # OpenerDirector,
- # ProxyBasicAuthHandler,
- # ProxyDigestAuthHandler,
- # ProxyHandler,
- # Request,
- # UnknownHandler,
- # urlopen,
- # )
+ # from urllib2 import (
+ # AbstractBasicAuthHandler,
+ # AbstractDigestAuthHandler,
+ # BaseHandler,
+ # CacheFTPHandler,
+ # FileHandler,
+ # FTPHandler,
+ # HTTPBasicAuthHandler,
+ # HTTPCookieProcessor,
+ # HTTPDefaultErrorHandler,
+ # HTTPDigestAuthHandler,
+ # HTTPErrorProcessor,
+ # HTTPHandler,
+ # HTTPPasswordMgr,
+ # HTTPPasswordMgrWithDefaultRealm,
+ # HTTPRedirectHandler,
+ # HTTPSHandler,
+ # URLError,
+ # build_opener,
+ # install_opener,
+ # OpenerDirector,
+ # ProxyBasicAuthHandler,
+ # ProxyDigestAuthHandler,
+ # ProxyHandler,
+ # Request,
+ # UnknownHandler,
+ # urlopen,
+ # )
- # from urlparse import (
- # urldefrag
- # urljoin,
- # urlparse,
- # urlunparse,
- # urlsplit,
- # urlunsplit,
- # parse_qs,
- # parse_q"
- # )
+ # from urlparse import (
+ # urldefrag
+ # urljoin,
+ # urlparse,
+ # urlunparse,
+ # urlsplit,
+ # urlunsplit,
+ # parse_qs,
+ # parse_q"
+ # )
diff --git a/contrib/python/future/future/moves/urllib/response.py b/contrib/python/future/future/moves/urllib/response.py
index 99f0d64624..a287ae2833 100644
--- a/contrib/python/future/future/moves/urllib/response.py
+++ b/contrib/python/future/future/moves/urllib/response.py
@@ -1,12 +1,12 @@
-from future import standard_library
-from future.utils import PY3
-
-if PY3:
- from urllib.response import *
-else:
- __future_module__ = True
- with standard_library.suspend_hooks():
- from urllib import (addbase,
- addclosehook,
- addinfo,
- addinfourl)
+from future import standard_library
+from future.utils import PY3
+
+if PY3:
+ from urllib.response import *
+else:
+ __future_module__ = True
+ with standard_library.suspend_hooks():
+ from urllib import (addbase,
+ addclosehook,
+ addinfo,
+ addinfourl)
diff --git a/contrib/python/future/future/moves/urllib/robotparser.py b/contrib/python/future/future/moves/urllib/robotparser.py
index fe2360f49f..0dc8f5715c 100644
--- a/contrib/python/future/future/moves/urllib/robotparser.py
+++ b/contrib/python/future/future/moves/urllib/robotparser.py
@@ -1,8 +1,8 @@
-from __future__ import absolute_import
-from future.utils import PY3
-
-if PY3:
- from urllib.robotparser import *
-else:
- __future_module__ = True
- from robotparser import *
+from __future__ import absolute_import
+from future.utils import PY3
+
+if PY3:
+ from urllib.robotparser import *
+else:
+ __future_module__ = True
+ from robotparser import *
diff --git a/contrib/python/future/future/moves/winreg.py b/contrib/python/future/future/moves/winreg.py
index f03684b70d..c8b147568c 100644
--- a/contrib/python/future/future/moves/winreg.py
+++ b/contrib/python/future/future/moves/winreg.py
@@ -1,8 +1,8 @@
-from __future__ import absolute_import
-from future.utils import PY3
-
-if PY3:
- from winreg import *
-else:
- __future_module__ = True
- from _winreg import *
+from __future__ import absolute_import
+from future.utils import PY3
+
+if PY3:
+ from winreg import *
+else:
+ __future_module__ = True
+ from _winreg import *
diff --git a/contrib/python/future/future/moves/xmlrpc/client.py b/contrib/python/future/future/moves/xmlrpc/client.py
index 908fa5c937..4708cf8992 100644
--- a/contrib/python/future/future/moves/xmlrpc/client.py
+++ b/contrib/python/future/future/moves/xmlrpc/client.py
@@ -1,7 +1,7 @@
-from __future__ import absolute_import
-from future.utils import PY3
-
-if PY3:
- from xmlrpc.client import *
-else:
- from xmlrpclib import *
+from __future__ import absolute_import
+from future.utils import PY3
+
+if PY3:
+ from xmlrpc.client import *
+else:
+ from xmlrpclib import *
diff --git a/contrib/python/future/future/moves/xmlrpc/server.py b/contrib/python/future/future/moves/xmlrpc/server.py
index d64ee19273..1a8af3454b 100644
--- a/contrib/python/future/future/moves/xmlrpc/server.py
+++ b/contrib/python/future/future/moves/xmlrpc/server.py
@@ -1,7 +1,7 @@
-from __future__ import absolute_import
-from future.utils import PY3
-
-if PY3:
- from xmlrpc.server import *
-else:
- from xmlrpclib import *
+from __future__ import absolute_import
+from future.utils import PY3
+
+if PY3:
+ from xmlrpc.server import *
+else:
+ from xmlrpclib import *
diff --git a/contrib/python/future/future/standard_library/__init__.py b/contrib/python/future/future/standard_library/__init__.py
index 9e43ada73c..41c4f36df2 100644
--- a/contrib/python/future/future/standard_library/__init__.py
+++ b/contrib/python/future/future/standard_library/__init__.py
@@ -1,815 +1,815 @@
-"""
-Python 3 reorganized the standard library (PEP 3108). This module exposes
-several standard library modules to Python 2 under their new Python 3
-names.
-
-It is designed to be used as follows::
-
- from future import standard_library
- standard_library.install_aliases()
-
-And then these normal Py3 imports work on both Py3 and Py2::
-
- import builtins
- import copyreg
- import queue
- import reprlib
- import socketserver
- import winreg # on Windows only
- import test.support
- import html, html.parser, html.entites
- import http, http.client, http.server
- import http.cookies, http.cookiejar
- import urllib.parse, urllib.request, urllib.response, urllib.error, urllib.robotparser
- import xmlrpc.client, xmlrpc.server
-
- import _thread
- import _dummy_thread
- import _markupbase
-
- from itertools import filterfalse, zip_longest
- from sys import intern
- from collections import UserDict, UserList, UserString
+"""
+Python 3 reorganized the standard library (PEP 3108). This module exposes
+several standard library modules to Python 2 under their new Python 3
+names.
+
+It is designed to be used as follows::
+
+ from future import standard_library
+ standard_library.install_aliases()
+
+And then these normal Py3 imports work on both Py3 and Py2::
+
+ import builtins
+ import copyreg
+ import queue
+ import reprlib
+ import socketserver
+ import winreg # on Windows only
+ import test.support
+ import html, html.parser, html.entites
+ import http, http.client, http.server
+ import http.cookies, http.cookiejar
+ import urllib.parse, urllib.request, urllib.response, urllib.error, urllib.robotparser
+ import xmlrpc.client, xmlrpc.server
+
+ import _thread
+ import _dummy_thread
+ import _markupbase
+
+ from itertools import filterfalse, zip_longest
+ from sys import intern
+ from collections import UserDict, UserList, UserString
from collections import OrderedDict, Counter, ChainMap # even on Py2.6
- from subprocess import getoutput, getstatusoutput
- from subprocess import check_output # even on Py2.6
-
-(The renamed modules and functions are still available under their old
-names on Python 2.)
-
-This is a cleaner alternative to this idiom (see
-http://docs.pythonsprints.com/python3_porting/py-porting.html)::
-
- try:
- import queue
- except ImportError:
- import Queue as queue
-
-
-Limitations
------------
-We don't currently support these modules, but would like to::
-
- import dbm
- import dbm.dumb
- import dbm.gnu
- import collections.abc # on Py33
- import pickle # should (optionally) bring in cPickle on Python 2
-
-"""
-
-from __future__ import absolute_import, division, print_function
-
-import sys
-import logging
-import imp
-import contextlib
-import types
-import copy
-import os
-
-# Make a dedicated logger; leave the root logger to be configured
-# by the application.
-flog = logging.getLogger('future_stdlib')
-_formatter = logging.Formatter(logging.BASIC_FORMAT)
-_handler = logging.StreamHandler()
-_handler.setFormatter(_formatter)
-flog.addHandler(_handler)
-flog.setLevel(logging.WARN)
-
-from future.utils import PY2, PY3
-
-# The modules that are defined under the same names on Py3 but with
-# different contents in a significant way (e.g. submodules) are:
-# pickle (fast one)
-# dbm
-# urllib
-# test
-# email
-
-REPLACED_MODULES = set(['test', 'urllib', 'pickle', 'dbm']) # add email and dbm when we support it
-
-# The following module names are not present in Python 2.x, so they cause no
-# potential clashes between the old and new names:
-# http
-# html
-# tkinter
-# xmlrpc
-# Keys: Py2 / real module names
-# Values: Py3 / simulated module names
-RENAMES = {
- # 'cStringIO': 'io', # there's a new io module in Python 2.6
- # that provides StringIO and BytesIO
- # 'StringIO': 'io', # ditto
- # 'cPickle': 'pickle',
- '__builtin__': 'builtins',
- 'copy_reg': 'copyreg',
- 'Queue': 'queue',
- 'future.moves.socketserver': 'socketserver',
- 'ConfigParser': 'configparser',
- 'repr': 'reprlib',
- # 'FileDialog': 'tkinter.filedialog',
- # 'tkFileDialog': 'tkinter.filedialog',
- # 'SimpleDialog': 'tkinter.simpledialog',
- # 'tkSimpleDialog': 'tkinter.simpledialog',
- # 'tkColorChooser': 'tkinter.colorchooser',
- # 'tkCommonDialog': 'tkinter.commondialog',
- # 'Dialog': 'tkinter.dialog',
- # 'Tkdnd': 'tkinter.dnd',
- # 'tkFont': 'tkinter.font',
- # 'tkMessageBox': 'tkinter.messagebox',
- # 'ScrolledText': 'tkinter.scrolledtext',
- # 'Tkconstants': 'tkinter.constants',
- # 'Tix': 'tkinter.tix',
- # 'ttk': 'tkinter.ttk',
- # 'Tkinter': 'tkinter',
- '_winreg': 'winreg',
- 'thread': '_thread',
+ from subprocess import getoutput, getstatusoutput
+ from subprocess import check_output # even on Py2.6
+
+(The renamed modules and functions are still available under their old
+names on Python 2.)
+
+This is a cleaner alternative to this idiom (see
+http://docs.pythonsprints.com/python3_porting/py-porting.html)::
+
+ try:
+ import queue
+ except ImportError:
+ import Queue as queue
+
+
+Limitations
+-----------
+We don't currently support these modules, but would like to::
+
+ import dbm
+ import dbm.dumb
+ import dbm.gnu
+ import collections.abc # on Py33
+ import pickle # should (optionally) bring in cPickle on Python 2
+
+"""
+
+from __future__ import absolute_import, division, print_function
+
+import sys
+import logging
+import imp
+import contextlib
+import types
+import copy
+import os
+
+# Make a dedicated logger; leave the root logger to be configured
+# by the application.
+flog = logging.getLogger('future_stdlib')
+_formatter = logging.Formatter(logging.BASIC_FORMAT)
+_handler = logging.StreamHandler()
+_handler.setFormatter(_formatter)
+flog.addHandler(_handler)
+flog.setLevel(logging.WARN)
+
+from future.utils import PY2, PY3
+
+# The modules that are defined under the same names on Py3 but with
+# different contents in a significant way (e.g. submodules) are:
+# pickle (fast one)
+# dbm
+# urllib
+# test
+# email
+
+REPLACED_MODULES = set(['test', 'urllib', 'pickle', 'dbm']) # add email and dbm when we support it
+
+# The following module names are not present in Python 2.x, so they cause no
+# potential clashes between the old and new names:
+# http
+# html
+# tkinter
+# xmlrpc
+# Keys: Py2 / real module names
+# Values: Py3 / simulated module names
+RENAMES = {
+ # 'cStringIO': 'io', # there's a new io module in Python 2.6
+ # that provides StringIO and BytesIO
+ # 'StringIO': 'io', # ditto
+ # 'cPickle': 'pickle',
+ '__builtin__': 'builtins',
+ 'copy_reg': 'copyreg',
+ 'Queue': 'queue',
+ 'future.moves.socketserver': 'socketserver',
+ 'ConfigParser': 'configparser',
+ 'repr': 'reprlib',
+ # 'FileDialog': 'tkinter.filedialog',
+ # 'tkFileDialog': 'tkinter.filedialog',
+ # 'SimpleDialog': 'tkinter.simpledialog',
+ # 'tkSimpleDialog': 'tkinter.simpledialog',
+ # 'tkColorChooser': 'tkinter.colorchooser',
+ # 'tkCommonDialog': 'tkinter.commondialog',
+ # 'Dialog': 'tkinter.dialog',
+ # 'Tkdnd': 'tkinter.dnd',
+ # 'tkFont': 'tkinter.font',
+ # 'tkMessageBox': 'tkinter.messagebox',
+ # 'ScrolledText': 'tkinter.scrolledtext',
+ # 'Tkconstants': 'tkinter.constants',
+ # 'Tix': 'tkinter.tix',
+ # 'ttk': 'tkinter.ttk',
+ # 'Tkinter': 'tkinter',
+ '_winreg': 'winreg',
+ 'thread': '_thread',
'dummy_thread': '_dummy_thread' if sys.version_info < (3, 9) else '_thread',
- # 'anydbm': 'dbm', # causes infinite import loop
- # 'whichdb': 'dbm', # causes infinite import loop
- # anydbm and whichdb are handled by fix_imports2
- # 'dbhash': 'dbm.bsd',
- # 'dumbdbm': 'dbm.dumb',
- # 'dbm': 'dbm.ndbm',
- # 'gdbm': 'dbm.gnu',
- 'future.moves.xmlrpc': 'xmlrpc',
- # 'future.backports.email': 'email', # for use by urllib
- # 'DocXMLRPCServer': 'xmlrpc.server',
- # 'SimpleXMLRPCServer': 'xmlrpc.server',
- # 'httplib': 'http.client',
- # 'htmlentitydefs' : 'html.entities',
- # 'HTMLParser' : 'html.parser',
- # 'Cookie': 'http.cookies',
- # 'cookielib': 'http.cookiejar',
- # 'BaseHTTPServer': 'http.server',
- # 'SimpleHTTPServer': 'http.server',
- # 'CGIHTTPServer': 'http.server',
- # 'future.backports.test': 'test', # primarily for renaming test_support to support
- # 'commands': 'subprocess',
- # 'urlparse' : 'urllib.parse',
- # 'robotparser' : 'urllib.robotparser',
- # 'abc': 'collections.abc', # for Py33
- # 'future.utils.six.moves.html': 'html',
- # 'future.utils.six.moves.http': 'http',
- 'future.moves.html': 'html',
- 'future.moves.http': 'http',
- # 'future.backports.urllib': 'urllib',
- # 'future.utils.six.moves.urllib': 'urllib',
- 'future.moves._markupbase': '_markupbase',
- }
-
-
-# It is complicated and apparently brittle to mess around with the
-# ``sys.modules`` cache in order to support "import urllib" meaning two
-# different things (Py2.7 urllib and backported Py3.3-like urllib) in different
-# contexts. So we require explicit imports for these modules.
-assert len(set(RENAMES.values()) & set(REPLACED_MODULES)) == 0
-
-
-# Harmless renames that we can insert.
-# These modules need names from elsewhere being added to them:
-# subprocess: should provide getoutput and other fns from commands
-# module but these fns are missing: getstatus, mk2arg,
-# mkarg
-# re: needs an ASCII constant that works compatibly with Py3
-
-# etc: see lib2to3/fixes/fix_imports.py
-
-# (New module name, new object name, old module name, old object name)
-MOVES = [('collections', 'UserList', 'UserList', 'UserList'),
- ('collections', 'UserDict', 'UserDict', 'UserDict'),
- ('collections', 'UserString','UserString', 'UserString'),
+ # 'anydbm': 'dbm', # causes infinite import loop
+ # 'whichdb': 'dbm', # causes infinite import loop
+ # anydbm and whichdb are handled by fix_imports2
+ # 'dbhash': 'dbm.bsd',
+ # 'dumbdbm': 'dbm.dumb',
+ # 'dbm': 'dbm.ndbm',
+ # 'gdbm': 'dbm.gnu',
+ 'future.moves.xmlrpc': 'xmlrpc',
+ # 'future.backports.email': 'email', # for use by urllib
+ # 'DocXMLRPCServer': 'xmlrpc.server',
+ # 'SimpleXMLRPCServer': 'xmlrpc.server',
+ # 'httplib': 'http.client',
+ # 'htmlentitydefs' : 'html.entities',
+ # 'HTMLParser' : 'html.parser',
+ # 'Cookie': 'http.cookies',
+ # 'cookielib': 'http.cookiejar',
+ # 'BaseHTTPServer': 'http.server',
+ # 'SimpleHTTPServer': 'http.server',
+ # 'CGIHTTPServer': 'http.server',
+ # 'future.backports.test': 'test', # primarily for renaming test_support to support
+ # 'commands': 'subprocess',
+ # 'urlparse' : 'urllib.parse',
+ # 'robotparser' : 'urllib.robotparser',
+ # 'abc': 'collections.abc', # for Py33
+ # 'future.utils.six.moves.html': 'html',
+ # 'future.utils.six.moves.http': 'http',
+ 'future.moves.html': 'html',
+ 'future.moves.http': 'http',
+ # 'future.backports.urllib': 'urllib',
+ # 'future.utils.six.moves.urllib': 'urllib',
+ 'future.moves._markupbase': '_markupbase',
+ }
+
+
+# It is complicated and apparently brittle to mess around with the
+# ``sys.modules`` cache in order to support "import urllib" meaning two
+# different things (Py2.7 urllib and backported Py3.3-like urllib) in different
+# contexts. So we require explicit imports for these modules.
+assert len(set(RENAMES.values()) & set(REPLACED_MODULES)) == 0
+
+
+# Harmless renames that we can insert.
+# These modules need names from elsewhere being added to them:
+# subprocess: should provide getoutput and other fns from commands
+# module but these fns are missing: getstatus, mk2arg,
+# mkarg
+# re: needs an ASCII constant that works compatibly with Py3
+
+# etc: see lib2to3/fixes/fix_imports.py
+
+# (New module name, new object name, old module name, old object name)
+MOVES = [('collections', 'UserList', 'UserList', 'UserList'),
+ ('collections', 'UserDict', 'UserDict', 'UserDict'),
+ ('collections', 'UserString','UserString', 'UserString'),
('collections', 'ChainMap', 'future.backports.misc', 'ChainMap'),
- ('itertools', 'filterfalse','itertools', 'ifilterfalse'),
- ('itertools', 'zip_longest','itertools', 'izip_longest'),
- ('sys', 'intern','__builtin__', 'intern'),
- # The re module has no ASCII flag in Py2, but this is the default.
- # Set re.ASCII to a zero constant. stat.ST_MODE just happens to be one
- # (and it exists on Py2.6+).
- ('re', 'ASCII','stat', 'ST_MODE'),
- ('base64', 'encodebytes','base64', 'encodestring'),
- ('base64', 'decodebytes','base64', 'decodestring'),
- ('subprocess', 'getoutput', 'commands', 'getoutput'),
- ('subprocess', 'getstatusoutput', 'commands', 'getstatusoutput'),
- ('subprocess', 'check_output', 'future.backports.misc', 'check_output'),
- ('math', 'ceil', 'future.backports.misc', 'ceil'),
- ('collections', 'OrderedDict', 'future.backports.misc', 'OrderedDict'),
- ('collections', 'Counter', 'future.backports.misc', 'Counter'),
+ ('itertools', 'filterfalse','itertools', 'ifilterfalse'),
+ ('itertools', 'zip_longest','itertools', 'izip_longest'),
+ ('sys', 'intern','__builtin__', 'intern'),
+ # The re module has no ASCII flag in Py2, but this is the default.
+ # Set re.ASCII to a zero constant. stat.ST_MODE just happens to be one
+ # (and it exists on Py2.6+).
+ ('re', 'ASCII','stat', 'ST_MODE'),
+ ('base64', 'encodebytes','base64', 'encodestring'),
+ ('base64', 'decodebytes','base64', 'decodestring'),
+ ('subprocess', 'getoutput', 'commands', 'getoutput'),
+ ('subprocess', 'getstatusoutput', 'commands', 'getstatusoutput'),
+ ('subprocess', 'check_output', 'future.backports.misc', 'check_output'),
+ ('math', 'ceil', 'future.backports.misc', 'ceil'),
+ ('collections', 'OrderedDict', 'future.backports.misc', 'OrderedDict'),
+ ('collections', 'Counter', 'future.backports.misc', 'Counter'),
('collections', 'ChainMap', 'future.backports.misc', 'ChainMap'),
- ('itertools', 'count', 'future.backports.misc', 'count'),
- ('reprlib', 'recursive_repr', 'future.backports.misc', 'recursive_repr'),
- ('functools', 'cmp_to_key', 'future.backports.misc', 'cmp_to_key'),
-
-# This is no use, since "import urllib.request" etc. still fails:
-# ('urllib', 'error', 'future.moves.urllib', 'error'),
-# ('urllib', 'parse', 'future.moves.urllib', 'parse'),
-# ('urllib', 'request', 'future.moves.urllib', 'request'),
-# ('urllib', 'response', 'future.moves.urllib', 'response'),
-# ('urllib', 'robotparser', 'future.moves.urllib', 'robotparser'),
- ]
-
-
-# A minimal example of an import hook:
-# class WarnOnImport(object):
-# def __init__(self, *args):
-# self.module_names = args
-#
-# def find_module(self, fullname, path=None):
-# if fullname in self.module_names:
-# self.path = path
-# return self
-# return None
-#
-# def load_module(self, name):
-# if name in sys.modules:
-# return sys.modules[name]
-# module_info = imp.find_module(name, self.path)
-# module = imp.load_module(name, *module_info)
-# sys.modules[name] = module
-# flog.warning("Imported deprecated module %s", name)
-# return module
-
-
-class RenameImport(object):
- """
- A class for import hooks mapping Py3 module names etc. to the Py2 equivalents.
- """
- # Different RenameImport classes are created when importing this module from
- # different source files. This causes isinstance(hook, RenameImport) checks
- # to produce inconsistent results. We add this RENAMER attribute here so
- # remove_hooks() and install_hooks() can find instances of these classes
- # easily:
- RENAMER = True
-
- def __init__(self, old_to_new):
- '''
- Pass in a dictionary-like object mapping from old names to new
- names. E.g. {'ConfigParser': 'configparser', 'cPickle': 'pickle'}
- '''
- self.old_to_new = old_to_new
- both = set(old_to_new.keys()) & set(old_to_new.values())
- assert (len(both) == 0 and
- len(set(old_to_new.values())) == len(old_to_new.values())), \
- 'Ambiguity in renaming (handler not implemented)'
- self.new_to_old = dict((new, old) for (old, new) in old_to_new.items())
-
- def find_module(self, fullname, path=None):
- # Handles hierarchical importing: package.module.module2
- new_base_names = set([s.split('.')[0] for s in self.new_to_old])
- # Before v0.12: Was: if fullname in set(self.old_to_new) | new_base_names:
- if fullname in new_base_names:
- return self
- return None
-
- def load_module(self, name):
- path = None
- if name in sys.modules:
- return sys.modules[name]
- elif name in self.new_to_old:
- # New name. Look up the corresponding old (Py2) name:
- oldname = self.new_to_old[name]
- module = self._find_and_load_module(oldname)
- # module.__future_module__ = True
- else:
- module = self._find_and_load_module(name)
- # In any case, make it available under the requested (Py3) name
- sys.modules[name] = module
- return module
-
- def _find_and_load_module(self, name, path=None):
- """
- Finds and loads it. But if there's a . in the name, handles it
- properly.
- """
- bits = name.split('.')
- while len(bits) > 1:
- # Treat the first bit as a package
- packagename = bits.pop(0)
- package = self._find_and_load_module(packagename, path)
- try:
- path = package.__path__
- except AttributeError:
- # This could be e.g. moves.
- flog.debug('Package {0} has no __path__.'.format(package))
- if name in sys.modules:
- return sys.modules[name]
- flog.debug('What to do here?')
-
- name = bits[0]
- module_info = imp.find_module(name, path)
- return imp.load_module(name, *module_info)
-
-
-class hooks(object):
- """
- Acts as a context manager. Saves the state of sys.modules and restores it
- after the 'with' block.
-
- Use like this:
-
- >>> from future import standard_library
- >>> with standard_library.hooks():
- ... import http.client
- >>> import requests
-
- For this to work, http.client will be scrubbed from sys.modules after the
- 'with' block. That way the modules imported in the 'with' block will
- continue to be accessible in the current namespace but not from any
- imported modules (like requests).
- """
- def __enter__(self):
- # flog.debug('Entering hooks context manager')
- self.old_sys_modules = copy.copy(sys.modules)
- self.hooks_were_installed = detect_hooks()
- # self.scrubbed = scrub_py2_sys_modules()
- install_hooks()
- return self
-
- def __exit__(self, *args):
- # flog.debug('Exiting hooks context manager')
- # restore_sys_modules(self.scrubbed)
- if not self.hooks_were_installed:
- remove_hooks()
- # scrub_future_sys_modules()
-
-# Sanity check for is_py2_stdlib_module(): We aren't replacing any
-# builtin modules names:
-if PY2:
- assert len(set(RENAMES.values()) & set(sys.builtin_module_names)) == 0
-
-
-def is_py2_stdlib_module(m):
- """
- Tries to infer whether the module m is from the Python 2 standard library.
- This may not be reliable on all systems.
- """
- if PY3:
- return False
- if not 'stdlib_path' in is_py2_stdlib_module.__dict__:
- stdlib_files = [contextlib.__file__, os.__file__, copy.__file__]
- stdlib_paths = [os.path.split(f)[0] for f in stdlib_files]
- if not len(set(stdlib_paths)) == 1:
- # This seems to happen on travis-ci.org. Very strange. We'll try to
- # ignore it.
- flog.warn('Multiple locations found for the Python standard '
- 'library: %s' % stdlib_paths)
- # Choose the first one arbitrarily
- is_py2_stdlib_module.stdlib_path = stdlib_paths[0]
-
- if m.__name__ in sys.builtin_module_names:
- return True
-
- if hasattr(m, '__file__'):
- modpath = os.path.split(m.__file__)
- if (modpath[0].startswith(is_py2_stdlib_module.stdlib_path) and
- 'site-packages' not in modpath[0]):
- return True
-
- return False
-
-
-def scrub_py2_sys_modules():
- """
- Removes any Python 2 standard library modules from ``sys.modules`` that
- would interfere with Py3-style imports using import hooks. Examples are
- modules with the same names (like urllib or email).
-
- (Note that currently import hooks are disabled for modules like these
- with ambiguous names anyway ...)
- """
- if PY3:
- return {}
- scrubbed = {}
- for modulename in REPLACED_MODULES & set(RENAMES.keys()):
- if not modulename in sys.modules:
- continue
-
- module = sys.modules[modulename]
-
- if is_py2_stdlib_module(module):
- flog.debug('Deleting (Py2) {} from sys.modules'.format(modulename))
- scrubbed[modulename] = sys.modules[modulename]
- del sys.modules[modulename]
- return scrubbed
-
-
-def scrub_future_sys_modules():
- """
- Deprecated.
- """
+ ('itertools', 'count', 'future.backports.misc', 'count'),
+ ('reprlib', 'recursive_repr', 'future.backports.misc', 'recursive_repr'),
+ ('functools', 'cmp_to_key', 'future.backports.misc', 'cmp_to_key'),
+
+# This is no use, since "import urllib.request" etc. still fails:
+# ('urllib', 'error', 'future.moves.urllib', 'error'),
+# ('urllib', 'parse', 'future.moves.urllib', 'parse'),
+# ('urllib', 'request', 'future.moves.urllib', 'request'),
+# ('urllib', 'response', 'future.moves.urllib', 'response'),
+# ('urllib', 'robotparser', 'future.moves.urllib', 'robotparser'),
+ ]
+
+
+# A minimal example of an import hook:
+# class WarnOnImport(object):
+# def __init__(self, *args):
+# self.module_names = args
+#
+# def find_module(self, fullname, path=None):
+# if fullname in self.module_names:
+# self.path = path
+# return self
+# return None
+#
+# def load_module(self, name):
+# if name in sys.modules:
+# return sys.modules[name]
+# module_info = imp.find_module(name, self.path)
+# module = imp.load_module(name, *module_info)
+# sys.modules[name] = module
+# flog.warning("Imported deprecated module %s", name)
+# return module
+
+
+class RenameImport(object):
+ """
+ A class for import hooks mapping Py3 module names etc. to the Py2 equivalents.
+ """
+ # Different RenameImport classes are created when importing this module from
+ # different source files. This causes isinstance(hook, RenameImport) checks
+ # to produce inconsistent results. We add this RENAMER attribute here so
+ # remove_hooks() and install_hooks() can find instances of these classes
+ # easily:
+ RENAMER = True
+
+ def __init__(self, old_to_new):
+ '''
+ Pass in a dictionary-like object mapping from old names to new
+ names. E.g. {'ConfigParser': 'configparser', 'cPickle': 'pickle'}
+ '''
+ self.old_to_new = old_to_new
+ both = set(old_to_new.keys()) & set(old_to_new.values())
+ assert (len(both) == 0 and
+ len(set(old_to_new.values())) == len(old_to_new.values())), \
+ 'Ambiguity in renaming (handler not implemented)'
+ self.new_to_old = dict((new, old) for (old, new) in old_to_new.items())
+
+ def find_module(self, fullname, path=None):
+ # Handles hierarchical importing: package.module.module2
+ new_base_names = set([s.split('.')[0] for s in self.new_to_old])
+ # Before v0.12: Was: if fullname in set(self.old_to_new) | new_base_names:
+ if fullname in new_base_names:
+ return self
+ return None
+
+ def load_module(self, name):
+ path = None
+ if name in sys.modules:
+ return sys.modules[name]
+ elif name in self.new_to_old:
+ # New name. Look up the corresponding old (Py2) name:
+ oldname = self.new_to_old[name]
+ module = self._find_and_load_module(oldname)
+ # module.__future_module__ = True
+ else:
+ module = self._find_and_load_module(name)
+ # In any case, make it available under the requested (Py3) name
+ sys.modules[name] = module
+ return module
+
+ def _find_and_load_module(self, name, path=None):
+ """
+ Finds and loads it. But if there's a . in the name, handles it
+ properly.
+ """
+ bits = name.split('.')
+ while len(bits) > 1:
+ # Treat the first bit as a package
+ packagename = bits.pop(0)
+ package = self._find_and_load_module(packagename, path)
+ try:
+ path = package.__path__
+ except AttributeError:
+ # This could be e.g. moves.
+ flog.debug('Package {0} has no __path__.'.format(package))
+ if name in sys.modules:
+ return sys.modules[name]
+ flog.debug('What to do here?')
+
+ name = bits[0]
+ module_info = imp.find_module(name, path)
+ return imp.load_module(name, *module_info)
+
+
+class hooks(object):
+ """
+ Acts as a context manager. Saves the state of sys.modules and restores it
+ after the 'with' block.
+
+ Use like this:
+
+ >>> from future import standard_library
+ >>> with standard_library.hooks():
+ ... import http.client
+ >>> import requests
+
+ For this to work, http.client will be scrubbed from sys.modules after the
+ 'with' block. That way the modules imported in the 'with' block will
+ continue to be accessible in the current namespace but not from any
+ imported modules (like requests).
+ """
+ def __enter__(self):
+ # flog.debug('Entering hooks context manager')
+ self.old_sys_modules = copy.copy(sys.modules)
+ self.hooks_were_installed = detect_hooks()
+ # self.scrubbed = scrub_py2_sys_modules()
+ install_hooks()
+ return self
+
+ def __exit__(self, *args):
+ # flog.debug('Exiting hooks context manager')
+ # restore_sys_modules(self.scrubbed)
+ if not self.hooks_were_installed:
+ remove_hooks()
+ # scrub_future_sys_modules()
+
+# Sanity check for is_py2_stdlib_module(): We aren't replacing any
+# builtin modules names:
+if PY2:
+ assert len(set(RENAMES.values()) & set(sys.builtin_module_names)) == 0
+
+
+def is_py2_stdlib_module(m):
+ """
+ Tries to infer whether the module m is from the Python 2 standard library.
+ This may not be reliable on all systems.
+ """
+ if PY3:
+ return False
+ if not 'stdlib_path' in is_py2_stdlib_module.__dict__:
+ stdlib_files = [contextlib.__file__, os.__file__, copy.__file__]
+ stdlib_paths = [os.path.split(f)[0] for f in stdlib_files]
+ if not len(set(stdlib_paths)) == 1:
+ # This seems to happen on travis-ci.org. Very strange. We'll try to
+ # ignore it.
+ flog.warn('Multiple locations found for the Python standard '
+ 'library: %s' % stdlib_paths)
+ # Choose the first one arbitrarily
+ is_py2_stdlib_module.stdlib_path = stdlib_paths[0]
+
+ if m.__name__ in sys.builtin_module_names:
+ return True
+
+ if hasattr(m, '__file__'):
+ modpath = os.path.split(m.__file__)
+ if (modpath[0].startswith(is_py2_stdlib_module.stdlib_path) and
+ 'site-packages' not in modpath[0]):
+ return True
+
+ return False
+
+
+def scrub_py2_sys_modules():
+ """
+ Removes any Python 2 standard library modules from ``sys.modules`` that
+ would interfere with Py3-style imports using import hooks. Examples are
+ modules with the same names (like urllib or email).
+
+ (Note that currently import hooks are disabled for modules like these
+ with ambiguous names anyway ...)
+ """
+ if PY3:
+ return {}
+ scrubbed = {}
+ for modulename in REPLACED_MODULES & set(RENAMES.keys()):
+ if not modulename in sys.modules:
+ continue
+
+ module = sys.modules[modulename]
+
+ if is_py2_stdlib_module(module):
+ flog.debug('Deleting (Py2) {} from sys.modules'.format(modulename))
+ scrubbed[modulename] = sys.modules[modulename]
+ del sys.modules[modulename]
+ return scrubbed
+
+
+def scrub_future_sys_modules():
+ """
+ Deprecated.
+ """
return {}
-
-class suspend_hooks(object):
- """
- Acts as a context manager. Use like this:
-
- >>> from future import standard_library
- >>> standard_library.install_hooks()
- >>> import http.client
- >>> # ...
- >>> with standard_library.suspend_hooks():
- >>> import requests # incompatible with ``future``'s standard library hooks
-
- If the hooks were disabled before the context, they are not installed when
- the context is left.
- """
- def __enter__(self):
- self.hooks_were_installed = detect_hooks()
- remove_hooks()
- # self.scrubbed = scrub_future_sys_modules()
- return self
-
- def __exit__(self, *args):
- if self.hooks_were_installed:
- install_hooks()
- # restore_sys_modules(self.scrubbed)
-
-
-def restore_sys_modules(scrubbed):
- """
- Add any previously scrubbed modules back to the sys.modules cache,
- but only if it's safe to do so.
- """
- clash = set(sys.modules) & set(scrubbed)
- if len(clash) != 0:
- # If several, choose one arbitrarily to raise an exception about
- first = list(clash)[0]
- raise ImportError('future module {} clashes with Py2 module'
- .format(first))
- sys.modules.update(scrubbed)
-
-
-def install_aliases():
- """
- Monkey-patches the standard library in Py2.6/7 to provide
- aliases for better Py3 compatibility.
- """
- if PY3:
- return
- # if hasattr(install_aliases, 'run_already'):
- # return
- for (newmodname, newobjname, oldmodname, oldobjname) in MOVES:
- __import__(newmodname)
- # We look up the module in sys.modules because __import__ just returns the
- # top-level package:
- newmod = sys.modules[newmodname]
- # newmod.__future_module__ = True
-
- __import__(oldmodname)
- oldmod = sys.modules[oldmodname]
-
- obj = getattr(oldmod, oldobjname)
- setattr(newmod, newobjname, obj)
-
- # Hack for urllib so it appears to have the same structure on Py2 as on Py3
- import urllib
- from future.backports.urllib import request
- from future.backports.urllib import response
- from future.backports.urllib import parse
- from future.backports.urllib import error
- from future.backports.urllib import robotparser
- urllib.request = request
- urllib.response = response
- urllib.parse = parse
- urllib.error = error
- urllib.robotparser = robotparser
- sys.modules['urllib.request'] = request
- sys.modules['urllib.response'] = response
- sys.modules['urllib.parse'] = parse
- sys.modules['urllib.error'] = error
- sys.modules['urllib.robotparser'] = robotparser
-
- # Patch the test module so it appears to have the same structure on Py2 as on Py3
- try:
- import test
- except ImportError:
- pass
- try:
- from future.moves.test import support
- except ImportError:
- pass
- else:
- test.support = support
- sys.modules['test.support'] = support
-
- # Patch the dbm module so it appears to have the same structure on Py2 as on Py3
- try:
- import dbm
- except ImportError:
- pass
- else:
- from future.moves.dbm import dumb
- dbm.dumb = dumb
- sys.modules['dbm.dumb'] = dumb
- try:
- from future.moves.dbm import gnu
- except ImportError:
- pass
- else:
- dbm.gnu = gnu
- sys.modules['dbm.gnu'] = gnu
- try:
- from future.moves.dbm import ndbm
- except ImportError:
- pass
- else:
- dbm.ndbm = ndbm
- sys.modules['dbm.ndbm'] = ndbm
-
- # install_aliases.run_already = True
-
-
-def install_hooks():
- """
- This function installs the future.standard_library import hook into
- sys.meta_path.
- """
- if PY3:
- return
-
- install_aliases()
-
- flog.debug('sys.meta_path was: {0}'.format(sys.meta_path))
- flog.debug('Installing hooks ...')
-
- # Add it unless it's there already
- newhook = RenameImport(RENAMES)
- if not detect_hooks():
- sys.meta_path.append(newhook)
- flog.debug('sys.meta_path is now: {0}'.format(sys.meta_path))
-
-
-def enable_hooks():
- """
- Deprecated. Use install_hooks() instead. This will be removed by
- ``future`` v1.0.
- """
- install_hooks()
-
-
-def remove_hooks(scrub_sys_modules=False):
- """
- This function removes the import hook from sys.meta_path.
- """
- if PY3:
- return
- flog.debug('Uninstalling hooks ...')
- # Loop backwards, so deleting items keeps the ordering:
- for i, hook in list(enumerate(sys.meta_path))[::-1]:
- if hasattr(hook, 'RENAMER'):
- del sys.meta_path[i]
-
- # Explicit is better than implicit. In the future the interface should
- # probably change so that scrubbing the import hooks requires a separate
- # function call. Left as is for now for backward compatibility with
- # v0.11.x.
- if scrub_sys_modules:
- scrub_future_sys_modules()
-
-
-def disable_hooks():
- """
- Deprecated. Use remove_hooks() instead. This will be removed by
- ``future`` v1.0.
- """
- remove_hooks()
-
-
-def detect_hooks():
- """
- Returns True if the import hooks are installed, False if not.
- """
- flog.debug('Detecting hooks ...')
- present = any([hasattr(hook, 'RENAMER') for hook in sys.meta_path])
- if present:
- flog.debug('Detected.')
- else:
- flog.debug('Not detected.')
- return present
-
-
-# As of v0.12, this no longer happens implicitly:
-# if not PY3:
-# install_hooks()
-
-
-if not hasattr(sys, 'py2_modules'):
- sys.py2_modules = {}
-
-def cache_py2_modules():
- """
- Currently this function is unneeded, as we are not attempting to provide import hooks
- for modules with ambiguous names: email, urllib, pickle.
- """
- if len(sys.py2_modules) != 0:
- return
- assert not detect_hooks()
- import urllib
- sys.py2_modules['urllib'] = urllib
-
- import email
- sys.py2_modules['email'] = email
-
- import pickle
- sys.py2_modules['pickle'] = pickle
-
- # Not all Python installations have test module. (Anaconda doesn't, for example.)
- # try:
- # import test
- # except ImportError:
- # sys.py2_modules['test'] = None
- # sys.py2_modules['test'] = test
-
- # import dbm
- # sys.py2_modules['dbm'] = dbm
-
-
-def import_(module_name, backport=False):
- """
- Pass a (potentially dotted) module name of a Python 3 standard library
- module. This function imports the module compatibly on Py2 and Py3 and
- returns the top-level module.
-
- Example use:
- >>> http = import_('http.client')
- >>> http = import_('http.server')
- >>> urllib = import_('urllib.request')
-
- Then:
- >>> conn = http.client.HTTPConnection(...)
- >>> response = urllib.request.urlopen('http://mywebsite.com')
- >>> # etc.
-
- Use as follows:
- >>> package_name = import_(module_name)
-
- On Py3, equivalent to this:
-
- >>> import module_name
-
- On Py2, equivalent to this if backport=False:
-
- >>> from future.moves import module_name
-
- or to this if backport=True:
-
- >>> from future.backports import module_name
-
- except that it also handles dotted module names such as ``http.client``
- The effect then is like this:
-
- >>> from future.backports import module
- >>> from future.backports.module import submodule
- >>> module.submodule = submodule
-
- Note that this would be a SyntaxError in Python:
-
- >>> from future.backports import http.client
-
- """
- # Python 2.6 doesn't have importlib in the stdlib, so it requires
- # the backported ``importlib`` package from PyPI as a dependency to use
- # this function:
- import importlib
-
- if PY3:
- return __import__(module_name)
- else:
- # client.blah = blah
- # Then http.client = client
- # etc.
- if backport:
- prefix = 'future.backports'
- else:
- prefix = 'future.moves'
- parts = prefix.split('.') + module_name.split('.')
-
- modules = []
- for i, part in enumerate(parts):
- sofar = '.'.join(parts[:i+1])
- modules.append(importlib.import_module(sofar))
- for i, part in reversed(list(enumerate(parts))):
- if i == 0:
- break
- setattr(modules[i-1], part, modules[i])
-
- # Return the next-most top-level module after future.backports / future.moves:
- return modules[2]
-
-
-def from_import(module_name, *symbol_names, **kwargs):
- """
- Example use:
- >>> HTTPConnection = from_import('http.client', 'HTTPConnection')
- >>> HTTPServer = from_import('http.server', 'HTTPServer')
- >>> urlopen, urlparse = from_import('urllib.request', 'urlopen', 'urlparse')
-
- Equivalent to this on Py3:
-
- >>> from module_name import symbol_names[0], symbol_names[1], ...
-
- and this on Py2:
-
- >>> from future.moves.module_name import symbol_names[0], ...
-
- or:
-
- >>> from future.backports.module_name import symbol_names[0], ...
-
- except that it also handles dotted module names such as ``http.client``.
- """
-
- if PY3:
- return __import__(module_name)
- else:
- if 'backport' in kwargs and bool(kwargs['backport']):
- prefix = 'future.backports'
- else:
- prefix = 'future.moves'
- parts = prefix.split('.') + module_name.split('.')
- module = importlib.import_module(prefix + '.' + module_name)
- output = [getattr(module, name) for name in symbol_names]
- if len(output) == 1:
- return output[0]
- else:
- return output
-
-
-class exclude_local_folder_imports(object):
- """
- A context-manager that prevents standard library modules like configparser
- from being imported from the local python-future source folder on Py3.
-
- (This was need prior to v0.16.0 because the presence of a configparser
- folder would otherwise have prevented setuptools from running on Py3. Maybe
- it's not needed any more?)
- """
- def __init__(self, *args):
- assert len(args) > 0
- self.module_names = args
- # Disallow dotted module names like http.client:
- if any(['.' in m for m in self.module_names]):
- raise NotImplementedError('Dotted module names are not supported')
-
- def __enter__(self):
- self.old_sys_path = copy.copy(sys.path)
- self.old_sys_modules = copy.copy(sys.modules)
- if sys.version_info[0] < 3:
- return
- # The presence of all these indicates we've found our source folder,
- # because `builtins` won't have been installed in site-packages by setup.py:
- FUTURE_SOURCE_SUBFOLDERS = ['future', 'past', 'libfuturize', 'libpasteurize', 'builtins']
-
- # Look for the future source folder:
- for folder in self.old_sys_path:
- if all([os.path.exists(os.path.join(folder, subfolder))
- for subfolder in FUTURE_SOURCE_SUBFOLDERS]):
- # Found it. Remove it.
- sys.path.remove(folder)
-
- # Ensure we import the system module:
- for m in self.module_names:
- # Delete the module and any submodules from sys.modules:
- # for key in list(sys.modules):
- # if key == m or key.startswith(m + '.'):
- # try:
- # del sys.modules[key]
- # except KeyError:
- # pass
- try:
- module = __import__(m, level=0)
- except ImportError:
- # There's a problem importing the system module. E.g. the
- # winreg module is not available except on Windows.
- pass
-
- def __exit__(self, *args):
- # Restore sys.path and sys.modules:
- sys.path = self.old_sys_path
- for m in set(self.old_sys_modules.keys()) - set(sys.modules.keys()):
- sys.modules[m] = self.old_sys_modules[m]
-
-TOP_LEVEL_MODULES = ['builtins',
- 'copyreg',
- 'html',
- 'http',
- 'queue',
- 'reprlib',
- 'socketserver',
- 'test',
- 'tkinter',
- 'winreg',
- 'xmlrpc',
- '_dummy_thread',
- '_markupbase',
- '_thread',
- ]
-
-def import_top_level_modules():
- with exclude_local_folder_imports(*TOP_LEVEL_MODULES):
- for m in TOP_LEVEL_MODULES:
- try:
- __import__(m)
- except ImportError: # e.g. winreg
- pass
+
+class suspend_hooks(object):
+ """
+ Acts as a context manager. Use like this:
+
+ >>> from future import standard_library
+ >>> standard_library.install_hooks()
+ >>> import http.client
+ >>> # ...
+ >>> with standard_library.suspend_hooks():
+ >>> import requests # incompatible with ``future``'s standard library hooks
+
+ If the hooks were disabled before the context, they are not installed when
+ the context is left.
+ """
+ def __enter__(self):
+ self.hooks_were_installed = detect_hooks()
+ remove_hooks()
+ # self.scrubbed = scrub_future_sys_modules()
+ return self
+
+ def __exit__(self, *args):
+ if self.hooks_were_installed:
+ install_hooks()
+ # restore_sys_modules(self.scrubbed)
+
+
+def restore_sys_modules(scrubbed):
+ """
+ Add any previously scrubbed modules back to the sys.modules cache,
+ but only if it's safe to do so.
+ """
+ clash = set(sys.modules) & set(scrubbed)
+ if len(clash) != 0:
+ # If several, choose one arbitrarily to raise an exception about
+ first = list(clash)[0]
+ raise ImportError('future module {} clashes with Py2 module'
+ .format(first))
+ sys.modules.update(scrubbed)
+
+
+def install_aliases():
+ """
+ Monkey-patches the standard library in Py2.6/7 to provide
+ aliases for better Py3 compatibility.
+ """
+ if PY3:
+ return
+ # if hasattr(install_aliases, 'run_already'):
+ # return
+ for (newmodname, newobjname, oldmodname, oldobjname) in MOVES:
+ __import__(newmodname)
+ # We look up the module in sys.modules because __import__ just returns the
+ # top-level package:
+ newmod = sys.modules[newmodname]
+ # newmod.__future_module__ = True
+
+ __import__(oldmodname)
+ oldmod = sys.modules[oldmodname]
+
+ obj = getattr(oldmod, oldobjname)
+ setattr(newmod, newobjname, obj)
+
+ # Hack for urllib so it appears to have the same structure on Py2 as on Py3
+ import urllib
+ from future.backports.urllib import request
+ from future.backports.urllib import response
+ from future.backports.urllib import parse
+ from future.backports.urllib import error
+ from future.backports.urllib import robotparser
+ urllib.request = request
+ urllib.response = response
+ urllib.parse = parse
+ urllib.error = error
+ urllib.robotparser = robotparser
+ sys.modules['urllib.request'] = request
+ sys.modules['urllib.response'] = response
+ sys.modules['urllib.parse'] = parse
+ sys.modules['urllib.error'] = error
+ sys.modules['urllib.robotparser'] = robotparser
+
+ # Patch the test module so it appears to have the same structure on Py2 as on Py3
+ try:
+ import test
+ except ImportError:
+ pass
+ try:
+ from future.moves.test import support
+ except ImportError:
+ pass
+ else:
+ test.support = support
+ sys.modules['test.support'] = support
+
+ # Patch the dbm module so it appears to have the same structure on Py2 as on Py3
+ try:
+ import dbm
+ except ImportError:
+ pass
+ else:
+ from future.moves.dbm import dumb
+ dbm.dumb = dumb
+ sys.modules['dbm.dumb'] = dumb
+ try:
+ from future.moves.dbm import gnu
+ except ImportError:
+ pass
+ else:
+ dbm.gnu = gnu
+ sys.modules['dbm.gnu'] = gnu
+ try:
+ from future.moves.dbm import ndbm
+ except ImportError:
+ pass
+ else:
+ dbm.ndbm = ndbm
+ sys.modules['dbm.ndbm'] = ndbm
+
+ # install_aliases.run_already = True
+
+
+def install_hooks():
+ """
+ This function installs the future.standard_library import hook into
+ sys.meta_path.
+ """
+ if PY3:
+ return
+
+ install_aliases()
+
+ flog.debug('sys.meta_path was: {0}'.format(sys.meta_path))
+ flog.debug('Installing hooks ...')
+
+ # Add it unless it's there already
+ newhook = RenameImport(RENAMES)
+ if not detect_hooks():
+ sys.meta_path.append(newhook)
+ flog.debug('sys.meta_path is now: {0}'.format(sys.meta_path))
+
+
+def enable_hooks():
+ """
+ Deprecated. Use install_hooks() instead. This will be removed by
+ ``future`` v1.0.
+ """
+ install_hooks()
+
+
+def remove_hooks(scrub_sys_modules=False):
+ """
+ This function removes the import hook from sys.meta_path.
+ """
+ if PY3:
+ return
+ flog.debug('Uninstalling hooks ...')
+ # Loop backwards, so deleting items keeps the ordering:
+ for i, hook in list(enumerate(sys.meta_path))[::-1]:
+ if hasattr(hook, 'RENAMER'):
+ del sys.meta_path[i]
+
+ # Explicit is better than implicit. In the future the interface should
+ # probably change so that scrubbing the import hooks requires a separate
+ # function call. Left as is for now for backward compatibility with
+ # v0.11.x.
+ if scrub_sys_modules:
+ scrub_future_sys_modules()
+
+
+def disable_hooks():
+ """
+ Deprecated. Use remove_hooks() instead. This will be removed by
+ ``future`` v1.0.
+ """
+ remove_hooks()
+
+
+def detect_hooks():
+ """
+ Returns True if the import hooks are installed, False if not.
+ """
+ flog.debug('Detecting hooks ...')
+ present = any([hasattr(hook, 'RENAMER') for hook in sys.meta_path])
+ if present:
+ flog.debug('Detected.')
+ else:
+ flog.debug('Not detected.')
+ return present
+
+
+# As of v0.12, this no longer happens implicitly:
+# if not PY3:
+# install_hooks()
+
+
+if not hasattr(sys, 'py2_modules'):
+ sys.py2_modules = {}
+
+def cache_py2_modules():
+ """
+ Currently this function is unneeded, as we are not attempting to provide import hooks
+ for modules with ambiguous names: email, urllib, pickle.
+ """
+ if len(sys.py2_modules) != 0:
+ return
+ assert not detect_hooks()
+ import urllib
+ sys.py2_modules['urllib'] = urllib
+
+ import email
+ sys.py2_modules['email'] = email
+
+ import pickle
+ sys.py2_modules['pickle'] = pickle
+
+ # Not all Python installations have test module. (Anaconda doesn't, for example.)
+ # try:
+ # import test
+ # except ImportError:
+ # sys.py2_modules['test'] = None
+ # sys.py2_modules['test'] = test
+
+ # import dbm
+ # sys.py2_modules['dbm'] = dbm
+
+
+def import_(module_name, backport=False):
+ """
+ Pass a (potentially dotted) module name of a Python 3 standard library
+ module. This function imports the module compatibly on Py2 and Py3 and
+ returns the top-level module.
+
+ Example use:
+ >>> http = import_('http.client')
+ >>> http = import_('http.server')
+ >>> urllib = import_('urllib.request')
+
+ Then:
+ >>> conn = http.client.HTTPConnection(...)
+ >>> response = urllib.request.urlopen('http://mywebsite.com')
+ >>> # etc.
+
+ Use as follows:
+ >>> package_name = import_(module_name)
+
+ On Py3, equivalent to this:
+
+ >>> import module_name
+
+ On Py2, equivalent to this if backport=False:
+
+ >>> from future.moves import module_name
+
+ or to this if backport=True:
+
+ >>> from future.backports import module_name
+
+ except that it also handles dotted module names such as ``http.client``
+ The effect then is like this:
+
+ >>> from future.backports import module
+ >>> from future.backports.module import submodule
+ >>> module.submodule = submodule
+
+ Note that this would be a SyntaxError in Python:
+
+ >>> from future.backports import http.client
+
+ """
+ # Python 2.6 doesn't have importlib in the stdlib, so it requires
+ # the backported ``importlib`` package from PyPI as a dependency to use
+ # this function:
+ import importlib
+
+ if PY3:
+ return __import__(module_name)
+ else:
+ # client.blah = blah
+ # Then http.client = client
+ # etc.
+ if backport:
+ prefix = 'future.backports'
+ else:
+ prefix = 'future.moves'
+ parts = prefix.split('.') + module_name.split('.')
+
+ modules = []
+ for i, part in enumerate(parts):
+ sofar = '.'.join(parts[:i+1])
+ modules.append(importlib.import_module(sofar))
+ for i, part in reversed(list(enumerate(parts))):
+ if i == 0:
+ break
+ setattr(modules[i-1], part, modules[i])
+
+ # Return the next-most top-level module after future.backports / future.moves:
+ return modules[2]
+
+
+def from_import(module_name, *symbol_names, **kwargs):
+ """
+ Example use:
+ >>> HTTPConnection = from_import('http.client', 'HTTPConnection')
+ >>> HTTPServer = from_import('http.server', 'HTTPServer')
+ >>> urlopen, urlparse = from_import('urllib.request', 'urlopen', 'urlparse')
+
+ Equivalent to this on Py3:
+
+ >>> from module_name import symbol_names[0], symbol_names[1], ...
+
+ and this on Py2:
+
+ >>> from future.moves.module_name import symbol_names[0], ...
+
+ or:
+
+ >>> from future.backports.module_name import symbol_names[0], ...
+
+ except that it also handles dotted module names such as ``http.client``.
+ """
+
+ if PY3:
+ return __import__(module_name)
+ else:
+ if 'backport' in kwargs and bool(kwargs['backport']):
+ prefix = 'future.backports'
+ else:
+ prefix = 'future.moves'
+ parts = prefix.split('.') + module_name.split('.')
+ module = importlib.import_module(prefix + '.' + module_name)
+ output = [getattr(module, name) for name in symbol_names]
+ if len(output) == 1:
+ return output[0]
+ else:
+ return output
+
+
+class exclude_local_folder_imports(object):
+ """
+ A context-manager that prevents standard library modules like configparser
+ from being imported from the local python-future source folder on Py3.
+
+ (This was need prior to v0.16.0 because the presence of a configparser
+ folder would otherwise have prevented setuptools from running on Py3. Maybe
+ it's not needed any more?)
+ """
+ def __init__(self, *args):
+ assert len(args) > 0
+ self.module_names = args
+ # Disallow dotted module names like http.client:
+ if any(['.' in m for m in self.module_names]):
+ raise NotImplementedError('Dotted module names are not supported')
+
+ def __enter__(self):
+ self.old_sys_path = copy.copy(sys.path)
+ self.old_sys_modules = copy.copy(sys.modules)
+ if sys.version_info[0] < 3:
+ return
+ # The presence of all these indicates we've found our source folder,
+ # because `builtins` won't have been installed in site-packages by setup.py:
+ FUTURE_SOURCE_SUBFOLDERS = ['future', 'past', 'libfuturize', 'libpasteurize', 'builtins']
+
+ # Look for the future source folder:
+ for folder in self.old_sys_path:
+ if all([os.path.exists(os.path.join(folder, subfolder))
+ for subfolder in FUTURE_SOURCE_SUBFOLDERS]):
+ # Found it. Remove it.
+ sys.path.remove(folder)
+
+ # Ensure we import the system module:
+ for m in self.module_names:
+ # Delete the module and any submodules from sys.modules:
+ # for key in list(sys.modules):
+ # if key == m or key.startswith(m + '.'):
+ # try:
+ # del sys.modules[key]
+ # except KeyError:
+ # pass
+ try:
+ module = __import__(m, level=0)
+ except ImportError:
+ # There's a problem importing the system module. E.g. the
+ # winreg module is not available except on Windows.
+ pass
+
+ def __exit__(self, *args):
+ # Restore sys.path and sys.modules:
+ sys.path = self.old_sys_path
+ for m in set(self.old_sys_modules.keys()) - set(sys.modules.keys()):
+ sys.modules[m] = self.old_sys_modules[m]
+
+TOP_LEVEL_MODULES = ['builtins',
+ 'copyreg',
+ 'html',
+ 'http',
+ 'queue',
+ 'reprlib',
+ 'socketserver',
+ 'test',
+ 'tkinter',
+ 'winreg',
+ 'xmlrpc',
+ '_dummy_thread',
+ '_markupbase',
+ '_thread',
+ ]
+
+def import_top_level_modules():
+ with exclude_local_folder_imports(*TOP_LEVEL_MODULES):
+ for m in TOP_LEVEL_MODULES:
+ try:
+ __import__(m)
+ except ImportError: # e.g. winreg
+ pass
diff --git a/contrib/python/future/future/tests/base.py b/contrib/python/future/future/tests/base.py
index d99923daa0..4ef437baa6 100644
--- a/contrib/python/future/future/tests/base.py
+++ b/contrib/python/future/future/tests/base.py
@@ -1,539 +1,539 @@
-from __future__ import print_function, absolute_import
-import os
-import tempfile
-import unittest
-import sys
-import re
-import warnings
-import io
-from textwrap import dedent
-
-from future.utils import bind_method, PY26, PY3, PY2, PY27
-from future.moves.subprocess import check_output, STDOUT, CalledProcessError
-
-if PY26:
- import unittest2 as unittest
-
-
-def reformat_code(code):
- """
- Removes any leading \n and dedents.
- """
- if code.startswith('\n'):
- code = code[1:]
- return dedent(code)
-
-
-def order_future_lines(code):
- """
- Returns the code block with any ``__future__`` import lines sorted, and
- then any ``future`` import lines sorted, then any ``builtins`` import lines
- sorted.
-
- This only sorts the lines within the expected blocks.
-
- See test_order_future_lines() for an example.
- """
-
- # We need .splitlines(keepends=True), which doesn't exist on Py2,
- # so we use this instead:
- lines = code.split('\n')
-
- uufuture_line_numbers = [i for i, line in enumerate(lines)
- if line.startswith('from __future__ import ')]
-
- future_line_numbers = [i for i, line in enumerate(lines)
- if line.startswith('from future')
- or line.startswith('from past')]
-
- builtins_line_numbers = [i for i, line in enumerate(lines)
- if line.startswith('from builtins')]
-
- assert code.lstrip() == code, ('internal usage error: '
- 'dedent the code before calling order_future_lines()')
-
- def mymax(numbers):
- return max(numbers) if len(numbers) > 0 else 0
-
- def mymin(numbers):
- return min(numbers) if len(numbers) > 0 else float('inf')
-
- assert mymax(uufuture_line_numbers) <= mymin(future_line_numbers), \
- 'the __future__ and future imports are out of order'
-
- # assert mymax(future_line_numbers) <= mymin(builtins_line_numbers), \
- # 'the future and builtins imports are out of order'
-
- uul = sorted([lines[i] for i in uufuture_line_numbers])
- sorted_uufuture_lines = dict(zip(uufuture_line_numbers, uul))
-
- fl = sorted([lines[i] for i in future_line_numbers])
- sorted_future_lines = dict(zip(future_line_numbers, fl))
-
- bl = sorted([lines[i] for i in builtins_line_numbers])
- sorted_builtins_lines = dict(zip(builtins_line_numbers, bl))
-
- # Replace the old unsorted "from __future__ import ..." lines with the
- # new sorted ones:
- new_lines = []
- for i in range(len(lines)):
- if i in uufuture_line_numbers:
- new_lines.append(sorted_uufuture_lines[i])
- elif i in future_line_numbers:
- new_lines.append(sorted_future_lines[i])
- elif i in builtins_line_numbers:
- new_lines.append(sorted_builtins_lines[i])
- else:
- new_lines.append(lines[i])
- return '\n'.join(new_lines)
-
-
-class VerboseCalledProcessError(CalledProcessError):
- """
- Like CalledProcessError, but it displays more information (message and
- script output) for diagnosing test failures etc.
- """
- def __init__(self, msg, returncode, cmd, output=None):
- self.msg = msg
- self.returncode = returncode
- self.cmd = cmd
- self.output = output
-
- def __str__(self):
- return ("Command '%s' failed with exit status %d\nMessage: %s\nOutput: %s"
- % (self.cmd, self.returncode, self.msg, self.output))
-
-class FuturizeError(VerboseCalledProcessError):
- pass
-
-class PasteurizeError(VerboseCalledProcessError):
- pass
-
-
-class CodeHandler(unittest.TestCase):
- """
- Handy mixin for test classes for writing / reading / futurizing /
- running .py files in the test suite.
- """
- def setUp(self):
- """
- The outputs from the various futurize stages should have the
- following headers:
- """
- # After stage1:
- # TODO: use this form after implementing a fixer to consolidate
- # __future__ imports into a single line:
- # self.headers1 = """
- # from __future__ import absolute_import, division, print_function
- # """
- self.headers1 = reformat_code("""
- from __future__ import absolute_import
- from __future__ import division
- from __future__ import print_function
- """)
-
- # After stage2 --all-imports:
- # TODO: use this form after implementing a fixer to consolidate
- # __future__ imports into a single line:
- # self.headers2 = """
- # from __future__ import (absolute_import, division,
- # print_function, unicode_literals)
- # from future import standard_library
- # from future.builtins import *
- # """
- self.headers2 = reformat_code("""
- from __future__ import absolute_import
- from __future__ import division
- from __future__ import print_function
- from __future__ import unicode_literals
- from future import standard_library
- standard_library.install_aliases()
- from builtins import *
- """)
- self.interpreters = [sys.executable]
- self.tempdir = tempfile.mkdtemp() + os.path.sep
- pypath = os.getenv('PYTHONPATH')
- if pypath:
- self.env = {'PYTHONPATH': os.getcwd() + os.pathsep + pypath}
- else:
- self.env = {'PYTHONPATH': os.getcwd()}
-
- def convert(self, code, stages=(1, 2), all_imports=False, from3=False,
- reformat=True, run=True, conservative=False):
- """
- Converts the code block using ``futurize`` and returns the
- resulting code.
-
- Passing stages=[1] or stages=[2] passes the flag ``--stage1`` or
- ``stage2`` to ``futurize``. Passing both stages runs ``futurize``
- with both stages by default.
-
- If from3 is False, runs ``futurize``, converting from Python 2 to
- both 2 and 3. If from3 is True, runs ``pasteurize`` to convert
- from Python 3 to both 2 and 3.
-
- Optionally reformats the code block first using the reformat() function.
-
- If run is True, runs the resulting code under all Python
- interpreters in self.interpreters.
- """
- if reformat:
- code = reformat_code(code)
- self._write_test_script(code)
- self._futurize_test_script(stages=stages, all_imports=all_imports,
- from3=from3, conservative=conservative)
- output = self._read_test_script()
- if run:
- for interpreter in self.interpreters:
- _ = self._run_test_script(interpreter=interpreter)
- return output
-
- def compare(self, output, expected, ignore_imports=True):
- """
- Compares whether the code blocks are equal. If not, raises an
- exception so the test fails. Ignores any trailing whitespace like
- blank lines.
-
- If ignore_imports is True, passes the code blocks into the
- strip_future_imports method.
-
- If one code block is a unicode string and the other a
- byte-string, it assumes the byte-string is encoded as utf-8.
- """
- if ignore_imports:
- output = self.strip_future_imports(output)
- expected = self.strip_future_imports(expected)
- if isinstance(output, bytes) and not isinstance(expected, bytes):
- output = output.decode('utf-8')
- if isinstance(expected, bytes) and not isinstance(output, bytes):
- expected = expected.decode('utf-8')
- self.assertEqual(order_future_lines(output.rstrip()),
- expected.rstrip())
-
- def strip_future_imports(self, code):
- """
- Strips any of these import lines:
-
- from __future__ import <anything>
- from future <anything>
- from future.<anything>
- from builtins <anything>
-
- or any line containing:
- install_hooks()
- or:
- install_aliases()
-
- Limitation: doesn't handle imports split across multiple lines like
- this:
-
- from __future__ import (absolute_import, division, print_function,
- unicode_literals)
- """
- output = []
- # We need .splitlines(keepends=True), which doesn't exist on Py2,
- # so we use this instead:
- for line in code.split('\n'):
- if not (line.startswith('from __future__ import ')
- or line.startswith('from future ')
- or line.startswith('from builtins ')
- or 'install_hooks()' in line
- or 'install_aliases()' in line
- # but don't match "from future_builtins" :)
- or line.startswith('from future.')):
- output.append(line)
- return '\n'.join(output)
-
- def convert_check(self, before, expected, stages=(1, 2), all_imports=False,
- ignore_imports=True, from3=False, run=True,
- conservative=False):
- """
- Convenience method that calls convert() and compare().
-
- Reformats the code blocks automatically using the reformat_code()
- function.
-
- If all_imports is passed, we add the appropriate import headers
- for the stage(s) selected to the ``expected`` code-block, so they
- needn't appear repeatedly in the test code.
-
- If ignore_imports is True, ignores the presence of any lines
- beginning:
-
- from __future__ import ...
- from future import ...
-
- for the purpose of the comparison.
- """
- output = self.convert(before, stages=stages, all_imports=all_imports,
- from3=from3, run=run, conservative=conservative)
- if all_imports:
- headers = self.headers2 if 2 in stages else self.headers1
- else:
- headers = ''
-
+from __future__ import print_function, absolute_import
+import os
+import tempfile
+import unittest
+import sys
+import re
+import warnings
+import io
+from textwrap import dedent
+
+from future.utils import bind_method, PY26, PY3, PY2, PY27
+from future.moves.subprocess import check_output, STDOUT, CalledProcessError
+
+if PY26:
+ import unittest2 as unittest
+
+
+def reformat_code(code):
+ """
+ Removes any leading \n and dedents.
+ """
+ if code.startswith('\n'):
+ code = code[1:]
+ return dedent(code)
+
+
+def order_future_lines(code):
+ """
+ Returns the code block with any ``__future__`` import lines sorted, and
+ then any ``future`` import lines sorted, then any ``builtins`` import lines
+ sorted.
+
+ This only sorts the lines within the expected blocks.
+
+ See test_order_future_lines() for an example.
+ """
+
+ # We need .splitlines(keepends=True), which doesn't exist on Py2,
+ # so we use this instead:
+ lines = code.split('\n')
+
+ uufuture_line_numbers = [i for i, line in enumerate(lines)
+ if line.startswith('from __future__ import ')]
+
+ future_line_numbers = [i for i, line in enumerate(lines)
+ if line.startswith('from future')
+ or line.startswith('from past')]
+
+ builtins_line_numbers = [i for i, line in enumerate(lines)
+ if line.startswith('from builtins')]
+
+ assert code.lstrip() == code, ('internal usage error: '
+ 'dedent the code before calling order_future_lines()')
+
+ def mymax(numbers):
+ return max(numbers) if len(numbers) > 0 else 0
+
+ def mymin(numbers):
+ return min(numbers) if len(numbers) > 0 else float('inf')
+
+ assert mymax(uufuture_line_numbers) <= mymin(future_line_numbers), \
+ 'the __future__ and future imports are out of order'
+
+ # assert mymax(future_line_numbers) <= mymin(builtins_line_numbers), \
+ # 'the future and builtins imports are out of order'
+
+ uul = sorted([lines[i] for i in uufuture_line_numbers])
+ sorted_uufuture_lines = dict(zip(uufuture_line_numbers, uul))
+
+ fl = sorted([lines[i] for i in future_line_numbers])
+ sorted_future_lines = dict(zip(future_line_numbers, fl))
+
+ bl = sorted([lines[i] for i in builtins_line_numbers])
+ sorted_builtins_lines = dict(zip(builtins_line_numbers, bl))
+
+ # Replace the old unsorted "from __future__ import ..." lines with the
+ # new sorted ones:
+ new_lines = []
+ for i in range(len(lines)):
+ if i in uufuture_line_numbers:
+ new_lines.append(sorted_uufuture_lines[i])
+ elif i in future_line_numbers:
+ new_lines.append(sorted_future_lines[i])
+ elif i in builtins_line_numbers:
+ new_lines.append(sorted_builtins_lines[i])
+ else:
+ new_lines.append(lines[i])
+ return '\n'.join(new_lines)
+
+
+class VerboseCalledProcessError(CalledProcessError):
+ """
+ Like CalledProcessError, but it displays more information (message and
+ script output) for diagnosing test failures etc.
+ """
+ def __init__(self, msg, returncode, cmd, output=None):
+ self.msg = msg
+ self.returncode = returncode
+ self.cmd = cmd
+ self.output = output
+
+ def __str__(self):
+ return ("Command '%s' failed with exit status %d\nMessage: %s\nOutput: %s"
+ % (self.cmd, self.returncode, self.msg, self.output))
+
+class FuturizeError(VerboseCalledProcessError):
+ pass
+
+class PasteurizeError(VerboseCalledProcessError):
+ pass
+
+
+class CodeHandler(unittest.TestCase):
+ """
+ Handy mixin for test classes for writing / reading / futurizing /
+ running .py files in the test suite.
+ """
+ def setUp(self):
+ """
+ The outputs from the various futurize stages should have the
+ following headers:
+ """
+ # After stage1:
+ # TODO: use this form after implementing a fixer to consolidate
+ # __future__ imports into a single line:
+ # self.headers1 = """
+ # from __future__ import absolute_import, division, print_function
+ # """
+ self.headers1 = reformat_code("""
+ from __future__ import absolute_import
+ from __future__ import division
+ from __future__ import print_function
+ """)
+
+ # After stage2 --all-imports:
+ # TODO: use this form after implementing a fixer to consolidate
+ # __future__ imports into a single line:
+ # self.headers2 = """
+ # from __future__ import (absolute_import, division,
+ # print_function, unicode_literals)
+ # from future import standard_library
+ # from future.builtins import *
+ # """
+ self.headers2 = reformat_code("""
+ from __future__ import absolute_import
+ from __future__ import division
+ from __future__ import print_function
+ from __future__ import unicode_literals
+ from future import standard_library
+ standard_library.install_aliases()
+ from builtins import *
+ """)
+ self.interpreters = [sys.executable]
+ self.tempdir = tempfile.mkdtemp() + os.path.sep
+ pypath = os.getenv('PYTHONPATH')
+ if pypath:
+ self.env = {'PYTHONPATH': os.getcwd() + os.pathsep + pypath}
+ else:
+ self.env = {'PYTHONPATH': os.getcwd()}
+
+ def convert(self, code, stages=(1, 2), all_imports=False, from3=False,
+ reformat=True, run=True, conservative=False):
+ """
+ Converts the code block using ``futurize`` and returns the
+ resulting code.
+
+ Passing stages=[1] or stages=[2] passes the flag ``--stage1`` or
+ ``stage2`` to ``futurize``. Passing both stages runs ``futurize``
+ with both stages by default.
+
+ If from3 is False, runs ``futurize``, converting from Python 2 to
+ both 2 and 3. If from3 is True, runs ``pasteurize`` to convert
+ from Python 3 to both 2 and 3.
+
+ Optionally reformats the code block first using the reformat() function.
+
+ If run is True, runs the resulting code under all Python
+ interpreters in self.interpreters.
+ """
+ if reformat:
+ code = reformat_code(code)
+ self._write_test_script(code)
+ self._futurize_test_script(stages=stages, all_imports=all_imports,
+ from3=from3, conservative=conservative)
+ output = self._read_test_script()
+ if run:
+ for interpreter in self.interpreters:
+ _ = self._run_test_script(interpreter=interpreter)
+ return output
+
+ def compare(self, output, expected, ignore_imports=True):
+ """
+ Compares whether the code blocks are equal. If not, raises an
+ exception so the test fails. Ignores any trailing whitespace like
+ blank lines.
+
+ If ignore_imports is True, passes the code blocks into the
+ strip_future_imports method.
+
+ If one code block is a unicode string and the other a
+ byte-string, it assumes the byte-string is encoded as utf-8.
+ """
+ if ignore_imports:
+ output = self.strip_future_imports(output)
+ expected = self.strip_future_imports(expected)
+ if isinstance(output, bytes) and not isinstance(expected, bytes):
+ output = output.decode('utf-8')
+ if isinstance(expected, bytes) and not isinstance(output, bytes):
+ expected = expected.decode('utf-8')
+ self.assertEqual(order_future_lines(output.rstrip()),
+ expected.rstrip())
+
+ def strip_future_imports(self, code):
+ """
+ Strips any of these import lines:
+
+ from __future__ import <anything>
+ from future <anything>
+ from future.<anything>
+ from builtins <anything>
+
+ or any line containing:
+ install_hooks()
+ or:
+ install_aliases()
+
+ Limitation: doesn't handle imports split across multiple lines like
+ this:
+
+ from __future__ import (absolute_import, division, print_function,
+ unicode_literals)
+ """
+ output = []
+ # We need .splitlines(keepends=True), which doesn't exist on Py2,
+ # so we use this instead:
+ for line in code.split('\n'):
+ if not (line.startswith('from __future__ import ')
+ or line.startswith('from future ')
+ or line.startswith('from builtins ')
+ or 'install_hooks()' in line
+ or 'install_aliases()' in line
+ # but don't match "from future_builtins" :)
+ or line.startswith('from future.')):
+ output.append(line)
+ return '\n'.join(output)
+
+ def convert_check(self, before, expected, stages=(1, 2), all_imports=False,
+ ignore_imports=True, from3=False, run=True,
+ conservative=False):
+ """
+ Convenience method that calls convert() and compare().
+
+ Reformats the code blocks automatically using the reformat_code()
+ function.
+
+ If all_imports is passed, we add the appropriate import headers
+ for the stage(s) selected to the ``expected`` code-block, so they
+ needn't appear repeatedly in the test code.
+
+ If ignore_imports is True, ignores the presence of any lines
+ beginning:
+
+ from __future__ import ...
+ from future import ...
+
+ for the purpose of the comparison.
+ """
+ output = self.convert(before, stages=stages, all_imports=all_imports,
+ from3=from3, run=run, conservative=conservative)
+ if all_imports:
+ headers = self.headers2 if 2 in stages else self.headers1
+ else:
+ headers = ''
+
reformatted = reformat_code(expected)
if headers in reformatted:
headers = ''
self.compare(output, headers + reformatted,
- ignore_imports=ignore_imports)
-
- def unchanged(self, code, **kwargs):
- """
- Convenience method to ensure the code is unchanged by the
- futurize process.
- """
- self.convert_check(code, code, **kwargs)
-
- def _write_test_script(self, code, filename='mytestscript.py'):
- """
- Dedents the given code (a multiline string) and writes it out to
- a file in a temporary folder like /tmp/tmpUDCn7x/mytestscript.py.
- """
- if isinstance(code, bytes):
- code = code.decode('utf-8')
- # Be explicit about encoding the temp file as UTF-8 (issue #63):
- with io.open(self.tempdir + filename, 'wt', encoding='utf-8') as f:
- f.write(dedent(code))
-
- def _read_test_script(self, filename='mytestscript.py'):
- with io.open(self.tempdir + filename, 'rt', encoding='utf-8') as f:
- newsource = f.read()
- return newsource
-
- def _futurize_test_script(self, filename='mytestscript.py', stages=(1, 2),
- all_imports=False, from3=False,
- conservative=False):
- params = []
- stages = list(stages)
- if all_imports:
- params.append('--all-imports')
- if from3:
- script = 'pasteurize.py'
- else:
- script = 'futurize.py'
- if stages == [1]:
- params.append('--stage1')
- elif stages == [2]:
- params.append('--stage2')
- else:
- assert stages == [1, 2]
- if conservative:
- params.append('--conservative')
- # No extra params needed
-
- # Absolute file path:
- fn = self.tempdir + filename
- call_args = [sys.executable, script] + params + ['-w', fn]
- try:
- output = check_output(call_args, stderr=STDOUT, env=self.env)
- except CalledProcessError as e:
- with open(fn) as f:
- msg = (
- 'Error running the command %s\n'
- '%s\n'
- 'Contents of file %s:\n'
- '\n'
- '%s') % (
- ' '.join(call_args),
- 'env=%s' % self.env,
- fn,
- '----\n%s\n----' % f.read(),
- )
- ErrorClass = (FuturizeError if 'futurize' in script else PasteurizeError)
+ ignore_imports=ignore_imports)
+
+ def unchanged(self, code, **kwargs):
+ """
+ Convenience method to ensure the code is unchanged by the
+ futurize process.
+ """
+ self.convert_check(code, code, **kwargs)
+
+ def _write_test_script(self, code, filename='mytestscript.py'):
+ """
+ Dedents the given code (a multiline string) and writes it out to
+ a file in a temporary folder like /tmp/tmpUDCn7x/mytestscript.py.
+ """
+ if isinstance(code, bytes):
+ code = code.decode('utf-8')
+ # Be explicit about encoding the temp file as UTF-8 (issue #63):
+ with io.open(self.tempdir + filename, 'wt', encoding='utf-8') as f:
+ f.write(dedent(code))
+
+ def _read_test_script(self, filename='mytestscript.py'):
+ with io.open(self.tempdir + filename, 'rt', encoding='utf-8') as f:
+ newsource = f.read()
+ return newsource
+
+ def _futurize_test_script(self, filename='mytestscript.py', stages=(1, 2),
+ all_imports=False, from3=False,
+ conservative=False):
+ params = []
+ stages = list(stages)
+ if all_imports:
+ params.append('--all-imports')
+ if from3:
+ script = 'pasteurize.py'
+ else:
+ script = 'futurize.py'
+ if stages == [1]:
+ params.append('--stage1')
+ elif stages == [2]:
+ params.append('--stage2')
+ else:
+ assert stages == [1, 2]
+ if conservative:
+ params.append('--conservative')
+ # No extra params needed
+
+ # Absolute file path:
+ fn = self.tempdir + filename
+ call_args = [sys.executable, script] + params + ['-w', fn]
+ try:
+ output = check_output(call_args, stderr=STDOUT, env=self.env)
+ except CalledProcessError as e:
+ with open(fn) as f:
+ msg = (
+ 'Error running the command %s\n'
+ '%s\n'
+ 'Contents of file %s:\n'
+ '\n'
+ '%s') % (
+ ' '.join(call_args),
+ 'env=%s' % self.env,
+ fn,
+ '----\n%s\n----' % f.read(),
+ )
+ ErrorClass = (FuturizeError if 'futurize' in script else PasteurizeError)
if not hasattr(e, 'output'):
# The attribute CalledProcessError.output doesn't exist on Py2.6
e.output = None
- raise ErrorClass(msg, e.returncode, e.cmd, output=e.output)
- return output
-
- def _run_test_script(self, filename='mytestscript.py',
- interpreter=sys.executable):
- # Absolute file path:
- fn = self.tempdir + filename
- try:
- output = check_output([interpreter, fn],
- env=self.env, stderr=STDOUT)
- except CalledProcessError as e:
- with open(fn) as f:
- msg = (
- 'Error running the command %s\n'
- '%s\n'
- 'Contents of file %s:\n'
- '\n'
- '%s') % (
- ' '.join([interpreter, fn]),
- 'env=%s' % self.env,
- fn,
- '----\n%s\n----' % f.read(),
- )
- if not hasattr(e, 'output'):
- # The attribute CalledProcessError.output doesn't exist on Py2.6
- e.output = None
- raise VerboseCalledProcessError(msg, e.returncode, e.cmd, output=e.output)
- return output
-
-
-# Decorator to skip some tests on Python 2.6 ...
-skip26 = unittest.skipIf(PY26, "this test is known to fail on Py2.6")
-
-
-def expectedFailurePY3(func):
- if not PY3:
- return func
- return unittest.expectedFailure(func)
-
-def expectedFailurePY26(func):
- if not PY26:
- return func
- return unittest.expectedFailure(func)
-
-
-def expectedFailurePY27(func):
- if not PY27:
- return func
- return unittest.expectedFailure(func)
-
-
-def expectedFailurePY2(func):
- if not PY2:
- return func
- return unittest.expectedFailure(func)
-
-
-# Renamed in Py3.3:
-if not hasattr(unittest.TestCase, 'assertRaisesRegex'):
- unittest.TestCase.assertRaisesRegex = unittest.TestCase.assertRaisesRegexp
-
-# From Py3.3:
-def assertRegex(self, text, expected_regex, msg=None):
- """Fail the test unless the text matches the regular expression."""
- if isinstance(expected_regex, (str, unicode)):
- assert expected_regex, "expected_regex must not be empty."
- expected_regex = re.compile(expected_regex)
- if not expected_regex.search(text):
- msg = msg or "Regex didn't match"
- msg = '%s: %r not found in %r' % (msg, expected_regex.pattern, text)
- raise self.failureException(msg)
-
-if not hasattr(unittest.TestCase, 'assertRegex'):
- bind_method(unittest.TestCase, 'assertRegex', assertRegex)
-
-class _AssertRaisesBaseContext(object):
-
- def __init__(self, expected, test_case, callable_obj=None,
- expected_regex=None):
- self.expected = expected
- self.test_case = test_case
- if callable_obj is not None:
- try:
- self.obj_name = callable_obj.__name__
- except AttributeError:
- self.obj_name = str(callable_obj)
- else:
- self.obj_name = None
- if isinstance(expected_regex, (bytes, str)):
- expected_regex = re.compile(expected_regex)
- self.expected_regex = expected_regex
- self.msg = None
-
- def _raiseFailure(self, standardMsg):
- msg = self.test_case._formatMessage(self.msg, standardMsg)
- raise self.test_case.failureException(msg)
-
- def handle(self, name, callable_obj, args, kwargs):
- """
- If callable_obj is None, assertRaises/Warns is being used as a
- context manager, so check for a 'msg' kwarg and return self.
- If callable_obj is not None, call it passing args and kwargs.
- """
- if callable_obj is None:
- self.msg = kwargs.pop('msg', None)
- return self
- with self:
- callable_obj(*args, **kwargs)
-
-class _AssertWarnsContext(_AssertRaisesBaseContext):
- """A context manager used to implement TestCase.assertWarns* methods."""
-
- def __enter__(self):
- # The __warningregistry__'s need to be in a pristine state for tests
- # to work properly.
- for v in sys.modules.values():
- if getattr(v, '__warningregistry__', None):
- v.__warningregistry__ = {}
- self.warnings_manager = warnings.catch_warnings(record=True)
- self.warnings = self.warnings_manager.__enter__()
- warnings.simplefilter("always", self.expected)
- return self
-
- def __exit__(self, exc_type, exc_value, tb):
- self.warnings_manager.__exit__(exc_type, exc_value, tb)
- if exc_type is not None:
- # let unexpected exceptions pass through
- return
- try:
- exc_name = self.expected.__name__
- except AttributeError:
- exc_name = str(self.expected)
- first_matching = None
- for m in self.warnings:
- w = m.message
- if not isinstance(w, self.expected):
- continue
- if first_matching is None:
- first_matching = w
- if (self.expected_regex is not None and
- not self.expected_regex.search(str(w))):
- continue
- # store warning for later retrieval
- self.warning = w
- self.filename = m.filename
- self.lineno = m.lineno
- return
- # Now we simply try to choose a helpful failure message
- if first_matching is not None:
- self._raiseFailure('"{}" does not match "{}"'.format(
- self.expected_regex.pattern, str(first_matching)))
- if self.obj_name:
- self._raiseFailure("{} not triggered by {}".format(exc_name,
- self.obj_name))
- else:
- self._raiseFailure("{} not triggered".format(exc_name))
-
-
-def assertWarns(self, expected_warning, callable_obj=None, *args, **kwargs):
- """Fail unless a warning of class warnClass is triggered
- by callable_obj when invoked with arguments args and keyword
- arguments kwargs. If a different type of warning is
- triggered, it will not be handled: depending on the other
- warning filtering rules in effect, it might be silenced, printed
- out, or raised as an exception.
-
- If called with callable_obj omitted or None, will return a
- context object used like this::
-
- with self.assertWarns(SomeWarning):
- do_something()
-
- An optional keyword argument 'msg' can be provided when assertWarns
- is used as a context object.
-
- The context manager keeps a reference to the first matching
- warning as the 'warning' attribute; similarly, the 'filename'
- and 'lineno' attributes give you information about the line
- of Python code from which the warning was triggered.
- This allows you to inspect the warning after the assertion::
-
- with self.assertWarns(SomeWarning) as cm:
- do_something()
- the_warning = cm.warning
- self.assertEqual(the_warning.some_attribute, 147)
- """
- context = _AssertWarnsContext(expected_warning, self, callable_obj)
- return context.handle('assertWarns', callable_obj, args, kwargs)
-
-if not hasattr(unittest.TestCase, 'assertWarns'):
- bind_method(unittest.TestCase, 'assertWarns', assertWarns)
+ raise ErrorClass(msg, e.returncode, e.cmd, output=e.output)
+ return output
+
+ def _run_test_script(self, filename='mytestscript.py',
+ interpreter=sys.executable):
+ # Absolute file path:
+ fn = self.tempdir + filename
+ try:
+ output = check_output([interpreter, fn],
+ env=self.env, stderr=STDOUT)
+ except CalledProcessError as e:
+ with open(fn) as f:
+ msg = (
+ 'Error running the command %s\n'
+ '%s\n'
+ 'Contents of file %s:\n'
+ '\n'
+ '%s') % (
+ ' '.join([interpreter, fn]),
+ 'env=%s' % self.env,
+ fn,
+ '----\n%s\n----' % f.read(),
+ )
+ if not hasattr(e, 'output'):
+ # The attribute CalledProcessError.output doesn't exist on Py2.6
+ e.output = None
+ raise VerboseCalledProcessError(msg, e.returncode, e.cmd, output=e.output)
+ return output
+
+
+# Decorator to skip some tests on Python 2.6 ...
+skip26 = unittest.skipIf(PY26, "this test is known to fail on Py2.6")
+
+
+def expectedFailurePY3(func):
+ if not PY3:
+ return func
+ return unittest.expectedFailure(func)
+
+def expectedFailurePY26(func):
+ if not PY26:
+ return func
+ return unittest.expectedFailure(func)
+
+
+def expectedFailurePY27(func):
+ if not PY27:
+ return func
+ return unittest.expectedFailure(func)
+
+
+def expectedFailurePY2(func):
+ if not PY2:
+ return func
+ return unittest.expectedFailure(func)
+
+
+# Renamed in Py3.3:
+if not hasattr(unittest.TestCase, 'assertRaisesRegex'):
+ unittest.TestCase.assertRaisesRegex = unittest.TestCase.assertRaisesRegexp
+
+# From Py3.3:
+def assertRegex(self, text, expected_regex, msg=None):
+ """Fail the test unless the text matches the regular expression."""
+ if isinstance(expected_regex, (str, unicode)):
+ assert expected_regex, "expected_regex must not be empty."
+ expected_regex = re.compile(expected_regex)
+ if not expected_regex.search(text):
+ msg = msg or "Regex didn't match"
+ msg = '%s: %r not found in %r' % (msg, expected_regex.pattern, text)
+ raise self.failureException(msg)
+
+if not hasattr(unittest.TestCase, 'assertRegex'):
+ bind_method(unittest.TestCase, 'assertRegex', assertRegex)
+
+class _AssertRaisesBaseContext(object):
+
+ def __init__(self, expected, test_case, callable_obj=None,
+ expected_regex=None):
+ self.expected = expected
+ self.test_case = test_case
+ if callable_obj is not None:
+ try:
+ self.obj_name = callable_obj.__name__
+ except AttributeError:
+ self.obj_name = str(callable_obj)
+ else:
+ self.obj_name = None
+ if isinstance(expected_regex, (bytes, str)):
+ expected_regex = re.compile(expected_regex)
+ self.expected_regex = expected_regex
+ self.msg = None
+
+ def _raiseFailure(self, standardMsg):
+ msg = self.test_case._formatMessage(self.msg, standardMsg)
+ raise self.test_case.failureException(msg)
+
+ def handle(self, name, callable_obj, args, kwargs):
+ """
+ If callable_obj is None, assertRaises/Warns is being used as a
+ context manager, so check for a 'msg' kwarg and return self.
+ If callable_obj is not None, call it passing args and kwargs.
+ """
+ if callable_obj is None:
+ self.msg = kwargs.pop('msg', None)
+ return self
+ with self:
+ callable_obj(*args, **kwargs)
+
+class _AssertWarnsContext(_AssertRaisesBaseContext):
+ """A context manager used to implement TestCase.assertWarns* methods."""
+
+ def __enter__(self):
+ # The __warningregistry__'s need to be in a pristine state for tests
+ # to work properly.
+ for v in sys.modules.values():
+ if getattr(v, '__warningregistry__', None):
+ v.__warningregistry__ = {}
+ self.warnings_manager = warnings.catch_warnings(record=True)
+ self.warnings = self.warnings_manager.__enter__()
+ warnings.simplefilter("always", self.expected)
+ return self
+
+ def __exit__(self, exc_type, exc_value, tb):
+ self.warnings_manager.__exit__(exc_type, exc_value, tb)
+ if exc_type is not None:
+ # let unexpected exceptions pass through
+ return
+ try:
+ exc_name = self.expected.__name__
+ except AttributeError:
+ exc_name = str(self.expected)
+ first_matching = None
+ for m in self.warnings:
+ w = m.message
+ if not isinstance(w, self.expected):
+ continue
+ if first_matching is None:
+ first_matching = w
+ if (self.expected_regex is not None and
+ not self.expected_regex.search(str(w))):
+ continue
+ # store warning for later retrieval
+ self.warning = w
+ self.filename = m.filename
+ self.lineno = m.lineno
+ return
+ # Now we simply try to choose a helpful failure message
+ if first_matching is not None:
+ self._raiseFailure('"{}" does not match "{}"'.format(
+ self.expected_regex.pattern, str(first_matching)))
+ if self.obj_name:
+ self._raiseFailure("{} not triggered by {}".format(exc_name,
+ self.obj_name))
+ else:
+ self._raiseFailure("{} not triggered".format(exc_name))
+
+
+def assertWarns(self, expected_warning, callable_obj=None, *args, **kwargs):
+ """Fail unless a warning of class warnClass is triggered
+ by callable_obj when invoked with arguments args and keyword
+ arguments kwargs. If a different type of warning is
+ triggered, it will not be handled: depending on the other
+ warning filtering rules in effect, it might be silenced, printed
+ out, or raised as an exception.
+
+ If called with callable_obj omitted or None, will return a
+ context object used like this::
+
+ with self.assertWarns(SomeWarning):
+ do_something()
+
+ An optional keyword argument 'msg' can be provided when assertWarns
+ is used as a context object.
+
+ The context manager keeps a reference to the first matching
+ warning as the 'warning' attribute; similarly, the 'filename'
+ and 'lineno' attributes give you information about the line
+ of Python code from which the warning was triggered.
+ This allows you to inspect the warning after the assertion::
+
+ with self.assertWarns(SomeWarning) as cm:
+ do_something()
+ the_warning = cm.warning
+ self.assertEqual(the_warning.some_attribute, 147)
+ """
+ context = _AssertWarnsContext(expected_warning, self, callable_obj)
+ return context.handle('assertWarns', callable_obj, args, kwargs)
+
+if not hasattr(unittest.TestCase, 'assertWarns'):
+ bind_method(unittest.TestCase, 'assertWarns', assertWarns)
diff --git a/contrib/python/future/future/types/__init__.py b/contrib/python/future/future/types/__init__.py
index 58160a74b4..062507703e 100644
--- a/contrib/python/future/future/types/__init__.py
+++ b/contrib/python/future/future/types/__init__.py
@@ -1,257 +1,257 @@
-"""
-This module contains backports the data types that were significantly changed
-in the transition from Python 2 to Python 3.
-
-- an implementation of Python 3's bytes object (pure Python subclass of
- Python 2's builtin 8-bit str type)
-- an implementation of Python 3's str object (pure Python subclass of
- Python 2's builtin unicode type)
-- a backport of the range iterator from Py3 with slicing support
-
-It is used as follows::
-
- from __future__ import division, absolute_import, print_function
- from builtins import bytes, dict, int, range, str
-
-to bring in the new semantics for these functions from Python 3. And
-then, for example::
-
- b = bytes(b'ABCD')
- assert list(b) == [65, 66, 67, 68]
- assert repr(b) == "b'ABCD'"
- assert [65, 66] in b
-
- # These raise TypeErrors:
- # b + u'EFGH'
- # b.split(u'B')
- # bytes(b',').join([u'Fred', u'Bill'])
-
-
- s = str(u'ABCD')
-
- # These raise TypeErrors:
- # s.join([b'Fred', b'Bill'])
- # s.startswith(b'A')
- # b'B' in s
- # s.find(b'A')
- # s.replace(u'A', b'a')
-
- # This raises an AttributeError:
- # s.decode('utf-8')
-
- assert repr(s) == 'ABCD' # consistent repr with Py3 (no u prefix)
-
-
- for i in range(10**11)[:10]:
- pass
-
-and::
-
- class VerboseList(list):
- def append(self, item):
- print('Adding an item')
- super().append(item) # new simpler super() function
-
-For more information:
----------------------
-
-- future.types.newbytes
-- future.types.newdict
-- future.types.newint
-- future.types.newobject
-- future.types.newrange
-- future.types.newstr
-
-
-Notes
-=====
-
-range()
--------
-``range`` is a custom class that backports the slicing behaviour from
-Python 3 (based on the ``xrange`` module by Dan Crosta). See the
-``newrange`` module docstring for more details.
-
-
-super()
--------
-``super()`` is based on Ryan Kelly's ``magicsuper`` module. See the
-``newsuper`` module docstring for more details.
-
-
-round()
--------
-Python 3 modifies the behaviour of ``round()`` to use "Banker's Rounding".
-See http://stackoverflow.com/a/10825998. See the ``newround`` module
-docstring for more details.
-
-"""
-
-from __future__ import absolute_import, division, print_function
-
-import functools
-from numbers import Integral
-
-from future import utils
-
-
-# Some utility functions to enforce strict type-separation of unicode str and
-# bytes:
-def disallow_types(argnums, disallowed_types):
- """
- A decorator that raises a TypeError if any of the given numbered
- arguments is of the corresponding given type (e.g. bytes or unicode
- string).
-
- For example:
-
- @disallow_types([0, 1], [unicode, bytes])
- def f(a, b):
- pass
-
- raises a TypeError when f is called if a unicode object is passed as
- `a` or a bytes object is passed as `b`.
-
+"""
+This module contains backports the data types that were significantly changed
+in the transition from Python 2 to Python 3.
+
+- an implementation of Python 3's bytes object (pure Python subclass of
+ Python 2's builtin 8-bit str type)
+- an implementation of Python 3's str object (pure Python subclass of
+ Python 2's builtin unicode type)
+- a backport of the range iterator from Py3 with slicing support
+
+It is used as follows::
+
+ from __future__ import division, absolute_import, print_function
+ from builtins import bytes, dict, int, range, str
+
+to bring in the new semantics for these functions from Python 3. And
+then, for example::
+
+ b = bytes(b'ABCD')
+ assert list(b) == [65, 66, 67, 68]
+ assert repr(b) == "b'ABCD'"
+ assert [65, 66] in b
+
+ # These raise TypeErrors:
+ # b + u'EFGH'
+ # b.split(u'B')
+ # bytes(b',').join([u'Fred', u'Bill'])
+
+
+ s = str(u'ABCD')
+
+ # These raise TypeErrors:
+ # s.join([b'Fred', b'Bill'])
+ # s.startswith(b'A')
+ # b'B' in s
+ # s.find(b'A')
+ # s.replace(u'A', b'a')
+
+ # This raises an AttributeError:
+ # s.decode('utf-8')
+
+ assert repr(s) == 'ABCD' # consistent repr with Py3 (no u prefix)
+
+
+ for i in range(10**11)[:10]:
+ pass
+
+and::
+
+ class VerboseList(list):
+ def append(self, item):
+ print('Adding an item')
+ super().append(item) # new simpler super() function
+
+For more information:
+---------------------
+
+- future.types.newbytes
+- future.types.newdict
+- future.types.newint
+- future.types.newobject
+- future.types.newrange
+- future.types.newstr
+
+
+Notes
+=====
+
+range()
+-------
+``range`` is a custom class that backports the slicing behaviour from
+Python 3 (based on the ``xrange`` module by Dan Crosta). See the
+``newrange`` module docstring for more details.
+
+
+super()
+-------
+``super()`` is based on Ryan Kelly's ``magicsuper`` module. See the
+``newsuper`` module docstring for more details.
+
+
+round()
+-------
+Python 3 modifies the behaviour of ``round()`` to use "Banker's Rounding".
+See http://stackoverflow.com/a/10825998. See the ``newround`` module
+docstring for more details.
+
+"""
+
+from __future__ import absolute_import, division, print_function
+
+import functools
+from numbers import Integral
+
+from future import utils
+
+
+# Some utility functions to enforce strict type-separation of unicode str and
+# bytes:
+def disallow_types(argnums, disallowed_types):
+ """
+ A decorator that raises a TypeError if any of the given numbered
+ arguments is of the corresponding given type (e.g. bytes or unicode
+ string).
+
+ For example:
+
+ @disallow_types([0, 1], [unicode, bytes])
+ def f(a, b):
+ pass
+
+ raises a TypeError when f is called if a unicode object is passed as
+ `a` or a bytes object is passed as `b`.
+
This also skips over keyword arguments, so
-
- @disallow_types([0, 1], [unicode, bytes])
- def g(a, b=None):
- pass
-
- doesn't raise an exception if g is called with only one argument a,
- e.g.:
-
- g(b'Byte string')
-
- Example use:
-
- >>> class newbytes(object):
- ... @disallow_types([1], [unicode])
- ... def __add__(self, other):
- ... pass
-
+
+ @disallow_types([0, 1], [unicode, bytes])
+ def g(a, b=None):
+ pass
+
+ doesn't raise an exception if g is called with only one argument a,
+ e.g.:
+
+ g(b'Byte string')
+
+ Example use:
+
+ >>> class newbytes(object):
+ ... @disallow_types([1], [unicode])
+ ... def __add__(self, other):
+ ... pass
+
>>> newbytes('1234') + u'1234' #doctest: +IGNORE_EXCEPTION_DETAIL
- Traceback (most recent call last):
- ...
- TypeError: can't concat 'bytes' to (unicode) str
- """
-
- def decorator(function):
-
- @functools.wraps(function)
- def wrapper(*args, **kwargs):
- # These imports are just for this decorator, and are defined here
- # to prevent circular imports:
- from .newbytes import newbytes
- from .newint import newint
- from .newstr import newstr
-
- errmsg = "argument can't be {0}"
- for (argnum, mytype) in zip(argnums, disallowed_types):
- # Handle the case where the type is passed as a string like 'newbytes'.
- if isinstance(mytype, str) or isinstance(mytype, bytes):
- mytype = locals()[mytype]
-
- # Only restrict kw args only if they are passed:
- if len(args) <= argnum:
- break
-
- # Here we use type() rather than isinstance() because
- # __instancecheck__ is being overridden. E.g.
- # isinstance(b'abc', newbytes) is True on Py2.
- if type(args[argnum]) == mytype:
- raise TypeError(errmsg.format(mytype))
-
- return function(*args, **kwargs)
- return wrapper
- return decorator
-
-
-def no(mytype, argnums=(1,)):
- """
- A shortcut for the disallow_types decorator that disallows only one type
- (in any position in argnums).
-
- Example use:
-
- >>> class newstr(object):
- ... @no('bytes')
- ... def __add__(self, other):
- ... pass
-
- >>> newstr(u'1234') + b'1234' #doctest: +IGNORE_EXCEPTION_DETAIL
- Traceback (most recent call last):
- ...
- TypeError: argument can't be bytes
-
- The object can also be passed directly, but passing the string helps
- to prevent circular import problems.
- """
- if isinstance(argnums, Integral):
- argnums = (argnums,)
- disallowed_types = [mytype] * len(argnums)
- return disallow_types(argnums, disallowed_types)
-
-
-def issubset(list1, list2):
- """
- Examples:
-
- >>> issubset([], [65, 66, 67])
- True
- >>> issubset([65], [65, 66, 67])
- True
- >>> issubset([65, 66], [65, 66, 67])
- True
- >>> issubset([65, 67], [65, 66, 67])
- False
- """
- n = len(list1)
- for startpos in range(len(list2) - n + 1):
- if list2[startpos:startpos+n] == list1:
- return True
- return False
-
-
-if utils.PY3:
- import builtins
- bytes = builtins.bytes
- dict = builtins.dict
- int = builtins.int
- list = builtins.list
- object = builtins.object
- range = builtins.range
- str = builtins.str
-
- # The identity mapping
- newtypes = {bytes: bytes,
- dict: dict,
- int: int,
- list: list,
- object: object,
- range: range,
- str: str}
-
- __all__ = ['newtypes']
-
-else:
-
- from .newbytes import newbytes
- from .newdict import newdict
- from .newint import newint
- from .newlist import newlist
- from .newrange import newrange
- from .newobject import newobject
- from .newstr import newstr
-
- newtypes = {bytes: newbytes,
- dict: newdict,
- int: newint,
- long: newint,
- list: newlist,
- object: newobject,
- range: newrange,
- str: newbytes,
- unicode: newstr}
-
- __all__ = ['newbytes', 'newdict', 'newint', 'newlist', 'newrange', 'newstr', 'newtypes']
+ Traceback (most recent call last):
+ ...
+ TypeError: can't concat 'bytes' to (unicode) str
+ """
+
+ def decorator(function):
+
+ @functools.wraps(function)
+ def wrapper(*args, **kwargs):
+ # These imports are just for this decorator, and are defined here
+ # to prevent circular imports:
+ from .newbytes import newbytes
+ from .newint import newint
+ from .newstr import newstr
+
+ errmsg = "argument can't be {0}"
+ for (argnum, mytype) in zip(argnums, disallowed_types):
+ # Handle the case where the type is passed as a string like 'newbytes'.
+ if isinstance(mytype, str) or isinstance(mytype, bytes):
+ mytype = locals()[mytype]
+
+ # Only restrict kw args only if they are passed:
+ if len(args) <= argnum:
+ break
+
+ # Here we use type() rather than isinstance() because
+ # __instancecheck__ is being overridden. E.g.
+ # isinstance(b'abc', newbytes) is True on Py2.
+ if type(args[argnum]) == mytype:
+ raise TypeError(errmsg.format(mytype))
+
+ return function(*args, **kwargs)
+ return wrapper
+ return decorator
+
+
+def no(mytype, argnums=(1,)):
+ """
+ A shortcut for the disallow_types decorator that disallows only one type
+ (in any position in argnums).
+
+ Example use:
+
+ >>> class newstr(object):
+ ... @no('bytes')
+ ... def __add__(self, other):
+ ... pass
+
+ >>> newstr(u'1234') + b'1234' #doctest: +IGNORE_EXCEPTION_DETAIL
+ Traceback (most recent call last):
+ ...
+ TypeError: argument can't be bytes
+
+ The object can also be passed directly, but passing the string helps
+ to prevent circular import problems.
+ """
+ if isinstance(argnums, Integral):
+ argnums = (argnums,)
+ disallowed_types = [mytype] * len(argnums)
+ return disallow_types(argnums, disallowed_types)
+
+
+def issubset(list1, list2):
+ """
+ Examples:
+
+ >>> issubset([], [65, 66, 67])
+ True
+ >>> issubset([65], [65, 66, 67])
+ True
+ >>> issubset([65, 66], [65, 66, 67])
+ True
+ >>> issubset([65, 67], [65, 66, 67])
+ False
+ """
+ n = len(list1)
+ for startpos in range(len(list2) - n + 1):
+ if list2[startpos:startpos+n] == list1:
+ return True
+ return False
+
+
+if utils.PY3:
+ import builtins
+ bytes = builtins.bytes
+ dict = builtins.dict
+ int = builtins.int
+ list = builtins.list
+ object = builtins.object
+ range = builtins.range
+ str = builtins.str
+
+ # The identity mapping
+ newtypes = {bytes: bytes,
+ dict: dict,
+ int: int,
+ list: list,
+ object: object,
+ range: range,
+ str: str}
+
+ __all__ = ['newtypes']
+
+else:
+
+ from .newbytes import newbytes
+ from .newdict import newdict
+ from .newint import newint
+ from .newlist import newlist
+ from .newrange import newrange
+ from .newobject import newobject
+ from .newstr import newstr
+
+ newtypes = {bytes: newbytes,
+ dict: newdict,
+ int: newint,
+ long: newint,
+ list: newlist,
+ object: newobject,
+ range: newrange,
+ str: newbytes,
+ unicode: newstr}
+
+ __all__ = ['newbytes', 'newdict', 'newint', 'newlist', 'newrange', 'newstr', 'newtypes']
diff --git a/contrib/python/future/future/types/newbytes.py b/contrib/python/future/future/types/newbytes.py
index 277fd563b2..c9d584a7ca 100644
--- a/contrib/python/future/future/types/newbytes.py
+++ b/contrib/python/future/future/types/newbytes.py
@@ -1,183 +1,183 @@
-"""
-Pure-Python implementation of a Python 3-like bytes object for Python 2.
-
-Why do this? Without it, the Python 2 bytes object is a very, very
-different beast to the Python 3 bytes object.
-"""
-
-from numbers import Integral
-import string
-import copy
-
+"""
+Pure-Python implementation of a Python 3-like bytes object for Python 2.
+
+Why do this? Without it, the Python 2 bytes object is a very, very
+different beast to the Python 3 bytes object.
+"""
+
+from numbers import Integral
+import string
+import copy
+
from future.utils import istext, isbytes, PY2, PY3, with_metaclass
-from future.types import no, issubset
-from future.types.newobject import newobject
-
+from future.types import no, issubset
+from future.types.newobject import newobject
+
if PY2:
from collections import Iterable
else:
from collections.abc import Iterable
-
-
-_builtin_bytes = bytes
-
-if PY3:
- # We'll probably never use newstr on Py3 anyway...
- unicode = str
-
-
-class BaseNewBytes(type):
- def __instancecheck__(cls, instance):
- if cls == newbytes:
- return isinstance(instance, _builtin_bytes)
- else:
- return issubclass(instance.__class__, cls)
-
-
-def _newchr(x):
- if isinstance(x, str): # this happens on pypy
- return x.encode('ascii')
- else:
- return chr(x)
-
-
-class newbytes(with_metaclass(BaseNewBytes, _builtin_bytes)):
- """
- A backport of the Python 3 bytes object to Py2
- """
- def __new__(cls, *args, **kwargs):
- """
- From the Py3 bytes docstring:
-
- bytes(iterable_of_ints) -> bytes
- bytes(string, encoding[, errors]) -> bytes
- bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer
- bytes(int) -> bytes object of size given by the parameter initialized with null bytes
- bytes() -> empty bytes object
-
- Construct an immutable array of bytes from:
- - an iterable yielding integers in range(256)
- - a text string encoded using the specified encoding
- - any object implementing the buffer API.
- - an integer
- """
-
- encoding = None
- errors = None
-
- if len(args) == 0:
- return super(newbytes, cls).__new__(cls)
- elif len(args) >= 2:
- args = list(args)
- if len(args) == 3:
- errors = args.pop()
- encoding=args.pop()
- # Was: elif isinstance(args[0], newbytes):
- # We use type() instead of the above because we're redefining
- # this to be True for all unicode string subclasses. Warning:
- # This may render newstr un-subclassable.
- if type(args[0]) == newbytes:
- # Special-case: for consistency with Py3.3, we return the same object
- # (with the same id) if a newbytes object is passed into the
- # newbytes constructor.
- return args[0]
- elif isinstance(args[0], _builtin_bytes):
- value = args[0]
- elif isinstance(args[0], unicode):
- try:
- if 'encoding' in kwargs:
- assert encoding is None
- encoding = kwargs['encoding']
- if 'errors' in kwargs:
- assert errors is None
- errors = kwargs['errors']
- except AssertionError:
- raise TypeError('Argument given by name and position')
- if encoding is None:
- raise TypeError('unicode string argument without an encoding')
- ###
- # Was: value = args[0].encode(**kwargs)
- # Python 2.6 string encode() method doesn't take kwargs:
- # Use this instead:
- newargs = [encoding]
- if errors is not None:
- newargs.append(errors)
- value = args[0].encode(*newargs)
- ###
+
+
+_builtin_bytes = bytes
+
+if PY3:
+ # We'll probably never use newstr on Py3 anyway...
+ unicode = str
+
+
+class BaseNewBytes(type):
+ def __instancecheck__(cls, instance):
+ if cls == newbytes:
+ return isinstance(instance, _builtin_bytes)
+ else:
+ return issubclass(instance.__class__, cls)
+
+
+def _newchr(x):
+ if isinstance(x, str): # this happens on pypy
+ return x.encode('ascii')
+ else:
+ return chr(x)
+
+
+class newbytes(with_metaclass(BaseNewBytes, _builtin_bytes)):
+ """
+ A backport of the Python 3 bytes object to Py2
+ """
+ def __new__(cls, *args, **kwargs):
+ """
+ From the Py3 bytes docstring:
+
+ bytes(iterable_of_ints) -> bytes
+ bytes(string, encoding[, errors]) -> bytes
+ bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer
+ bytes(int) -> bytes object of size given by the parameter initialized with null bytes
+ bytes() -> empty bytes object
+
+ Construct an immutable array of bytes from:
+ - an iterable yielding integers in range(256)
+ - a text string encoded using the specified encoding
+ - any object implementing the buffer API.
+ - an integer
+ """
+
+ encoding = None
+ errors = None
+
+ if len(args) == 0:
+ return super(newbytes, cls).__new__(cls)
+ elif len(args) >= 2:
+ args = list(args)
+ if len(args) == 3:
+ errors = args.pop()
+ encoding=args.pop()
+ # Was: elif isinstance(args[0], newbytes):
+ # We use type() instead of the above because we're redefining
+ # this to be True for all unicode string subclasses. Warning:
+ # This may render newstr un-subclassable.
+ if type(args[0]) == newbytes:
+ # Special-case: for consistency with Py3.3, we return the same object
+ # (with the same id) if a newbytes object is passed into the
+ # newbytes constructor.
+ return args[0]
+ elif isinstance(args[0], _builtin_bytes):
+ value = args[0]
+ elif isinstance(args[0], unicode):
+ try:
+ if 'encoding' in kwargs:
+ assert encoding is None
+ encoding = kwargs['encoding']
+ if 'errors' in kwargs:
+ assert errors is None
+ errors = kwargs['errors']
+ except AssertionError:
+ raise TypeError('Argument given by name and position')
+ if encoding is None:
+ raise TypeError('unicode string argument without an encoding')
+ ###
+ # Was: value = args[0].encode(**kwargs)
+ # Python 2.6 string encode() method doesn't take kwargs:
+ # Use this instead:
+ newargs = [encoding]
+ if errors is not None:
+ newargs.append(errors)
+ value = args[0].encode(*newargs)
+ ###
elif hasattr(args[0], '__bytes__'):
value = args[0].__bytes__()
- elif isinstance(args[0], Iterable):
- if len(args[0]) == 0:
- # This could be an empty list or tuple. Return b'' as on Py3.
- value = b''
- else:
- # Was: elif len(args[0])>0 and isinstance(args[0][0], Integral):
- # # It's a list of integers
- # But then we can't index into e.g. frozensets. Try to proceed
- # anyway.
- try:
- value = bytearray([_newchr(x) for x in args[0]])
- except:
- raise ValueError('bytes must be in range(0, 256)')
- elif isinstance(args[0], Integral):
- if args[0] < 0:
- raise ValueError('negative count')
- value = b'\x00' * args[0]
- else:
- value = args[0]
- if type(value) == newbytes:
- # Above we use type(...) rather than isinstance(...) because the
- # newbytes metaclass overrides __instancecheck__.
- # oldbytes(value) gives the wrong thing on Py2: the same
- # result as str(value) on Py3, e.g. "b'abc'". (Issue #193).
- # So we handle this case separately:
- return copy.copy(value)
- else:
- return super(newbytes, cls).__new__(cls, value)
-
- def __repr__(self):
- return 'b' + super(newbytes, self).__repr__()
-
- def __str__(self):
- return 'b' + "'{0}'".format(super(newbytes, self).__str__())
-
- def __getitem__(self, y):
- value = super(newbytes, self).__getitem__(y)
- if isinstance(y, Integral):
- return ord(value)
- else:
- return newbytes(value)
-
- def __getslice__(self, *args):
- return self.__getitem__(slice(*args))
-
- def __contains__(self, key):
- if isinstance(key, int):
- newbyteskey = newbytes([key])
- # Don't use isinstance() here because we only want to catch
- # newbytes, not Python 2 str:
- elif type(key) == newbytes:
- newbyteskey = key
- else:
- newbyteskey = newbytes(key)
- return issubset(list(newbyteskey), list(self))
-
- @no(unicode)
- def __add__(self, other):
- return newbytes(super(newbytes, self).__add__(other))
-
- @no(unicode)
- def __radd__(self, left):
- return newbytes(left) + self
-
- @no(unicode)
- def __mul__(self, other):
- return newbytes(super(newbytes, self).__mul__(other))
-
- @no(unicode)
- def __rmul__(self, other):
- return newbytes(super(newbytes, self).__rmul__(other))
-
+ elif isinstance(args[0], Iterable):
+ if len(args[0]) == 0:
+ # This could be an empty list or tuple. Return b'' as on Py3.
+ value = b''
+ else:
+ # Was: elif len(args[0])>0 and isinstance(args[0][0], Integral):
+ # # It's a list of integers
+ # But then we can't index into e.g. frozensets. Try to proceed
+ # anyway.
+ try:
+ value = bytearray([_newchr(x) for x in args[0]])
+ except:
+ raise ValueError('bytes must be in range(0, 256)')
+ elif isinstance(args[0], Integral):
+ if args[0] < 0:
+ raise ValueError('negative count')
+ value = b'\x00' * args[0]
+ else:
+ value = args[0]
+ if type(value) == newbytes:
+ # Above we use type(...) rather than isinstance(...) because the
+ # newbytes metaclass overrides __instancecheck__.
+ # oldbytes(value) gives the wrong thing on Py2: the same
+ # result as str(value) on Py3, e.g. "b'abc'". (Issue #193).
+ # So we handle this case separately:
+ return copy.copy(value)
+ else:
+ return super(newbytes, cls).__new__(cls, value)
+
+ def __repr__(self):
+ return 'b' + super(newbytes, self).__repr__()
+
+ def __str__(self):
+ return 'b' + "'{0}'".format(super(newbytes, self).__str__())
+
+ def __getitem__(self, y):
+ value = super(newbytes, self).__getitem__(y)
+ if isinstance(y, Integral):
+ return ord(value)
+ else:
+ return newbytes(value)
+
+ def __getslice__(self, *args):
+ return self.__getitem__(slice(*args))
+
+ def __contains__(self, key):
+ if isinstance(key, int):
+ newbyteskey = newbytes([key])
+ # Don't use isinstance() here because we only want to catch
+ # newbytes, not Python 2 str:
+ elif type(key) == newbytes:
+ newbyteskey = key
+ else:
+ newbyteskey = newbytes(key)
+ return issubset(list(newbyteskey), list(self))
+
+ @no(unicode)
+ def __add__(self, other):
+ return newbytes(super(newbytes, self).__add__(other))
+
+ @no(unicode)
+ def __radd__(self, left):
+ return newbytes(left) + self
+
+ @no(unicode)
+ def __mul__(self, other):
+ return newbytes(super(newbytes, self).__mul__(other))
+
+ @no(unicode)
+ def __rmul__(self, other):
+ return newbytes(super(newbytes, self).__rmul__(other))
+
def __mod__(self, vals):
if isinstance(vals, newbytes):
vals = _builtin_bytes.__str__(vals)
@@ -201,260 +201,260 @@ class newbytes(with_metaclass(BaseNewBytes, _builtin_bytes)):
def __imod__(self, other):
return self.__mod__(other)
- def join(self, iterable_of_bytes):
- errmsg = 'sequence item {0}: expected bytes, {1} found'
- if isbytes(iterable_of_bytes) or istext(iterable_of_bytes):
- raise TypeError(errmsg.format(0, type(iterable_of_bytes)))
- for i, item in enumerate(iterable_of_bytes):
- if istext(item):
- raise TypeError(errmsg.format(i, type(item)))
- return newbytes(super(newbytes, self).join(iterable_of_bytes))
-
- @classmethod
- def fromhex(cls, string):
- # Only on Py2:
- return cls(string.replace(' ', '').decode('hex'))
-
- @no(unicode)
- def find(self, sub, *args):
- return super(newbytes, self).find(sub, *args)
-
- @no(unicode)
- def rfind(self, sub, *args):
- return super(newbytes, self).rfind(sub, *args)
-
- @no(unicode, (1, 2))
- def replace(self, old, new, *args):
- return newbytes(super(newbytes, self).replace(old, new, *args))
-
- def encode(self, *args):
- raise AttributeError("encode method has been disabled in newbytes")
-
- def decode(self, encoding='utf-8', errors='strict'):
- """
- Returns a newstr (i.e. unicode subclass)
-
- Decode B using the codec registered for encoding. Default encoding
- is 'utf-8'. errors may be given to set a different error
- handling scheme. Default is 'strict' meaning that encoding errors raise
- a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
- as well as any other name registered with codecs.register_error that is
- able to handle UnicodeDecodeErrors.
- """
- # Py2 str.encode() takes encoding and errors as optional parameter,
- # not keyword arguments as in Python 3 str.
-
- from future.types.newstr import newstr
-
- if errors == 'surrogateescape':
- from future.utils.surrogateescape import register_surrogateescape
- register_surrogateescape()
-
- return newstr(super(newbytes, self).decode(encoding, errors))
-
- # This is currently broken:
- # # We implement surrogateescape error handling here in addition rather
- # # than relying on the custom error handler from
- # # future.utils.surrogateescape to be registered globally, even though
- # # that is fine in the case of decoding. (But not encoding: see the
- # # comments in newstr.encode()``.)
- #
- # if errors == 'surrogateescape':
- # # Decode char by char
- # mybytes = []
- # for code in self:
- # # Code is an int
- # if 0x80 <= code <= 0xFF:
- # b = 0xDC00 + code
- # elif code <= 0x7F:
- # b = _unichr(c).decode(encoding=encoding)
- # else:
- # # # It may be a bad byte
- # # FIXME: What to do in this case? See the Py3 docs / tests.
- # # # Try swallowing it.
- # # continue
- # # print("RAISE!")
- # raise NotASurrogateError
- # mybytes.append(b)
- # return newbytes(mybytes)
- # return newbytes(super(newstr, self).decode(encoding, errors))
-
- @no(unicode)
- def startswith(self, prefix, *args):
- return super(newbytes, self).startswith(prefix, *args)
-
- @no(unicode)
- def endswith(self, prefix, *args):
- return super(newbytes, self).endswith(prefix, *args)
-
- @no(unicode)
- def split(self, sep=None, maxsplit=-1):
- # Py2 str.split() takes maxsplit as an optional parameter, not as a
- # keyword argument as in Python 3 bytes.
- parts = super(newbytes, self).split(sep, maxsplit)
- return [newbytes(part) for part in parts]
-
- def splitlines(self, keepends=False):
- """
- B.splitlines([keepends]) -> list of lines
-
- Return a list of the lines in B, breaking at line boundaries.
- Line breaks are not included in the resulting list unless keepends
- is given and true.
- """
- # Py2 str.splitlines() takes keepends as an optional parameter,
- # not as a keyword argument as in Python 3 bytes.
- parts = super(newbytes, self).splitlines(keepends)
- return [newbytes(part) for part in parts]
-
- @no(unicode)
- def rsplit(self, sep=None, maxsplit=-1):
- # Py2 str.rsplit() takes maxsplit as an optional parameter, not as a
- # keyword argument as in Python 3 bytes.
- parts = super(newbytes, self).rsplit(sep, maxsplit)
- return [newbytes(part) for part in parts]
-
- @no(unicode)
- def partition(self, sep):
- parts = super(newbytes, self).partition(sep)
- return tuple(newbytes(part) for part in parts)
-
- @no(unicode)
- def rpartition(self, sep):
- parts = super(newbytes, self).rpartition(sep)
- return tuple(newbytes(part) for part in parts)
-
- @no(unicode, (1,))
- def rindex(self, sub, *args):
- '''
- S.rindex(sub [,start [,end]]) -> int
-
- Like S.rfind() but raise ValueError when the substring is not found.
- '''
- pos = self.rfind(sub, *args)
- if pos == -1:
- raise ValueError('substring not found')
-
- @no(unicode)
- def index(self, sub, *args):
- '''
- Returns index of sub in bytes.
- Raises ValueError if byte is not in bytes and TypeError if can't
- be converted bytes or its length is not 1.
- '''
- if isinstance(sub, int):
- if len(args) == 0:
- start, end = 0, len(self)
- elif len(args) == 1:
- start = args[0]
- elif len(args) == 2:
- start, end = args
- else:
- raise TypeError('takes at most 3 arguments')
- return list(self)[start:end].index(sub)
- if not isinstance(sub, bytes):
- try:
- sub = self.__class__(sub)
- except (TypeError, ValueError):
- raise TypeError("can't convert sub to bytes")
- try:
- return super(newbytes, self).index(sub, *args)
- except ValueError:
- raise ValueError('substring not found')
-
- def __eq__(self, other):
- if isinstance(other, (_builtin_bytes, bytearray)):
- return super(newbytes, self).__eq__(other)
- else:
- return False
-
- def __ne__(self, other):
- if isinstance(other, _builtin_bytes):
- return super(newbytes, self).__ne__(other)
- else:
- return True
-
- unorderable_err = 'unorderable types: bytes() and {0}'
-
- def __lt__(self, other):
+ def join(self, iterable_of_bytes):
+ errmsg = 'sequence item {0}: expected bytes, {1} found'
+ if isbytes(iterable_of_bytes) or istext(iterable_of_bytes):
+ raise TypeError(errmsg.format(0, type(iterable_of_bytes)))
+ for i, item in enumerate(iterable_of_bytes):
+ if istext(item):
+ raise TypeError(errmsg.format(i, type(item)))
+ return newbytes(super(newbytes, self).join(iterable_of_bytes))
+
+ @classmethod
+ def fromhex(cls, string):
+ # Only on Py2:
+ return cls(string.replace(' ', '').decode('hex'))
+
+ @no(unicode)
+ def find(self, sub, *args):
+ return super(newbytes, self).find(sub, *args)
+
+ @no(unicode)
+ def rfind(self, sub, *args):
+ return super(newbytes, self).rfind(sub, *args)
+
+ @no(unicode, (1, 2))
+ def replace(self, old, new, *args):
+ return newbytes(super(newbytes, self).replace(old, new, *args))
+
+ def encode(self, *args):
+ raise AttributeError("encode method has been disabled in newbytes")
+
+ def decode(self, encoding='utf-8', errors='strict'):
+ """
+ Returns a newstr (i.e. unicode subclass)
+
+ Decode B using the codec registered for encoding. Default encoding
+ is 'utf-8'. errors may be given to set a different error
+ handling scheme. Default is 'strict' meaning that encoding errors raise
+ a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
+ as well as any other name registered with codecs.register_error that is
+ able to handle UnicodeDecodeErrors.
+ """
+ # Py2 str.encode() takes encoding and errors as optional parameter,
+ # not keyword arguments as in Python 3 str.
+
+ from future.types.newstr import newstr
+
+ if errors == 'surrogateescape':
+ from future.utils.surrogateescape import register_surrogateescape
+ register_surrogateescape()
+
+ return newstr(super(newbytes, self).decode(encoding, errors))
+
+ # This is currently broken:
+ # # We implement surrogateescape error handling here in addition rather
+ # # than relying on the custom error handler from
+ # # future.utils.surrogateescape to be registered globally, even though
+ # # that is fine in the case of decoding. (But not encoding: see the
+ # # comments in newstr.encode()``.)
+ #
+ # if errors == 'surrogateescape':
+ # # Decode char by char
+ # mybytes = []
+ # for code in self:
+ # # Code is an int
+ # if 0x80 <= code <= 0xFF:
+ # b = 0xDC00 + code
+ # elif code <= 0x7F:
+ # b = _unichr(c).decode(encoding=encoding)
+ # else:
+ # # # It may be a bad byte
+ # # FIXME: What to do in this case? See the Py3 docs / tests.
+ # # # Try swallowing it.
+ # # continue
+ # # print("RAISE!")
+ # raise NotASurrogateError
+ # mybytes.append(b)
+ # return newbytes(mybytes)
+ # return newbytes(super(newstr, self).decode(encoding, errors))
+
+ @no(unicode)
+ def startswith(self, prefix, *args):
+ return super(newbytes, self).startswith(prefix, *args)
+
+ @no(unicode)
+ def endswith(self, prefix, *args):
+ return super(newbytes, self).endswith(prefix, *args)
+
+ @no(unicode)
+ def split(self, sep=None, maxsplit=-1):
+ # Py2 str.split() takes maxsplit as an optional parameter, not as a
+ # keyword argument as in Python 3 bytes.
+ parts = super(newbytes, self).split(sep, maxsplit)
+ return [newbytes(part) for part in parts]
+
+ def splitlines(self, keepends=False):
+ """
+ B.splitlines([keepends]) -> list of lines
+
+ Return a list of the lines in B, breaking at line boundaries.
+ Line breaks are not included in the resulting list unless keepends
+ is given and true.
+ """
+ # Py2 str.splitlines() takes keepends as an optional parameter,
+ # not as a keyword argument as in Python 3 bytes.
+ parts = super(newbytes, self).splitlines(keepends)
+ return [newbytes(part) for part in parts]
+
+ @no(unicode)
+ def rsplit(self, sep=None, maxsplit=-1):
+ # Py2 str.rsplit() takes maxsplit as an optional parameter, not as a
+ # keyword argument as in Python 3 bytes.
+ parts = super(newbytes, self).rsplit(sep, maxsplit)
+ return [newbytes(part) for part in parts]
+
+ @no(unicode)
+ def partition(self, sep):
+ parts = super(newbytes, self).partition(sep)
+ return tuple(newbytes(part) for part in parts)
+
+ @no(unicode)
+ def rpartition(self, sep):
+ parts = super(newbytes, self).rpartition(sep)
+ return tuple(newbytes(part) for part in parts)
+
+ @no(unicode, (1,))
+ def rindex(self, sub, *args):
+ '''
+ S.rindex(sub [,start [,end]]) -> int
+
+ Like S.rfind() but raise ValueError when the substring is not found.
+ '''
+ pos = self.rfind(sub, *args)
+ if pos == -1:
+ raise ValueError('substring not found')
+
+ @no(unicode)
+ def index(self, sub, *args):
+ '''
+ Returns index of sub in bytes.
+ Raises ValueError if byte is not in bytes and TypeError if can't
+ be converted bytes or its length is not 1.
+ '''
+ if isinstance(sub, int):
+ if len(args) == 0:
+ start, end = 0, len(self)
+ elif len(args) == 1:
+ start = args[0]
+ elif len(args) == 2:
+ start, end = args
+ else:
+ raise TypeError('takes at most 3 arguments')
+ return list(self)[start:end].index(sub)
+ if not isinstance(sub, bytes):
+ try:
+ sub = self.__class__(sub)
+ except (TypeError, ValueError):
+ raise TypeError("can't convert sub to bytes")
+ try:
+ return super(newbytes, self).index(sub, *args)
+ except ValueError:
+ raise ValueError('substring not found')
+
+ def __eq__(self, other):
+ if isinstance(other, (_builtin_bytes, bytearray)):
+ return super(newbytes, self).__eq__(other)
+ else:
+ return False
+
+ def __ne__(self, other):
+ if isinstance(other, _builtin_bytes):
+ return super(newbytes, self).__ne__(other)
+ else:
+ return True
+
+ unorderable_err = 'unorderable types: bytes() and {0}'
+
+ def __lt__(self, other):
if isinstance(other, _builtin_bytes):
return super(newbytes, self).__lt__(other)
raise TypeError(self.unorderable_err.format(type(other)))
-
- def __le__(self, other):
+
+ def __le__(self, other):
if isinstance(other, _builtin_bytes):
return super(newbytes, self).__le__(other)
raise TypeError(self.unorderable_err.format(type(other)))
-
- def __gt__(self, other):
+
+ def __gt__(self, other):
if isinstance(other, _builtin_bytes):
return super(newbytes, self).__gt__(other)
raise TypeError(self.unorderable_err.format(type(other)))
-
- def __ge__(self, other):
+
+ def __ge__(self, other):
if isinstance(other, _builtin_bytes):
return super(newbytes, self).__ge__(other)
raise TypeError(self.unorderable_err.format(type(other)))
-
- def __native__(self):
- # We can't just feed a newbytes object into str(), because
- # newbytes.__str__() returns e.g. "b'blah'", consistent with Py3 bytes.
- return super(newbytes, self).__str__()
-
- def __getattribute__(self, name):
- """
- A trick to cause the ``hasattr`` builtin-fn to return False for
- the 'encode' method on Py2.
- """
- if name in ['encode', u'encode']:
- raise AttributeError("encode method has been disabled in newbytes")
- return super(newbytes, self).__getattribute__(name)
-
- @no(unicode)
- def rstrip(self, bytes_to_strip=None):
- """
- Strip trailing bytes contained in the argument.
- If the argument is omitted, strip trailing ASCII whitespace.
- """
- return newbytes(super(newbytes, self).rstrip(bytes_to_strip))
-
- @no(unicode)
- def strip(self, bytes_to_strip=None):
- """
- Strip leading and trailing bytes contained in the argument.
- If the argument is omitted, strip trailing ASCII whitespace.
- """
- return newbytes(super(newbytes, self).strip(bytes_to_strip))
-
- def lower(self):
- """
- b.lower() -> copy of b
-
- Return a copy of b with all ASCII characters converted to lowercase.
- """
- return newbytes(super(newbytes, self).lower())
-
- @no(unicode)
- def upper(self):
- """
- b.upper() -> copy of b
-
- Return a copy of b with all ASCII characters converted to uppercase.
- """
- return newbytes(super(newbytes, self).upper())
-
- @classmethod
- @no(unicode)
- def maketrans(cls, frm, to):
- """
- B.maketrans(frm, to) -> translation table
-
- Return a translation table (a bytes object of length 256) suitable
- for use in the bytes or bytearray translate method where each byte
- in frm is mapped to the byte at the same position in to.
- The bytes objects frm and to must be of the same length.
- """
- return newbytes(string.maketrans(frm, to))
-
-
-__all__ = ['newbytes']
+
+ def __native__(self):
+ # We can't just feed a newbytes object into str(), because
+ # newbytes.__str__() returns e.g. "b'blah'", consistent with Py3 bytes.
+ return super(newbytes, self).__str__()
+
+ def __getattribute__(self, name):
+ """
+ A trick to cause the ``hasattr`` builtin-fn to return False for
+ the 'encode' method on Py2.
+ """
+ if name in ['encode', u'encode']:
+ raise AttributeError("encode method has been disabled in newbytes")
+ return super(newbytes, self).__getattribute__(name)
+
+ @no(unicode)
+ def rstrip(self, bytes_to_strip=None):
+ """
+ Strip trailing bytes contained in the argument.
+ If the argument is omitted, strip trailing ASCII whitespace.
+ """
+ return newbytes(super(newbytes, self).rstrip(bytes_to_strip))
+
+ @no(unicode)
+ def strip(self, bytes_to_strip=None):
+ """
+ Strip leading and trailing bytes contained in the argument.
+ If the argument is omitted, strip trailing ASCII whitespace.
+ """
+ return newbytes(super(newbytes, self).strip(bytes_to_strip))
+
+ def lower(self):
+ """
+ b.lower() -> copy of b
+
+ Return a copy of b with all ASCII characters converted to lowercase.
+ """
+ return newbytes(super(newbytes, self).lower())
+
+ @no(unicode)
+ def upper(self):
+ """
+ b.upper() -> copy of b
+
+ Return a copy of b with all ASCII characters converted to uppercase.
+ """
+ return newbytes(super(newbytes, self).upper())
+
+ @classmethod
+ @no(unicode)
+ def maketrans(cls, frm, to):
+ """
+ B.maketrans(frm, to) -> translation table
+
+ Return a translation table (a bytes object of length 256) suitable
+ for use in the bytes or bytearray translate method where each byte
+ in frm is mapped to the byte at the same position in to.
+ The bytes objects frm and to must be of the same length.
+ """
+ return newbytes(string.maketrans(frm, to))
+
+
+__all__ = ['newbytes']
diff --git a/contrib/python/future/future/types/newdict.py b/contrib/python/future/future/types/newdict.py
index 5bb233a066..3f3a559dd5 100644
--- a/contrib/python/future/future/types/newdict.py
+++ b/contrib/python/future/future/types/newdict.py
@@ -1,111 +1,111 @@
-"""
-A dict subclass for Python 2 that behaves like Python 3's dict
-
-Example use:
-
->>> from builtins import dict
->>> d1 = dict() # instead of {} for an empty dict
->>> d2 = dict(key1='value1', key2='value2')
-
-The keys, values and items methods now return iterators on Python 2.x
-(with set-like behaviour on Python 2.7).
-
->>> for d in (d1, d2):
-... assert not isinstance(d.keys(), list)
-... assert not isinstance(d.values(), list)
-... assert not isinstance(d.items(), list)
-"""
-
-import sys
-
-from future.utils import with_metaclass
-from future.types.newobject import newobject
-
-
-_builtin_dict = dict
-ver = sys.version_info[:2]
-
-
-class BaseNewDict(type):
- def __instancecheck__(cls, instance):
- if cls == newdict:
- return isinstance(instance, _builtin_dict)
- else:
- return issubclass(instance.__class__, cls)
-
-
-class newdict(with_metaclass(BaseNewDict, _builtin_dict)):
- """
- A backport of the Python 3 dict object to Py2
- """
- def items(self):
- """
- On Python 2.7+:
- D.items() -> a set-like object providing a view on D's items
- On Python 2.6:
- D.items() -> an iterator over D's items
- """
- if ver == (2, 7):
- return self.viewitems()
- elif ver == (2, 6):
- return self.iteritems()
- elif ver >= (3, 0):
- return self.items()
-
- def keys(self):
- """
- On Python 2.7+:
- D.keys() -> a set-like object providing a view on D's keys
- On Python 2.6:
- D.keys() -> an iterator over D's keys
- """
- if ver == (2, 7):
- return self.viewkeys()
- elif ver == (2, 6):
- return self.iterkeys()
- elif ver >= (3, 0):
- return self.keys()
-
- def values(self):
- """
- On Python 2.7+:
- D.values() -> a set-like object providing a view on D's values
- On Python 2.6:
- D.values() -> an iterator over D's values
- """
- if ver == (2, 7):
- return self.viewvalues()
- elif ver == (2, 6):
- return self.itervalues()
- elif ver >= (3, 0):
- return self.values()
-
- def __new__(cls, *args, **kwargs):
- """
- dict() -> new empty dictionary
- dict(mapping) -> new dictionary initialized from a mapping object's
- (key, value) pairs
- dict(iterable) -> new dictionary initialized as if via:
- d = {}
- for k, v in iterable:
- d[k] = v
- dict(**kwargs) -> new dictionary initialized with the name=value pairs
- in the keyword argument list. For example: dict(one=1, two=2)
- """
-
- if len(args) == 0:
- return super(newdict, cls).__new__(cls)
- elif type(args[0]) == newdict:
- value = args[0]
- else:
- value = args[0]
- return super(newdict, cls).__new__(cls, value)
-
- def __native__(self):
- """
- Hook for the future.utils.native() function
- """
- return dict(self)
-
-
-__all__ = ['newdict']
+"""
+A dict subclass for Python 2 that behaves like Python 3's dict
+
+Example use:
+
+>>> from builtins import dict
+>>> d1 = dict() # instead of {} for an empty dict
+>>> d2 = dict(key1='value1', key2='value2')
+
+The keys, values and items methods now return iterators on Python 2.x
+(with set-like behaviour on Python 2.7).
+
+>>> for d in (d1, d2):
+... assert not isinstance(d.keys(), list)
+... assert not isinstance(d.values(), list)
+... assert not isinstance(d.items(), list)
+"""
+
+import sys
+
+from future.utils import with_metaclass
+from future.types.newobject import newobject
+
+
+_builtin_dict = dict
+ver = sys.version_info[:2]
+
+
+class BaseNewDict(type):
+ def __instancecheck__(cls, instance):
+ if cls == newdict:
+ return isinstance(instance, _builtin_dict)
+ else:
+ return issubclass(instance.__class__, cls)
+
+
+class newdict(with_metaclass(BaseNewDict, _builtin_dict)):
+ """
+ A backport of the Python 3 dict object to Py2
+ """
+ def items(self):
+ """
+ On Python 2.7+:
+ D.items() -> a set-like object providing a view on D's items
+ On Python 2.6:
+ D.items() -> an iterator over D's items
+ """
+ if ver == (2, 7):
+ return self.viewitems()
+ elif ver == (2, 6):
+ return self.iteritems()
+ elif ver >= (3, 0):
+ return self.items()
+
+ def keys(self):
+ """
+ On Python 2.7+:
+ D.keys() -> a set-like object providing a view on D's keys
+ On Python 2.6:
+ D.keys() -> an iterator over D's keys
+ """
+ if ver == (2, 7):
+ return self.viewkeys()
+ elif ver == (2, 6):
+ return self.iterkeys()
+ elif ver >= (3, 0):
+ return self.keys()
+
+ def values(self):
+ """
+ On Python 2.7+:
+ D.values() -> a set-like object providing a view on D's values
+ On Python 2.6:
+ D.values() -> an iterator over D's values
+ """
+ if ver == (2, 7):
+ return self.viewvalues()
+ elif ver == (2, 6):
+ return self.itervalues()
+ elif ver >= (3, 0):
+ return self.values()
+
+ def __new__(cls, *args, **kwargs):
+ """
+ dict() -> new empty dictionary
+ dict(mapping) -> new dictionary initialized from a mapping object's
+ (key, value) pairs
+ dict(iterable) -> new dictionary initialized as if via:
+ d = {}
+ for k, v in iterable:
+ d[k] = v
+ dict(**kwargs) -> new dictionary initialized with the name=value pairs
+ in the keyword argument list. For example: dict(one=1, two=2)
+ """
+
+ if len(args) == 0:
+ return super(newdict, cls).__new__(cls)
+ elif type(args[0]) == newdict:
+ value = args[0]
+ else:
+ value = args[0]
+ return super(newdict, cls).__new__(cls, value)
+
+ def __native__(self):
+ """
+ Hook for the future.utils.native() function
+ """
+ return dict(self)
+
+
+__all__ = ['newdict']
diff --git a/contrib/python/future/future/types/newint.py b/contrib/python/future/future/types/newint.py
index 4360d42254..748dba9d23 100644
--- a/contrib/python/future/future/types/newint.py
+++ b/contrib/python/future/future/types/newint.py
@@ -1,381 +1,381 @@
-"""
-Backport of Python 3's int, based on Py2's long.
-
-They are very similar. The most notable difference is:
-
-- representation: trailing L in Python 2 removed in Python 3
-"""
-from __future__ import division
-
-import struct
-
-from future.types.newbytes import newbytes
-from future.types.newobject import newobject
-from future.utils import PY3, isint, istext, isbytes, with_metaclass, native
-
-
-if PY3:
- long = int
+"""
+Backport of Python 3's int, based on Py2's long.
+
+They are very similar. The most notable difference is:
+
+- representation: trailing L in Python 2 removed in Python 3
+"""
+from __future__ import division
+
+import struct
+
+from future.types.newbytes import newbytes
+from future.types.newobject import newobject
+from future.utils import PY3, isint, istext, isbytes, with_metaclass, native
+
+
+if PY3:
+ long = int
from collections.abc import Iterable
else:
from collections import Iterable
-
-
-class BaseNewInt(type):
- def __instancecheck__(cls, instance):
- if cls == newint:
- # Special case for Py2 short or long int
- return isinstance(instance, (int, long))
- else:
- return issubclass(instance.__class__, cls)
-
-
-class newint(with_metaclass(BaseNewInt, long)):
- """
- A backport of the Python 3 int object to Py2
- """
- def __new__(cls, x=0, base=10):
- """
- From the Py3 int docstring:
-
- | int(x=0) -> integer
- | int(x, base=10) -> integer
- |
- | Convert a number or string to an integer, or return 0 if no
- | arguments are given. If x is a number, return x.__int__(). For
- | floating point numbers, this truncates towards zero.
- |
- | If x is not a number or if base is given, then x must be a string,
- | bytes, or bytearray instance representing an integer literal in the
- | given base. The literal can be preceded by '+' or '-' and be
- | surrounded by whitespace. The base defaults to 10. Valid bases are
- | 0 and 2-36. Base 0 means to interpret the base from the string as an
- | integer literal.
- | >>> int('0b100', base=0)
- | 4
-
- """
- try:
- val = x.__int__()
- except AttributeError:
- val = x
- else:
- if not isint(val):
- raise TypeError('__int__ returned non-int ({0})'.format(
- type(val)))
-
- if base != 10:
- # Explicit base
- if not (istext(val) or isbytes(val) or isinstance(val, bytearray)):
- raise TypeError(
- "int() can't convert non-string with explicit base")
- try:
- return super(newint, cls).__new__(cls, val, base)
- except TypeError:
- return super(newint, cls).__new__(cls, newbytes(val), base)
- # After here, base is 10
- try:
- return super(newint, cls).__new__(cls, val)
- except TypeError:
- # Py2 long doesn't handle bytearray input with an explicit base, so
- # handle this here.
- # Py3: int(bytearray(b'10'), 2) == 2
- # Py2: int(bytearray(b'10'), 2) == 2 raises TypeError
- # Py2: long(bytearray(b'10'), 2) == 2 raises TypeError
- try:
- return super(newint, cls).__new__(cls, newbytes(val))
- except:
- raise TypeError("newint argument must be a string or a number,"
- "not '{0}'".format(type(val)))
-
- def __repr__(self):
- """
- Without the L suffix
- """
- value = super(newint, self).__repr__()
- assert value[-1] == 'L'
- return value[:-1]
-
- def __add__(self, other):
- value = super(newint, self).__add__(other)
- if value is NotImplemented:
- return long(self) + other
- return newint(value)
-
- def __radd__(self, other):
- value = super(newint, self).__radd__(other)
- if value is NotImplemented:
- return other + long(self)
- return newint(value)
-
- def __sub__(self, other):
- value = super(newint, self).__sub__(other)
- if value is NotImplemented:
- return long(self) - other
- return newint(value)
-
- def __rsub__(self, other):
- value = super(newint, self).__rsub__(other)
- if value is NotImplemented:
- return other - long(self)
- return newint(value)
-
- def __mul__(self, other):
- value = super(newint, self).__mul__(other)
- if isint(value):
- return newint(value)
- elif value is NotImplemented:
- return long(self) * other
- return value
-
- def __rmul__(self, other):
- value = super(newint, self).__rmul__(other)
- if isint(value):
- return newint(value)
- elif value is NotImplemented:
- return other * long(self)
- return value
-
- def __div__(self, other):
- # We override this rather than e.g. relying on object.__div__ or
- # long.__div__ because we want to wrap the value in a newint()
- # call if other is another int
- value = long(self) / other
- if isinstance(other, (int, long)):
- return newint(value)
- else:
- return value
-
- def __rdiv__(self, other):
- value = other / long(self)
- if isinstance(other, (int, long)):
- return newint(value)
- else:
- return value
-
- def __idiv__(self, other):
- # long has no __idiv__ method. Use __itruediv__ and cast back to
- # newint:
- value = self.__itruediv__(other)
- if isinstance(other, (int, long)):
- return newint(value)
- else:
- return value
-
- def __truediv__(self, other):
- value = super(newint, self).__truediv__(other)
- if value is NotImplemented:
- value = long(self) / other
- return value
-
- def __rtruediv__(self, other):
- return super(newint, self).__rtruediv__(other)
-
- def __itruediv__(self, other):
- # long has no __itruediv__ method
- mylong = long(self)
- mylong /= other
- return mylong
-
- def __floordiv__(self, other):
- return newint(super(newint, self).__floordiv__(other))
-
- def __rfloordiv__(self, other):
- return newint(super(newint, self).__rfloordiv__(other))
-
- def __ifloordiv__(self, other):
- # long has no __ifloordiv__ method
- mylong = long(self)
- mylong //= other
- return newint(mylong)
-
- def __mod__(self, other):
- value = super(newint, self).__mod__(other)
- if value is NotImplemented:
- return long(self) % other
- return newint(value)
-
- def __rmod__(self, other):
- value = super(newint, self).__rmod__(other)
- if value is NotImplemented:
- return other % long(self)
- return newint(value)
-
- def __divmod__(self, other):
- value = super(newint, self).__divmod__(other)
- if value is NotImplemented:
- mylong = long(self)
- return (mylong // other, mylong % other)
- return (newint(value[0]), newint(value[1]))
-
- def __rdivmod__(self, other):
- value = super(newint, self).__rdivmod__(other)
- if value is NotImplemented:
- mylong = long(self)
- return (other // mylong, other % mylong)
- return (newint(value[0]), newint(value[1]))
-
- def __pow__(self, other):
- value = super(newint, self).__pow__(other)
- if value is NotImplemented:
- return long(self) ** other
- return newint(value)
-
- def __rpow__(self, other):
- value = super(newint, self).__rpow__(other)
- if value is NotImplemented:
- return other ** long(self)
- return newint(value)
-
- def __lshift__(self, other):
- if not isint(other):
- raise TypeError(
- "unsupported operand type(s) for <<: '%s' and '%s'" %
- (type(self).__name__, type(other).__name__))
- return newint(super(newint, self).__lshift__(other))
-
- def __rshift__(self, other):
- if not isint(other):
- raise TypeError(
- "unsupported operand type(s) for >>: '%s' and '%s'" %
- (type(self).__name__, type(other).__name__))
- return newint(super(newint, self).__rshift__(other))
-
- def __and__(self, other):
- if not isint(other):
- raise TypeError(
- "unsupported operand type(s) for &: '%s' and '%s'" %
- (type(self).__name__, type(other).__name__))
- return newint(super(newint, self).__and__(other))
-
- def __or__(self, other):
- if not isint(other):
- raise TypeError(
- "unsupported operand type(s) for |: '%s' and '%s'" %
- (type(self).__name__, type(other).__name__))
- return newint(super(newint, self).__or__(other))
-
- def __xor__(self, other):
- if not isint(other):
- raise TypeError(
- "unsupported operand type(s) for ^: '%s' and '%s'" %
- (type(self).__name__, type(other).__name__))
- return newint(super(newint, self).__xor__(other))
-
- def __neg__(self):
- return newint(super(newint, self).__neg__())
-
- def __pos__(self):
- return newint(super(newint, self).__pos__())
-
- def __abs__(self):
- return newint(super(newint, self).__abs__())
-
- def __invert__(self):
- return newint(super(newint, self).__invert__())
-
- def __int__(self):
- return self
-
- def __nonzero__(self):
- return self.__bool__()
-
- def __bool__(self):
- """
- So subclasses can override this, Py3-style
- """
- return super(newint, self).__nonzero__()
-
- def __native__(self):
- return long(self)
-
- def to_bytes(self, length, byteorder='big', signed=False):
- """
- Return an array of bytes representing an integer.
-
- The integer is represented using length bytes. An OverflowError is
- raised if the integer is not representable with the given number of
- bytes.
-
- The byteorder argument determines the byte order used to represent the
- integer. If byteorder is 'big', the most significant byte is at the
- beginning of the byte array. If byteorder is 'little', the most
- significant byte is at the end of the byte array. To request the native
- byte order of the host system, use `sys.byteorder' as the byte order value.
-
- The signed keyword-only argument determines whether two's complement is
- used to represent the integer. If signed is False and a negative integer
- is given, an OverflowError is raised.
- """
- if length < 0:
- raise ValueError("length argument must be non-negative")
- if length == 0 and self == 0:
- return newbytes()
- if signed and self < 0:
- bits = length * 8
- num = (2**bits) + self
- if num <= 0:
- raise OverflowError("int too smal to convert")
- else:
- if self < 0:
- raise OverflowError("can't convert negative int to unsigned")
- num = self
- if byteorder not in ('little', 'big'):
- raise ValueError("byteorder must be either 'little' or 'big'")
- h = b'%x' % num
- s = newbytes((b'0'*(len(h) % 2) + h).zfill(length*2).decode('hex'))
- if signed:
- high_set = s[0] & 0x80
- if self > 0 and high_set:
- raise OverflowError("int too big to convert")
- if self < 0 and not high_set:
- raise OverflowError("int too small to convert")
- if len(s) > length:
- raise OverflowError("int too big to convert")
- return s if byteorder == 'big' else s[::-1]
-
- @classmethod
- def from_bytes(cls, mybytes, byteorder='big', signed=False):
- """
- Return the integer represented by the given array of bytes.
-
- The mybytes argument must either support the buffer protocol or be an
- iterable object producing bytes. Bytes and bytearray are examples of
- built-in objects that support the buffer protocol.
-
- The byteorder argument determines the byte order used to represent the
- integer. If byteorder is 'big', the most significant byte is at the
- beginning of the byte array. If byteorder is 'little', the most
- significant byte is at the end of the byte array. To request the native
- byte order of the host system, use `sys.byteorder' as the byte order value.
-
- The signed keyword-only argument indicates whether two's complement is
- used to represent the integer.
- """
- if byteorder not in ('little', 'big'):
- raise ValueError("byteorder must be either 'little' or 'big'")
- if isinstance(mybytes, unicode):
- raise TypeError("cannot convert unicode objects to bytes")
- # mybytes can also be passed as a sequence of integers on Py3.
- # Test for this:
+
+
+class BaseNewInt(type):
+ def __instancecheck__(cls, instance):
+ if cls == newint:
+ # Special case for Py2 short or long int
+ return isinstance(instance, (int, long))
+ else:
+ return issubclass(instance.__class__, cls)
+
+
+class newint(with_metaclass(BaseNewInt, long)):
+ """
+ A backport of the Python 3 int object to Py2
+ """
+ def __new__(cls, x=0, base=10):
+ """
+ From the Py3 int docstring:
+
+ | int(x=0) -> integer
+ | int(x, base=10) -> integer
+ |
+ | Convert a number or string to an integer, or return 0 if no
+ | arguments are given. If x is a number, return x.__int__(). For
+ | floating point numbers, this truncates towards zero.
+ |
+ | If x is not a number or if base is given, then x must be a string,
+ | bytes, or bytearray instance representing an integer literal in the
+ | given base. The literal can be preceded by '+' or '-' and be
+ | surrounded by whitespace. The base defaults to 10. Valid bases are
+ | 0 and 2-36. Base 0 means to interpret the base from the string as an
+ | integer literal.
+ | >>> int('0b100', base=0)
+ | 4
+
+ """
+ try:
+ val = x.__int__()
+ except AttributeError:
+ val = x
+ else:
+ if not isint(val):
+ raise TypeError('__int__ returned non-int ({0})'.format(
+ type(val)))
+
+ if base != 10:
+ # Explicit base
+ if not (istext(val) or isbytes(val) or isinstance(val, bytearray)):
+ raise TypeError(
+ "int() can't convert non-string with explicit base")
+ try:
+ return super(newint, cls).__new__(cls, val, base)
+ except TypeError:
+ return super(newint, cls).__new__(cls, newbytes(val), base)
+ # After here, base is 10
+ try:
+ return super(newint, cls).__new__(cls, val)
+ except TypeError:
+ # Py2 long doesn't handle bytearray input with an explicit base, so
+ # handle this here.
+ # Py3: int(bytearray(b'10'), 2) == 2
+ # Py2: int(bytearray(b'10'), 2) == 2 raises TypeError
+ # Py2: long(bytearray(b'10'), 2) == 2 raises TypeError
+ try:
+ return super(newint, cls).__new__(cls, newbytes(val))
+ except:
+ raise TypeError("newint argument must be a string or a number,"
+ "not '{0}'".format(type(val)))
+
+ def __repr__(self):
+ """
+ Without the L suffix
+ """
+ value = super(newint, self).__repr__()
+ assert value[-1] == 'L'
+ return value[:-1]
+
+ def __add__(self, other):
+ value = super(newint, self).__add__(other)
+ if value is NotImplemented:
+ return long(self) + other
+ return newint(value)
+
+ def __radd__(self, other):
+ value = super(newint, self).__radd__(other)
+ if value is NotImplemented:
+ return other + long(self)
+ return newint(value)
+
+ def __sub__(self, other):
+ value = super(newint, self).__sub__(other)
+ if value is NotImplemented:
+ return long(self) - other
+ return newint(value)
+
+ def __rsub__(self, other):
+ value = super(newint, self).__rsub__(other)
+ if value is NotImplemented:
+ return other - long(self)
+ return newint(value)
+
+ def __mul__(self, other):
+ value = super(newint, self).__mul__(other)
+ if isint(value):
+ return newint(value)
+ elif value is NotImplemented:
+ return long(self) * other
+ return value
+
+ def __rmul__(self, other):
+ value = super(newint, self).__rmul__(other)
+ if isint(value):
+ return newint(value)
+ elif value is NotImplemented:
+ return other * long(self)
+ return value
+
+ def __div__(self, other):
+ # We override this rather than e.g. relying on object.__div__ or
+ # long.__div__ because we want to wrap the value in a newint()
+ # call if other is another int
+ value = long(self) / other
+ if isinstance(other, (int, long)):
+ return newint(value)
+ else:
+ return value
+
+ def __rdiv__(self, other):
+ value = other / long(self)
+ if isinstance(other, (int, long)):
+ return newint(value)
+ else:
+ return value
+
+ def __idiv__(self, other):
+ # long has no __idiv__ method. Use __itruediv__ and cast back to
+ # newint:
+ value = self.__itruediv__(other)
+ if isinstance(other, (int, long)):
+ return newint(value)
+ else:
+ return value
+
+ def __truediv__(self, other):
+ value = super(newint, self).__truediv__(other)
+ if value is NotImplemented:
+ value = long(self) / other
+ return value
+
+ def __rtruediv__(self, other):
+ return super(newint, self).__rtruediv__(other)
+
+ def __itruediv__(self, other):
+ # long has no __itruediv__ method
+ mylong = long(self)
+ mylong /= other
+ return mylong
+
+ def __floordiv__(self, other):
+ return newint(super(newint, self).__floordiv__(other))
+
+ def __rfloordiv__(self, other):
+ return newint(super(newint, self).__rfloordiv__(other))
+
+ def __ifloordiv__(self, other):
+ # long has no __ifloordiv__ method
+ mylong = long(self)
+ mylong //= other
+ return newint(mylong)
+
+ def __mod__(self, other):
+ value = super(newint, self).__mod__(other)
+ if value is NotImplemented:
+ return long(self) % other
+ return newint(value)
+
+ def __rmod__(self, other):
+ value = super(newint, self).__rmod__(other)
+ if value is NotImplemented:
+ return other % long(self)
+ return newint(value)
+
+ def __divmod__(self, other):
+ value = super(newint, self).__divmod__(other)
+ if value is NotImplemented:
+ mylong = long(self)
+ return (mylong // other, mylong % other)
+ return (newint(value[0]), newint(value[1]))
+
+ def __rdivmod__(self, other):
+ value = super(newint, self).__rdivmod__(other)
+ if value is NotImplemented:
+ mylong = long(self)
+ return (other // mylong, other % mylong)
+ return (newint(value[0]), newint(value[1]))
+
+ def __pow__(self, other):
+ value = super(newint, self).__pow__(other)
+ if value is NotImplemented:
+ return long(self) ** other
+ return newint(value)
+
+ def __rpow__(self, other):
+ value = super(newint, self).__rpow__(other)
+ if value is NotImplemented:
+ return other ** long(self)
+ return newint(value)
+
+ def __lshift__(self, other):
+ if not isint(other):
+ raise TypeError(
+ "unsupported operand type(s) for <<: '%s' and '%s'" %
+ (type(self).__name__, type(other).__name__))
+ return newint(super(newint, self).__lshift__(other))
+
+ def __rshift__(self, other):
+ if not isint(other):
+ raise TypeError(
+ "unsupported operand type(s) for >>: '%s' and '%s'" %
+ (type(self).__name__, type(other).__name__))
+ return newint(super(newint, self).__rshift__(other))
+
+ def __and__(self, other):
+ if not isint(other):
+ raise TypeError(
+ "unsupported operand type(s) for &: '%s' and '%s'" %
+ (type(self).__name__, type(other).__name__))
+ return newint(super(newint, self).__and__(other))
+
+ def __or__(self, other):
+ if not isint(other):
+ raise TypeError(
+ "unsupported operand type(s) for |: '%s' and '%s'" %
+ (type(self).__name__, type(other).__name__))
+ return newint(super(newint, self).__or__(other))
+
+ def __xor__(self, other):
+ if not isint(other):
+ raise TypeError(
+ "unsupported operand type(s) for ^: '%s' and '%s'" %
+ (type(self).__name__, type(other).__name__))
+ return newint(super(newint, self).__xor__(other))
+
+ def __neg__(self):
+ return newint(super(newint, self).__neg__())
+
+ def __pos__(self):
+ return newint(super(newint, self).__pos__())
+
+ def __abs__(self):
+ return newint(super(newint, self).__abs__())
+
+ def __invert__(self):
+ return newint(super(newint, self).__invert__())
+
+ def __int__(self):
+ return self
+
+ def __nonzero__(self):
+ return self.__bool__()
+
+ def __bool__(self):
+ """
+ So subclasses can override this, Py3-style
+ """
+ return super(newint, self).__nonzero__()
+
+ def __native__(self):
+ return long(self)
+
+ def to_bytes(self, length, byteorder='big', signed=False):
+ """
+ Return an array of bytes representing an integer.
+
+ The integer is represented using length bytes. An OverflowError is
+ raised if the integer is not representable with the given number of
+ bytes.
+
+ The byteorder argument determines the byte order used to represent the
+ integer. If byteorder is 'big', the most significant byte is at the
+ beginning of the byte array. If byteorder is 'little', the most
+ significant byte is at the end of the byte array. To request the native
+ byte order of the host system, use `sys.byteorder' as the byte order value.
+
+ The signed keyword-only argument determines whether two's complement is
+ used to represent the integer. If signed is False and a negative integer
+ is given, an OverflowError is raised.
+ """
+ if length < 0:
+ raise ValueError("length argument must be non-negative")
+ if length == 0 and self == 0:
+ return newbytes()
+ if signed and self < 0:
+ bits = length * 8
+ num = (2**bits) + self
+ if num <= 0:
+ raise OverflowError("int too smal to convert")
+ else:
+ if self < 0:
+ raise OverflowError("can't convert negative int to unsigned")
+ num = self
+ if byteorder not in ('little', 'big'):
+ raise ValueError("byteorder must be either 'little' or 'big'")
+ h = b'%x' % num
+ s = newbytes((b'0'*(len(h) % 2) + h).zfill(length*2).decode('hex'))
+ if signed:
+ high_set = s[0] & 0x80
+ if self > 0 and high_set:
+ raise OverflowError("int too big to convert")
+ if self < 0 and not high_set:
+ raise OverflowError("int too small to convert")
+ if len(s) > length:
+ raise OverflowError("int too big to convert")
+ return s if byteorder == 'big' else s[::-1]
+
+ @classmethod
+ def from_bytes(cls, mybytes, byteorder='big', signed=False):
+ """
+ Return the integer represented by the given array of bytes.
+
+ The mybytes argument must either support the buffer protocol or be an
+ iterable object producing bytes. Bytes and bytearray are examples of
+ built-in objects that support the buffer protocol.
+
+ The byteorder argument determines the byte order used to represent the
+ integer. If byteorder is 'big', the most significant byte is at the
+ beginning of the byte array. If byteorder is 'little', the most
+ significant byte is at the end of the byte array. To request the native
+ byte order of the host system, use `sys.byteorder' as the byte order value.
+
+ The signed keyword-only argument indicates whether two's complement is
+ used to represent the integer.
+ """
+ if byteorder not in ('little', 'big'):
+ raise ValueError("byteorder must be either 'little' or 'big'")
+ if isinstance(mybytes, unicode):
+ raise TypeError("cannot convert unicode objects to bytes")
+ # mybytes can also be passed as a sequence of integers on Py3.
+ # Test for this:
elif isinstance(mybytes, Iterable):
- mybytes = newbytes(mybytes)
- b = mybytes if byteorder == 'big' else mybytes[::-1]
- if len(b) == 0:
- b = b'\x00'
- # The encode() method has been disabled by newbytes, but Py2's
- # str has it:
- num = int(native(b).encode('hex'), 16)
- if signed and (b[0] & 0x80):
- num = num - (2 ** (len(b)*8))
- return cls(num)
-
-
-# def _twos_comp(val, bits):
-# """compute the 2's compliment of int value val"""
-# if( (val&(1<<(bits-1))) != 0 ):
-# val = val - (1<<bits)
-# return val
-
-
-__all__ = ['newint']
+ mybytes = newbytes(mybytes)
+ b = mybytes if byteorder == 'big' else mybytes[::-1]
+ if len(b) == 0:
+ b = b'\x00'
+ # The encode() method has been disabled by newbytes, but Py2's
+ # str has it:
+ num = int(native(b).encode('hex'), 16)
+ if signed and (b[0] & 0x80):
+ num = num - (2 ** (len(b)*8))
+ return cls(num)
+
+
+# def _twos_comp(val, bits):
+# """compute the 2's compliment of int value val"""
+# if( (val&(1<<(bits-1))) != 0 ):
+# val = val - (1<<bits)
+# return val
+
+
+__all__ = ['newint']
diff --git a/contrib/python/future/future/types/newlist.py b/contrib/python/future/future/types/newlist.py
index 71af947816..74d8f6cebe 100644
--- a/contrib/python/future/future/types/newlist.py
+++ b/contrib/python/future/future/types/newlist.py
@@ -1,95 +1,95 @@
-"""
-A list subclass for Python 2 that behaves like Python 3's list.
-
-The primary difference is that lists have a .copy() method in Py3.
-
-Example use:
-
->>> from builtins import list
->>> l1 = list() # instead of {} for an empty list
->>> l1.append('hello')
->>> l2 = l1.copy()
-
-"""
-
-import sys
-import copy
-
-from future.utils import with_metaclass
-from future.types.newobject import newobject
-
-
-_builtin_list = list
-ver = sys.version_info[:2]
-
-
-class BaseNewList(type):
- def __instancecheck__(cls, instance):
- if cls == newlist:
- return isinstance(instance, _builtin_list)
- else:
- return issubclass(instance.__class__, cls)
-
-
-class newlist(with_metaclass(BaseNewList, _builtin_list)):
- """
- A backport of the Python 3 list object to Py2
- """
- def copy(self):
- """
- L.copy() -> list -- a shallow copy of L
- """
- return copy.copy(self)
-
- def clear(self):
- """L.clear() -> None -- remove all items from L"""
- for i in range(len(self)):
- self.pop()
-
- def __new__(cls, *args, **kwargs):
- """
- list() -> new empty list
- list(iterable) -> new list initialized from iterable's items
- """
-
- if len(args) == 0:
- return super(newlist, cls).__new__(cls)
- elif type(args[0]) == newlist:
- value = args[0]
- else:
- value = args[0]
- return super(newlist, cls).__new__(cls, value)
-
- def __add__(self, value):
- return newlist(super(newlist, self).__add__(value))
-
- def __radd__(self, left):
- " left + self "
- try:
- return newlist(left) + self
- except:
- return NotImplemented
-
- def __getitem__(self, y):
- """
- x.__getitem__(y) <==> x[y]
-
- Warning: a bug in Python 2.x prevents indexing via a slice from
- returning a newlist object.
- """
- if isinstance(y, slice):
- return newlist(super(newlist, self).__getitem__(y))
- else:
- return super(newlist, self).__getitem__(y)
-
- def __native__(self):
- """
- Hook for the future.utils.native() function
- """
- return list(self)
-
- def __nonzero__(self):
- return len(self) > 0
-
-
-__all__ = ['newlist']
+"""
+A list subclass for Python 2 that behaves like Python 3's list.
+
+The primary difference is that lists have a .copy() method in Py3.
+
+Example use:
+
+>>> from builtins import list
+>>> l1 = list() # instead of {} for an empty list
+>>> l1.append('hello')
+>>> l2 = l1.copy()
+
+"""
+
+import sys
+import copy
+
+from future.utils import with_metaclass
+from future.types.newobject import newobject
+
+
+_builtin_list = list
+ver = sys.version_info[:2]
+
+
+class BaseNewList(type):
+ def __instancecheck__(cls, instance):
+ if cls == newlist:
+ return isinstance(instance, _builtin_list)
+ else:
+ return issubclass(instance.__class__, cls)
+
+
+class newlist(with_metaclass(BaseNewList, _builtin_list)):
+ """
+ A backport of the Python 3 list object to Py2
+ """
+ def copy(self):
+ """
+ L.copy() -> list -- a shallow copy of L
+ """
+ return copy.copy(self)
+
+ def clear(self):
+ """L.clear() -> None -- remove all items from L"""
+ for i in range(len(self)):
+ self.pop()
+
+ def __new__(cls, *args, **kwargs):
+ """
+ list() -> new empty list
+ list(iterable) -> new list initialized from iterable's items
+ """
+
+ if len(args) == 0:
+ return super(newlist, cls).__new__(cls)
+ elif type(args[0]) == newlist:
+ value = args[0]
+ else:
+ value = args[0]
+ return super(newlist, cls).__new__(cls, value)
+
+ def __add__(self, value):
+ return newlist(super(newlist, self).__add__(value))
+
+ def __radd__(self, left):
+ " left + self "
+ try:
+ return newlist(left) + self
+ except:
+ return NotImplemented
+
+ def __getitem__(self, y):
+ """
+ x.__getitem__(y) <==> x[y]
+
+ Warning: a bug in Python 2.x prevents indexing via a slice from
+ returning a newlist object.
+ """
+ if isinstance(y, slice):
+ return newlist(super(newlist, self).__getitem__(y))
+ else:
+ return super(newlist, self).__getitem__(y)
+
+ def __native__(self):
+ """
+ Hook for the future.utils.native() function
+ """
+ return list(self)
+
+ def __nonzero__(self):
+ return len(self) > 0
+
+
+__all__ = ['newlist']
diff --git a/contrib/python/future/future/types/newmemoryview.py b/contrib/python/future/future/types/newmemoryview.py
index af2b18968e..09f804dcf4 100644
--- a/contrib/python/future/future/types/newmemoryview.py
+++ b/contrib/python/future/future/types/newmemoryview.py
@@ -1,29 +1,29 @@
-"""
-A pretty lame implementation of a memoryview object for Python 2.6.
-"""
-from numbers import Integral
-import string
-
+"""
+A pretty lame implementation of a memoryview object for Python 2.6.
+"""
+from numbers import Integral
+import string
+
from future.utils import istext, isbytes, PY2, with_metaclass
-from future.types import no, issubset
-
+from future.types import no, issubset
+
if PY2:
from collections import Iterable
else:
from collections.abc import Iterable
-
-# class BaseNewBytes(type):
-# def __instancecheck__(cls, instance):
-# return isinstance(instance, _builtin_bytes)
-
-
-class newmemoryview(object): # with_metaclass(BaseNewBytes, _builtin_bytes)):
- """
- A pretty lame backport of the Python 2.7 and Python 3.x
- memoryviewview object to Py2.6.
- """
- def __init__(self, obj):
- return obj
-
-
-__all__ = ['newmemoryview']
+
+# class BaseNewBytes(type):
+# def __instancecheck__(cls, instance):
+# return isinstance(instance, _builtin_bytes)
+
+
+class newmemoryview(object): # with_metaclass(BaseNewBytes, _builtin_bytes)):
+ """
+ A pretty lame backport of the Python 2.7 and Python 3.x
+ memoryviewview object to Py2.6.
+ """
+ def __init__(self, obj):
+ return obj
+
+
+__all__ = ['newmemoryview']
diff --git a/contrib/python/future/future/types/newobject.py b/contrib/python/future/future/types/newobject.py
index 962bc6e012..31b84fc12c 100644
--- a/contrib/python/future/future/types/newobject.py
+++ b/contrib/python/future/future/types/newobject.py
@@ -1,117 +1,117 @@
-"""
-An object subclass for Python 2 that gives new-style classes written in the
-style of Python 3 (with ``__next__`` and unicode-returning ``__str__`` methods)
-the appropriate Python 2-style ``next`` and ``__unicode__`` methods for compatible.
-
-Example use::
-
- from builtins import object
-
- my_unicode_str = u'Unicode string: \u5b54\u5b50'
-
- class A(object):
- def __str__(self):
- return my_unicode_str
-
- a = A()
- print(str(a))
-
- # On Python 2, these relations hold:
- assert unicode(a) == my_unicode_string
+"""
+An object subclass for Python 2 that gives new-style classes written in the
+style of Python 3 (with ``__next__`` and unicode-returning ``__str__`` methods)
+the appropriate Python 2-style ``next`` and ``__unicode__`` methods for compatible.
+
+Example use::
+
+ from builtins import object
+
+ my_unicode_str = u'Unicode string: \u5b54\u5b50'
+
+ class A(object):
+ def __str__(self):
+ return my_unicode_str
+
+ a = A()
+ print(str(a))
+
+ # On Python 2, these relations hold:
+ assert unicode(a) == my_unicode_string
assert str(a) == my_unicode_string.encode('utf-8')
-
-
-Another example::
-
- from builtins import object
-
- class Upper(object):
- def __init__(self, iterable):
- self._iter = iter(iterable)
- def __next__(self): # note the Py3 interface
- return next(self._iter).upper()
- def __iter__(self):
- return self
-
- assert list(Upper('hello')) == list('HELLO')
-
-"""
-
-
-class newobject(object):
- """
- A magical object class that provides Python 2 compatibility methods::
- next
- __unicode__
- __nonzero__
-
- Subclasses of this class can merely define the Python 3 methods (__next__,
- __str__, and __bool__).
- """
- def next(self):
- if hasattr(self, '__next__'):
- return type(self).__next__(self)
- raise TypeError('newobject is not an iterator')
-
- def __unicode__(self):
- # All subclasses of the builtin object should have __str__ defined.
- # Note that old-style classes do not have __str__ defined.
- if hasattr(self, '__str__'):
- s = type(self).__str__(self)
- else:
- s = str(self)
- if isinstance(s, unicode):
- return s
- else:
- return s.decode('utf-8')
-
- def __nonzero__(self):
- if hasattr(self, '__bool__'):
- return type(self).__bool__(self)
- if hasattr(self, '__len__'):
- return type(self).__len__(self)
- # object has no __nonzero__ method
- return True
-
- # Are these ever needed?
- # def __div__(self):
- # return self.__truediv__()
-
- # def __idiv__(self, other):
- # return self.__itruediv__(other)
-
- def __long__(self):
- if not hasattr(self, '__int__'):
- return NotImplemented
- return self.__int__() # not type(self).__int__(self)
-
- # def __new__(cls, *args, **kwargs):
- # """
- # dict() -> new empty dictionary
- # dict(mapping) -> new dictionary initialized from a mapping object's
- # (key, value) pairs
- # dict(iterable) -> new dictionary initialized as if via:
- # d = {}
- # for k, v in iterable:
- # d[k] = v
- # dict(**kwargs) -> new dictionary initialized with the name=value pairs
- # in the keyword argument list. For example: dict(one=1, two=2)
- # """
-
- # if len(args) == 0:
- # return super(newdict, cls).__new__(cls)
- # elif type(args[0]) == newdict:
- # return args[0]
- # else:
- # value = args[0]
- # return super(newdict, cls).__new__(cls, value)
-
- def __native__(self):
- """
- Hook for the future.utils.native() function
- """
- return object(self)
-
+
+
+Another example::
+
+ from builtins import object
+
+ class Upper(object):
+ def __init__(self, iterable):
+ self._iter = iter(iterable)
+ def __next__(self): # note the Py3 interface
+ return next(self._iter).upper()
+ def __iter__(self):
+ return self
+
+ assert list(Upper('hello')) == list('HELLO')
+
+"""
+
+
+class newobject(object):
+ """
+ A magical object class that provides Python 2 compatibility methods::
+ next
+ __unicode__
+ __nonzero__
+
+ Subclasses of this class can merely define the Python 3 methods (__next__,
+ __str__, and __bool__).
+ """
+ def next(self):
+ if hasattr(self, '__next__'):
+ return type(self).__next__(self)
+ raise TypeError('newobject is not an iterator')
+
+ def __unicode__(self):
+ # All subclasses of the builtin object should have __str__ defined.
+ # Note that old-style classes do not have __str__ defined.
+ if hasattr(self, '__str__'):
+ s = type(self).__str__(self)
+ else:
+ s = str(self)
+ if isinstance(s, unicode):
+ return s
+ else:
+ return s.decode('utf-8')
+
+ def __nonzero__(self):
+ if hasattr(self, '__bool__'):
+ return type(self).__bool__(self)
+ if hasattr(self, '__len__'):
+ return type(self).__len__(self)
+ # object has no __nonzero__ method
+ return True
+
+ # Are these ever needed?
+ # def __div__(self):
+ # return self.__truediv__()
+
+ # def __idiv__(self, other):
+ # return self.__itruediv__(other)
+
+ def __long__(self):
+ if not hasattr(self, '__int__'):
+ return NotImplemented
+ return self.__int__() # not type(self).__int__(self)
+
+ # def __new__(cls, *args, **kwargs):
+ # """
+ # dict() -> new empty dictionary
+ # dict(mapping) -> new dictionary initialized from a mapping object's
+ # (key, value) pairs
+ # dict(iterable) -> new dictionary initialized as if via:
+ # d = {}
+ # for k, v in iterable:
+ # d[k] = v
+ # dict(**kwargs) -> new dictionary initialized with the name=value pairs
+ # in the keyword argument list. For example: dict(one=1, two=2)
+ # """
+
+ # if len(args) == 0:
+ # return super(newdict, cls).__new__(cls)
+ # elif type(args[0]) == newdict:
+ # return args[0]
+ # else:
+ # value = args[0]
+ # return super(newdict, cls).__new__(cls, value)
+
+ def __native__(self):
+ """
+ Hook for the future.utils.native() function
+ """
+ return object(self)
+
__slots__ = []
-
-__all__ = ['newobject']
+
+__all__ = ['newobject']
diff --git a/contrib/python/future/future/types/newopen.py b/contrib/python/future/future/types/newopen.py
index 2d5c1f4b03..b75d45afb2 100644
--- a/contrib/python/future/future/types/newopen.py
+++ b/contrib/python/future/future/types/newopen.py
@@ -1,32 +1,32 @@
-"""
-A substitute for the Python 3 open() function.
-
-Note that io.open() is more complete but maybe slower. Even so, the
-completeness may be a better default. TODO: compare these
-"""
-
-_builtin_open = open
-
-class newopen(object):
- """Wrapper providing key part of Python 3 open() interface.
-
- From IPython's py3compat.py module. License: BSD.
- """
- def __init__(self, fname, mode="r", encoding="utf-8"):
- self.f = _builtin_open(fname, mode)
- self.enc = encoding
-
- def write(self, s):
- return self.f.write(s.encode(self.enc))
-
- def read(self, size=-1):
- return self.f.read(size).decode(self.enc)
-
- def close(self):
- return self.f.close()
-
- def __enter__(self):
- return self
-
- def __exit__(self, etype, value, traceback):
- self.f.close()
+"""
+A substitute for the Python 3 open() function.
+
+Note that io.open() is more complete but maybe slower. Even so, the
+completeness may be a better default. TODO: compare these
+"""
+
+_builtin_open = open
+
+class newopen(object):
+ """Wrapper providing key part of Python 3 open() interface.
+
+ From IPython's py3compat.py module. License: BSD.
+ """
+ def __init__(self, fname, mode="r", encoding="utf-8"):
+ self.f = _builtin_open(fname, mode)
+ self.enc = encoding
+
+ def write(self, s):
+ return self.f.write(s.encode(self.enc))
+
+ def read(self, size=-1):
+ return self.f.read(size).decode(self.enc)
+
+ def close(self):
+ return self.f.close()
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, etype, value, traceback):
+ self.f.close()
diff --git a/contrib/python/future/future/types/newrange.py b/contrib/python/future/future/types/newrange.py
index b73676c6a7..eda01a5a50 100644
--- a/contrib/python/future/future/types/newrange.py
+++ b/contrib/python/future/future/types/newrange.py
@@ -1,170 +1,170 @@
-"""
-Nearly identical to xrange.py, by Dan Crosta, from
-
- https://github.com/dcrosta/xrange.git
-
-This is included here in the ``future`` package rather than pointed to as
-a dependency because there is no package for ``xrange`` on PyPI. It is
-also tweaked to appear like a regular Python 3 ``range`` object rather
-than a Python 2 xrange.
-
-From Dan Crosta's README:
-
- "A pure-Python implementation of Python 2.7's xrange built-in, with
- some features backported from the Python 3.x range built-in (which
- replaced xrange) in that version."
-
- Read more at
- https://late.am/post/2012/06/18/what-the-heck-is-an-xrange
-"""
-from __future__ import absolute_import
-
+"""
+Nearly identical to xrange.py, by Dan Crosta, from
+
+ https://github.com/dcrosta/xrange.git
+
+This is included here in the ``future`` package rather than pointed to as
+a dependency because there is no package for ``xrange`` on PyPI. It is
+also tweaked to appear like a regular Python 3 ``range`` object rather
+than a Python 2 xrange.
+
+From Dan Crosta's README:
+
+ "A pure-Python implementation of Python 2.7's xrange built-in, with
+ some features backported from the Python 3.x range built-in (which
+ replaced xrange) in that version."
+
+ Read more at
+ https://late.am/post/2012/06/18/what-the-heck-is-an-xrange
+"""
+from __future__ import absolute_import
+
from future.utils import PY2
if PY2:
from collections import Sequence, Iterator
else:
from collections.abc import Sequence, Iterator
-from itertools import islice
-
-from future.backports.misc import count # with step parameter on Py2.6
-# For backward compatibility with python-future versions < 0.14.4:
-_count = count
-
-
-class newrange(Sequence):
- """
- Pure-Python backport of Python 3's range object. See `the CPython
- documentation for details:
- <http://docs.python.org/py3k/library/functions.html#range>`_
- """
-
- def __init__(self, *args):
- if len(args) == 1:
- start, stop, step = 0, args[0], 1
- elif len(args) == 2:
- start, stop, step = args[0], args[1], 1
- elif len(args) == 3:
- start, stop, step = args
- else:
- raise TypeError('range() requires 1-3 int arguments')
-
- try:
- start, stop, step = int(start), int(stop), int(step)
- except ValueError:
- raise TypeError('an integer is required')
-
- if step == 0:
- raise ValueError('range() arg 3 must not be zero')
- elif step < 0:
- stop = min(stop, start)
- else:
- stop = max(stop, start)
-
- self._start = start
- self._stop = stop
- self._step = step
- self._len = (stop - start) // step + bool((stop - start) % step)
-
- @property
- def start(self):
- return self._start
-
- @property
- def stop(self):
- return self._stop
-
- @property
- def step(self):
- return self._step
-
- def __repr__(self):
- if self._step == 1:
- return 'range(%d, %d)' % (self._start, self._stop)
- return 'range(%d, %d, %d)' % (self._start, self._stop, self._step)
-
- def __eq__(self, other):
- return (isinstance(other, newrange) and
- (self._len == 0 == other._len or
- (self._start, self._step, self._len) ==
- (other._start, other._step, self._len)))
-
- def __len__(self):
- return self._len
-
- def index(self, value):
- """Return the 0-based position of integer `value` in
- the sequence this range represents."""
+from itertools import islice
+
+from future.backports.misc import count # with step parameter on Py2.6
+# For backward compatibility with python-future versions < 0.14.4:
+_count = count
+
+
+class newrange(Sequence):
+ """
+ Pure-Python backport of Python 3's range object. See `the CPython
+ documentation for details:
+ <http://docs.python.org/py3k/library/functions.html#range>`_
+ """
+
+ def __init__(self, *args):
+ if len(args) == 1:
+ start, stop, step = 0, args[0], 1
+ elif len(args) == 2:
+ start, stop, step = args[0], args[1], 1
+ elif len(args) == 3:
+ start, stop, step = args
+ else:
+ raise TypeError('range() requires 1-3 int arguments')
+
+ try:
+ start, stop, step = int(start), int(stop), int(step)
+ except ValueError:
+ raise TypeError('an integer is required')
+
+ if step == 0:
+ raise ValueError('range() arg 3 must not be zero')
+ elif step < 0:
+ stop = min(stop, start)
+ else:
+ stop = max(stop, start)
+
+ self._start = start
+ self._stop = stop
+ self._step = step
+ self._len = (stop - start) // step + bool((stop - start) % step)
+
+ @property
+ def start(self):
+ return self._start
+
+ @property
+ def stop(self):
+ return self._stop
+
+ @property
+ def step(self):
+ return self._step
+
+ def __repr__(self):
+ if self._step == 1:
+ return 'range(%d, %d)' % (self._start, self._stop)
+ return 'range(%d, %d, %d)' % (self._start, self._stop, self._step)
+
+ def __eq__(self, other):
+ return (isinstance(other, newrange) and
+ (self._len == 0 == other._len or
+ (self._start, self._step, self._len) ==
+ (other._start, other._step, self._len)))
+
+ def __len__(self):
+ return self._len
+
+ def index(self, value):
+ """Return the 0-based position of integer `value` in
+ the sequence this range represents."""
try:
diff = value - self._start
except TypeError:
raise ValueError('%r is not in range' % value)
- quotient, remainder = divmod(diff, self._step)
- if remainder == 0 and 0 <= quotient < self._len:
- return abs(quotient)
- raise ValueError('%r is not in range' % value)
-
- def count(self, value):
- """Return the number of ocurrences of integer `value`
- in the sequence this range represents."""
- # a value can occur exactly zero or one times
- return int(value in self)
-
- def __contains__(self, value):
- """Return ``True`` if the integer `value` occurs in
- the sequence this range represents."""
- try:
- self.index(value)
- return True
- except ValueError:
- return False
-
- def __reversed__(self):
- return iter(self[::-1])
-
- def __getitem__(self, index):
- """Return the element at position ``index`` in the sequence
- this range represents, or raise :class:`IndexError` if the
- position is out of range."""
- if isinstance(index, slice):
- return self.__getitem_slice(index)
- if index < 0:
- # negative indexes access from the end
- index = self._len + index
- if index < 0 or index >= self._len:
- raise IndexError('range object index out of range')
- return self._start + index * self._step
-
- def __getitem_slice(self, slce):
- """Return a range which represents the requested slce
- of the sequence represented by this range.
- """
- scaled_indices = (self._step * n for n in slce.indices(self._len))
- start_offset, stop_offset, new_step = scaled_indices
- return newrange(self._start + start_offset,
- self._start + stop_offset,
- new_step)
-
- def __iter__(self):
- """Return an iterator which enumerates the elements of the
- sequence this range represents."""
- return range_iterator(self)
-
-
-class range_iterator(Iterator):
- """An iterator for a :class:`range`.
- """
- def __init__(self, range_):
- self._stepper = islice(count(range_.start, range_.step), len(range_))
-
- def __iter__(self):
- return self
-
+ quotient, remainder = divmod(diff, self._step)
+ if remainder == 0 and 0 <= quotient < self._len:
+ return abs(quotient)
+ raise ValueError('%r is not in range' % value)
+
+ def count(self, value):
+ """Return the number of ocurrences of integer `value`
+ in the sequence this range represents."""
+ # a value can occur exactly zero or one times
+ return int(value in self)
+
+ def __contains__(self, value):
+ """Return ``True`` if the integer `value` occurs in
+ the sequence this range represents."""
+ try:
+ self.index(value)
+ return True
+ except ValueError:
+ return False
+
+ def __reversed__(self):
+ return iter(self[::-1])
+
+ def __getitem__(self, index):
+ """Return the element at position ``index`` in the sequence
+ this range represents, or raise :class:`IndexError` if the
+ position is out of range."""
+ if isinstance(index, slice):
+ return self.__getitem_slice(index)
+ if index < 0:
+ # negative indexes access from the end
+ index = self._len + index
+ if index < 0 or index >= self._len:
+ raise IndexError('range object index out of range')
+ return self._start + index * self._step
+
+ def __getitem_slice(self, slce):
+ """Return a range which represents the requested slce
+ of the sequence represented by this range.
+ """
+ scaled_indices = (self._step * n for n in slce.indices(self._len))
+ start_offset, stop_offset, new_step = scaled_indices
+ return newrange(self._start + start_offset,
+ self._start + stop_offset,
+ new_step)
+
+ def __iter__(self):
+ """Return an iterator which enumerates the elements of the
+ sequence this range represents."""
+ return range_iterator(self)
+
+
+class range_iterator(Iterator):
+ """An iterator for a :class:`range`.
+ """
+ def __init__(self, range_):
+ self._stepper = islice(count(range_.start, range_.step), len(range_))
+
+ def __iter__(self):
+ return self
+
def __next__(self):
return next(self._stepper)
- def next(self):
- return next(self._stepper)
-
-
-__all__ = ['newrange']
+ def next(self):
+ return next(self._stepper)
+
+
+__all__ = ['newrange']
diff --git a/contrib/python/future/future/types/newstr.py b/contrib/python/future/future/types/newstr.py
index c076a5a181..8ca191f978 100644
--- a/contrib/python/future/future/types/newstr.py
+++ b/contrib/python/future/future/types/newstr.py
@@ -1,300 +1,300 @@
-"""
-This module redefines ``str`` on Python 2.x to be a subclass of the Py2
-``unicode`` type that behaves like the Python 3.x ``str``.
-
-The main differences between ``newstr`` and Python 2.x's ``unicode`` type are
-the stricter type-checking and absence of a `u''` prefix in the representation.
-
-It is designed to be used together with the ``unicode_literals`` import
-as follows:
-
- >>> from __future__ import unicode_literals
- >>> from builtins import str, isinstance
-
-On Python 3.x and normally on Python 2.x, these expressions hold
-
- >>> str('blah') is 'blah'
- True
- >>> isinstance('blah', str)
- True
-
-However, on Python 2.x, with this import:
-
- >>> from __future__ import unicode_literals
-
-the same expressions are False:
-
- >>> str('blah') is 'blah'
- False
- >>> isinstance('blah', str)
- False
-
-This module is designed to be imported together with ``unicode_literals`` on
-Python 2 to bring the meaning of ``str`` back into alignment with unprefixed
-string literals (i.e. ``unicode`` subclasses).
-
-Note that ``str()`` (and ``print()``) would then normally call the
-``__unicode__`` method on objects in Python 2. To define string
-representations of your objects portably across Py3 and Py2, use the
-:func:`python_2_unicode_compatible` decorator in :mod:`future.utils`.
-
-"""
-
-from numbers import Number
-
-from future.utils import PY3, istext, with_metaclass, isnewbytes
-from future.types import no, issubset
-from future.types.newobject import newobject
-
-
-if PY3:
- # We'll probably never use newstr on Py3 anyway...
- unicode = str
+"""
+This module redefines ``str`` on Python 2.x to be a subclass of the Py2
+``unicode`` type that behaves like the Python 3.x ``str``.
+
+The main differences between ``newstr`` and Python 2.x's ``unicode`` type are
+the stricter type-checking and absence of a `u''` prefix in the representation.
+
+It is designed to be used together with the ``unicode_literals`` import
+as follows:
+
+ >>> from __future__ import unicode_literals
+ >>> from builtins import str, isinstance
+
+On Python 3.x and normally on Python 2.x, these expressions hold
+
+ >>> str('blah') is 'blah'
+ True
+ >>> isinstance('blah', str)
+ True
+
+However, on Python 2.x, with this import:
+
+ >>> from __future__ import unicode_literals
+
+the same expressions are False:
+
+ >>> str('blah') is 'blah'
+ False
+ >>> isinstance('blah', str)
+ False
+
+This module is designed to be imported together with ``unicode_literals`` on
+Python 2 to bring the meaning of ``str`` back into alignment with unprefixed
+string literals (i.e. ``unicode`` subclasses).
+
+Note that ``str()`` (and ``print()``) would then normally call the
+``__unicode__`` method on objects in Python 2. To define string
+representations of your objects portably across Py3 and Py2, use the
+:func:`python_2_unicode_compatible` decorator in :mod:`future.utils`.
+
+"""
+
+from numbers import Number
+
+from future.utils import PY3, istext, with_metaclass, isnewbytes
+from future.types import no, issubset
+from future.types.newobject import newobject
+
+
+if PY3:
+ # We'll probably never use newstr on Py3 anyway...
+ unicode = str
from collections.abc import Iterable
else:
from collections import Iterable
-
-
-class BaseNewStr(type):
- def __instancecheck__(cls, instance):
- if cls == newstr:
- return isinstance(instance, unicode)
- else:
- return issubclass(instance.__class__, cls)
-
-
-class newstr(with_metaclass(BaseNewStr, unicode)):
- """
- A backport of the Python 3 str object to Py2
- """
- no_convert_msg = "Can't convert '{0}' object to str implicitly"
-
- def __new__(cls, *args, **kwargs):
- """
- From the Py3 str docstring:
-
- str(object='') -> str
- str(bytes_or_buffer[, encoding[, errors]]) -> str
-
- Create a new string object from the given object. If encoding or
- errors is specified, then the object must expose a data buffer
- that will be decoded using the given encoding and error handler.
- Otherwise, returns the result of object.__str__() (if defined)
- or repr(object).
- encoding defaults to sys.getdefaultencoding().
- errors defaults to 'strict'.
-
- """
- if len(args) == 0:
- return super(newstr, cls).__new__(cls)
- # Special case: If someone requests str(str(u'abc')), return the same
- # object (same id) for consistency with Py3.3. This is not true for
- # other objects like list or dict.
- elif type(args[0]) == newstr and cls == newstr:
- return args[0]
- elif isinstance(args[0], unicode):
- value = args[0]
- elif isinstance(args[0], bytes): # i.e. Py2 bytes or newbytes
- if 'encoding' in kwargs or len(args) > 1:
- value = args[0].decode(*args[1:], **kwargs)
- else:
- value = args[0].__str__()
- else:
- value = args[0]
- return super(newstr, cls).__new__(cls, value)
-
- def __repr__(self):
- """
- Without the u prefix
- """
-
- value = super(newstr, self).__repr__()
- # assert value[0] == u'u'
- return value[1:]
-
- def __getitem__(self, y):
- """
- Warning: Python <= 2.7.6 has a bug that causes this method never to be called
- when y is a slice object. Therefore the type of newstr()[:2] is wrong
- (unicode instead of newstr).
- """
- return newstr(super(newstr, self).__getitem__(y))
-
- def __contains__(self, key):
- errmsg = "'in <string>' requires string as left operand, not {0}"
- # Don't use isinstance() here because we only want to catch
- # newstr, not Python 2 unicode:
- if type(key) == newstr:
- newkey = key
- elif isinstance(key, unicode) or isinstance(key, bytes) and not isnewbytes(key):
- newkey = newstr(key)
- else:
- raise TypeError(errmsg.format(type(key)))
- return issubset(list(newkey), list(self))
-
- @no('newbytes')
- def __add__(self, other):
- return newstr(super(newstr, self).__add__(other))
-
- @no('newbytes')
- def __radd__(self, left):
- " left + self "
- try:
- return newstr(left) + self
- except:
- return NotImplemented
-
- def __mul__(self, other):
- return newstr(super(newstr, self).__mul__(other))
-
- def __rmul__(self, other):
- return newstr(super(newstr, self).__rmul__(other))
-
- def join(self, iterable):
- errmsg = 'sequence item {0}: expected unicode string, found bytes'
- for i, item in enumerate(iterable):
- # Here we use type() rather than isinstance() because
- # __instancecheck__ is being overridden. E.g.
- # isinstance(b'abc', newbytes) is True on Py2.
- if isnewbytes(item):
- raise TypeError(errmsg.format(i))
- # Support use as a staticmethod: str.join('-', ['a', 'b'])
- if type(self) == newstr:
- return newstr(super(newstr, self).join(iterable))
- else:
- return newstr(super(newstr, newstr(self)).join(iterable))
-
- @no('newbytes')
- def find(self, sub, *args):
- return super(newstr, self).find(sub, *args)
-
- @no('newbytes')
- def rfind(self, sub, *args):
- return super(newstr, self).rfind(sub, *args)
-
- @no('newbytes', (1, 2))
- def replace(self, old, new, *args):
- return newstr(super(newstr, self).replace(old, new, *args))
-
- def decode(self, *args):
- raise AttributeError("decode method has been disabled in newstr")
-
- def encode(self, encoding='utf-8', errors='strict'):
- """
- Returns bytes
-
- Encode S using the codec registered for encoding. Default encoding
- is 'utf-8'. errors may be given to set a different error
- handling scheme. Default is 'strict' meaning that encoding errors raise
- a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and
- 'xmlcharrefreplace' as well as any other name registered with
- codecs.register_error that can handle UnicodeEncodeErrors.
- """
- from future.types.newbytes import newbytes
- # Py2 unicode.encode() takes encoding and errors as optional parameter,
- # not keyword arguments as in Python 3 str.
-
- # For the surrogateescape error handling mechanism, the
- # codecs.register_error() function seems to be inadequate for an
- # implementation of it when encoding. (Decoding seems fine, however.)
- # For example, in the case of
- # u'\udcc3'.encode('ascii', 'surrogateescape_handler')
- # after registering the ``surrogateescape_handler`` function in
- # future.utils.surrogateescape, both Python 2.x and 3.x raise an
- # exception anyway after the function is called because the unicode
- # string it has to return isn't encodable strictly as ASCII.
-
- if errors == 'surrogateescape':
- if encoding == 'utf-16':
- # Known to fail here. See test_encoding_works_normally()
- raise NotImplementedError('FIXME: surrogateescape handling is '
- 'not yet implemented properly')
- # Encode char by char, building up list of byte-strings
- mybytes = []
- for c in self:
- code = ord(c)
- if 0xD800 <= code <= 0xDCFF:
- mybytes.append(newbytes([code - 0xDC00]))
- else:
- mybytes.append(c.encode(encoding=encoding))
- return newbytes(b'').join(mybytes)
- return newbytes(super(newstr, self).encode(encoding, errors))
-
- @no('newbytes', 1)
- def startswith(self, prefix, *args):
- if isinstance(prefix, Iterable):
- for thing in prefix:
- if isnewbytes(thing):
- raise TypeError(self.no_convert_msg.format(type(thing)))
- return super(newstr, self).startswith(prefix, *args)
-
- @no('newbytes', 1)
- def endswith(self, prefix, *args):
- # Note we need the decorator above as well as the isnewbytes()
- # check because prefix can be either a bytes object or e.g. a
- # tuple of possible prefixes. (If it's a bytes object, each item
- # in it is an int.)
- if isinstance(prefix, Iterable):
- for thing in prefix:
- if isnewbytes(thing):
- raise TypeError(self.no_convert_msg.format(type(thing)))
- return super(newstr, self).endswith(prefix, *args)
-
- @no('newbytes', 1)
- def split(self, sep=None, maxsplit=-1):
- # Py2 unicode.split() takes maxsplit as an optional parameter,
- # not as a keyword argument as in Python 3 str.
- parts = super(newstr, self).split(sep, maxsplit)
- return [newstr(part) for part in parts]
-
- @no('newbytes', 1)
- def rsplit(self, sep=None, maxsplit=-1):
- # Py2 unicode.rsplit() takes maxsplit as an optional parameter,
- # not as a keyword argument as in Python 3 str.
- parts = super(newstr, self).rsplit(sep, maxsplit)
- return [newstr(part) for part in parts]
-
- @no('newbytes', 1)
- def partition(self, sep):
- parts = super(newstr, self).partition(sep)
- return tuple(newstr(part) for part in parts)
-
- @no('newbytes', 1)
- def rpartition(self, sep):
- parts = super(newstr, self).rpartition(sep)
- return tuple(newstr(part) for part in parts)
-
- @no('newbytes', 1)
- def index(self, sub, *args):
- """
- Like newstr.find() but raise ValueError when the substring is not
- found.
- """
- pos = self.find(sub, *args)
- if pos == -1:
- raise ValueError('substring not found')
- return pos
-
- def splitlines(self, keepends=False):
- """
- S.splitlines(keepends=False) -> list of strings
-
- Return a list of the lines in S, breaking at line boundaries.
- Line breaks are not included in the resulting list unless keepends
- is given and true.
- """
- # Py2 unicode.splitlines() takes keepends as an optional parameter,
- # not as a keyword argument as in Python 3 str.
- parts = super(newstr, self).splitlines(keepends)
- return [newstr(part) for part in parts]
-
- def __eq__(self, other):
- if (isinstance(other, unicode) or
- isinstance(other, bytes) and not isnewbytes(other)):
- return super(newstr, self).__eq__(other)
- else:
+
+
+class BaseNewStr(type):
+ def __instancecheck__(cls, instance):
+ if cls == newstr:
+ return isinstance(instance, unicode)
+ else:
+ return issubclass(instance.__class__, cls)
+
+
+class newstr(with_metaclass(BaseNewStr, unicode)):
+ """
+ A backport of the Python 3 str object to Py2
+ """
+ no_convert_msg = "Can't convert '{0}' object to str implicitly"
+
+ def __new__(cls, *args, **kwargs):
+ """
+ From the Py3 str docstring:
+
+ str(object='') -> str
+ str(bytes_or_buffer[, encoding[, errors]]) -> str
+
+ Create a new string object from the given object. If encoding or
+ errors is specified, then the object must expose a data buffer
+ that will be decoded using the given encoding and error handler.
+ Otherwise, returns the result of object.__str__() (if defined)
+ or repr(object).
+ encoding defaults to sys.getdefaultencoding().
+ errors defaults to 'strict'.
+
+ """
+ if len(args) == 0:
+ return super(newstr, cls).__new__(cls)
+ # Special case: If someone requests str(str(u'abc')), return the same
+ # object (same id) for consistency with Py3.3. This is not true for
+ # other objects like list or dict.
+ elif type(args[0]) == newstr and cls == newstr:
+ return args[0]
+ elif isinstance(args[0], unicode):
+ value = args[0]
+ elif isinstance(args[0], bytes): # i.e. Py2 bytes or newbytes
+ if 'encoding' in kwargs or len(args) > 1:
+ value = args[0].decode(*args[1:], **kwargs)
+ else:
+ value = args[0].__str__()
+ else:
+ value = args[0]
+ return super(newstr, cls).__new__(cls, value)
+
+ def __repr__(self):
+ """
+ Without the u prefix
+ """
+
+ value = super(newstr, self).__repr__()
+ # assert value[0] == u'u'
+ return value[1:]
+
+ def __getitem__(self, y):
+ """
+ Warning: Python <= 2.7.6 has a bug that causes this method never to be called
+ when y is a slice object. Therefore the type of newstr()[:2] is wrong
+ (unicode instead of newstr).
+ """
+ return newstr(super(newstr, self).__getitem__(y))
+
+ def __contains__(self, key):
+ errmsg = "'in <string>' requires string as left operand, not {0}"
+ # Don't use isinstance() here because we only want to catch
+ # newstr, not Python 2 unicode:
+ if type(key) == newstr:
+ newkey = key
+ elif isinstance(key, unicode) or isinstance(key, bytes) and not isnewbytes(key):
+ newkey = newstr(key)
+ else:
+ raise TypeError(errmsg.format(type(key)))
+ return issubset(list(newkey), list(self))
+
+ @no('newbytes')
+ def __add__(self, other):
+ return newstr(super(newstr, self).__add__(other))
+
+ @no('newbytes')
+ def __radd__(self, left):
+ " left + self "
+ try:
+ return newstr(left) + self
+ except:
+ return NotImplemented
+
+ def __mul__(self, other):
+ return newstr(super(newstr, self).__mul__(other))
+
+ def __rmul__(self, other):
+ return newstr(super(newstr, self).__rmul__(other))
+
+ def join(self, iterable):
+ errmsg = 'sequence item {0}: expected unicode string, found bytes'
+ for i, item in enumerate(iterable):
+ # Here we use type() rather than isinstance() because
+ # __instancecheck__ is being overridden. E.g.
+ # isinstance(b'abc', newbytes) is True on Py2.
+ if isnewbytes(item):
+ raise TypeError(errmsg.format(i))
+ # Support use as a staticmethod: str.join('-', ['a', 'b'])
+ if type(self) == newstr:
+ return newstr(super(newstr, self).join(iterable))
+ else:
+ return newstr(super(newstr, newstr(self)).join(iterable))
+
+ @no('newbytes')
+ def find(self, sub, *args):
+ return super(newstr, self).find(sub, *args)
+
+ @no('newbytes')
+ def rfind(self, sub, *args):
+ return super(newstr, self).rfind(sub, *args)
+
+ @no('newbytes', (1, 2))
+ def replace(self, old, new, *args):
+ return newstr(super(newstr, self).replace(old, new, *args))
+
+ def decode(self, *args):
+ raise AttributeError("decode method has been disabled in newstr")
+
+ def encode(self, encoding='utf-8', errors='strict'):
+ """
+ Returns bytes
+
+ Encode S using the codec registered for encoding. Default encoding
+ is 'utf-8'. errors may be given to set a different error
+ handling scheme. Default is 'strict' meaning that encoding errors raise
+ a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and
+ 'xmlcharrefreplace' as well as any other name registered with
+ codecs.register_error that can handle UnicodeEncodeErrors.
+ """
+ from future.types.newbytes import newbytes
+ # Py2 unicode.encode() takes encoding and errors as optional parameter,
+ # not keyword arguments as in Python 3 str.
+
+ # For the surrogateescape error handling mechanism, the
+ # codecs.register_error() function seems to be inadequate for an
+ # implementation of it when encoding. (Decoding seems fine, however.)
+ # For example, in the case of
+ # u'\udcc3'.encode('ascii', 'surrogateescape_handler')
+ # after registering the ``surrogateescape_handler`` function in
+ # future.utils.surrogateescape, both Python 2.x and 3.x raise an
+ # exception anyway after the function is called because the unicode
+ # string it has to return isn't encodable strictly as ASCII.
+
+ if errors == 'surrogateescape':
+ if encoding == 'utf-16':
+ # Known to fail here. See test_encoding_works_normally()
+ raise NotImplementedError('FIXME: surrogateescape handling is '
+ 'not yet implemented properly')
+ # Encode char by char, building up list of byte-strings
+ mybytes = []
+ for c in self:
+ code = ord(c)
+ if 0xD800 <= code <= 0xDCFF:
+ mybytes.append(newbytes([code - 0xDC00]))
+ else:
+ mybytes.append(c.encode(encoding=encoding))
+ return newbytes(b'').join(mybytes)
+ return newbytes(super(newstr, self).encode(encoding, errors))
+
+ @no('newbytes', 1)
+ def startswith(self, prefix, *args):
+ if isinstance(prefix, Iterable):
+ for thing in prefix:
+ if isnewbytes(thing):
+ raise TypeError(self.no_convert_msg.format(type(thing)))
+ return super(newstr, self).startswith(prefix, *args)
+
+ @no('newbytes', 1)
+ def endswith(self, prefix, *args):
+ # Note we need the decorator above as well as the isnewbytes()
+ # check because prefix can be either a bytes object or e.g. a
+ # tuple of possible prefixes. (If it's a bytes object, each item
+ # in it is an int.)
+ if isinstance(prefix, Iterable):
+ for thing in prefix:
+ if isnewbytes(thing):
+ raise TypeError(self.no_convert_msg.format(type(thing)))
+ return super(newstr, self).endswith(prefix, *args)
+
+ @no('newbytes', 1)
+ def split(self, sep=None, maxsplit=-1):
+ # Py2 unicode.split() takes maxsplit as an optional parameter,
+ # not as a keyword argument as in Python 3 str.
+ parts = super(newstr, self).split(sep, maxsplit)
+ return [newstr(part) for part in parts]
+
+ @no('newbytes', 1)
+ def rsplit(self, sep=None, maxsplit=-1):
+ # Py2 unicode.rsplit() takes maxsplit as an optional parameter,
+ # not as a keyword argument as in Python 3 str.
+ parts = super(newstr, self).rsplit(sep, maxsplit)
+ return [newstr(part) for part in parts]
+
+ @no('newbytes', 1)
+ def partition(self, sep):
+ parts = super(newstr, self).partition(sep)
+ return tuple(newstr(part) for part in parts)
+
+ @no('newbytes', 1)
+ def rpartition(self, sep):
+ parts = super(newstr, self).rpartition(sep)
+ return tuple(newstr(part) for part in parts)
+
+ @no('newbytes', 1)
+ def index(self, sub, *args):
+ """
+ Like newstr.find() but raise ValueError when the substring is not
+ found.
+ """
+ pos = self.find(sub, *args)
+ if pos == -1:
+ raise ValueError('substring not found')
+ return pos
+
+ def splitlines(self, keepends=False):
+ """
+ S.splitlines(keepends=False) -> list of strings
+
+ Return a list of the lines in S, breaking at line boundaries.
+ Line breaks are not included in the resulting list unless keepends
+ is given and true.
+ """
+ # Py2 unicode.splitlines() takes keepends as an optional parameter,
+ # not as a keyword argument as in Python 3 str.
+ parts = super(newstr, self).splitlines(keepends)
+ return [newstr(part) for part in parts]
+
+ def __eq__(self, other):
+ if (isinstance(other, unicode) or
+ isinstance(other, bytes) and not isnewbytes(other)):
+ return super(newstr, self).__eq__(other)
+ else:
return NotImplemented
-
+
def __hash__(self):
if (isinstance(self, unicode) or
isinstance(self, bytes) and not isnewbytes(self)):
@@ -302,125 +302,125 @@ class newstr(with_metaclass(BaseNewStr, unicode)):
else:
raise NotImplementedError()
- def __ne__(self, other):
- if (isinstance(other, unicode) or
- isinstance(other, bytes) and not isnewbytes(other)):
- return super(newstr, self).__ne__(other)
- else:
- return True
-
- unorderable_err = 'unorderable types: str() and {0}'
-
- def __lt__(self, other):
+ def __ne__(self, other):
+ if (isinstance(other, unicode) or
+ isinstance(other, bytes) and not isnewbytes(other)):
+ return super(newstr, self).__ne__(other)
+ else:
+ return True
+
+ unorderable_err = 'unorderable types: str() and {0}'
+
+ def __lt__(self, other):
if (isinstance(other, unicode) or
isinstance(other, bytes) and not isnewbytes(other)):
return super(newstr, self).__lt__(other)
raise TypeError(self.unorderable_err.format(type(other)))
-
- def __le__(self, other):
+
+ def __le__(self, other):
if (isinstance(other, unicode) or
isinstance(other, bytes) and not isnewbytes(other)):
return super(newstr, self).__le__(other)
raise TypeError(self.unorderable_err.format(type(other)))
-
- def __gt__(self, other):
+
+ def __gt__(self, other):
if (isinstance(other, unicode) or
isinstance(other, bytes) and not isnewbytes(other)):
return super(newstr, self).__gt__(other)
raise TypeError(self.unorderable_err.format(type(other)))
-
- def __ge__(self, other):
+
+ def __ge__(self, other):
if (isinstance(other, unicode) or
isinstance(other, bytes) and not isnewbytes(other)):
return super(newstr, self).__ge__(other)
raise TypeError(self.unorderable_err.format(type(other)))
-
- def __getattribute__(self, name):
- """
- A trick to cause the ``hasattr`` builtin-fn to return False for
- the 'decode' method on Py2.
- """
- if name in ['decode', u'decode']:
- raise AttributeError("decode method has been disabled in newstr")
- return super(newstr, self).__getattribute__(name)
-
- def __native__(self):
- """
- A hook for the future.utils.native() function.
- """
- return unicode(self)
-
- @staticmethod
- def maketrans(x, y=None, z=None):
- """
- Return a translation table usable for str.translate().
-
- If there is only one argument, it must be a dictionary mapping Unicode
- ordinals (integers) or characters to Unicode ordinals, strings or None.
- Character keys will be then converted to ordinals.
- If there are two arguments, they must be strings of equal length, and
- in the resulting dictionary, each character in x will be mapped to the
- character at the same position in y. If there is a third argument, it
- must be a string, whose characters will be mapped to None in the result.
- """
-
- if y is None:
- assert z is None
- if not isinstance(x, dict):
- raise TypeError('if you give only one argument to maketrans it must be a dict')
- result = {}
- for (key, value) in x.items():
- if len(key) > 1:
- raise ValueError('keys in translate table must be strings or integers')
- result[ord(key)] = value
- else:
- if not isinstance(x, unicode) and isinstance(y, unicode):
- raise TypeError('x and y must be unicode strings')
- if not len(x) == len(y):
- raise ValueError('the first two maketrans arguments must have equal length')
- result = {}
- for (xi, yi) in zip(x, y):
- if len(xi) > 1:
- raise ValueError('keys in translate table must be strings or integers')
- result[ord(xi)] = ord(yi)
-
- if z is not None:
- for char in z:
- result[ord(char)] = None
- return result
-
- def translate(self, table):
- """
- S.translate(table) -> str
-
- Return a copy of the string S, where all characters have been mapped
- through the given translation table, which must be a mapping of
- Unicode ordinals to Unicode ordinals, strings, or None.
- Unmapped characters are left untouched. Characters mapped to None
- are deleted.
- """
- l = []
- for c in self:
- if ord(c) in table:
- val = table[ord(c)]
- if val is None:
- continue
- elif isinstance(val, unicode):
- l.append(val)
- else:
- l.append(chr(val))
- else:
- l.append(c)
- return ''.join(l)
-
- def isprintable(self):
- raise NotImplementedError('fixme')
-
- def isidentifier(self):
- raise NotImplementedError('fixme')
-
- def format_map(self):
- raise NotImplementedError('fixme')
-
-
-__all__ = ['newstr']
+
+ def __getattribute__(self, name):
+ """
+ A trick to cause the ``hasattr`` builtin-fn to return False for
+ the 'decode' method on Py2.
+ """
+ if name in ['decode', u'decode']:
+ raise AttributeError("decode method has been disabled in newstr")
+ return super(newstr, self).__getattribute__(name)
+
+ def __native__(self):
+ """
+ A hook for the future.utils.native() function.
+ """
+ return unicode(self)
+
+ @staticmethod
+ def maketrans(x, y=None, z=None):
+ """
+ Return a translation table usable for str.translate().
+
+ If there is only one argument, it must be a dictionary mapping Unicode
+ ordinals (integers) or characters to Unicode ordinals, strings or None.
+ Character keys will be then converted to ordinals.
+ If there are two arguments, they must be strings of equal length, and
+ in the resulting dictionary, each character in x will be mapped to the
+ character at the same position in y. If there is a third argument, it
+ must be a string, whose characters will be mapped to None in the result.
+ """
+
+ if y is None:
+ assert z is None
+ if not isinstance(x, dict):
+ raise TypeError('if you give only one argument to maketrans it must be a dict')
+ result = {}
+ for (key, value) in x.items():
+ if len(key) > 1:
+ raise ValueError('keys in translate table must be strings or integers')
+ result[ord(key)] = value
+ else:
+ if not isinstance(x, unicode) and isinstance(y, unicode):
+ raise TypeError('x and y must be unicode strings')
+ if not len(x) == len(y):
+ raise ValueError('the first two maketrans arguments must have equal length')
+ result = {}
+ for (xi, yi) in zip(x, y):
+ if len(xi) > 1:
+ raise ValueError('keys in translate table must be strings or integers')
+ result[ord(xi)] = ord(yi)
+
+ if z is not None:
+ for char in z:
+ result[ord(char)] = None
+ return result
+
+ def translate(self, table):
+ """
+ S.translate(table) -> str
+
+ Return a copy of the string S, where all characters have been mapped
+ through the given translation table, which must be a mapping of
+ Unicode ordinals to Unicode ordinals, strings, or None.
+ Unmapped characters are left untouched. Characters mapped to None
+ are deleted.
+ """
+ l = []
+ for c in self:
+ if ord(c) in table:
+ val = table[ord(c)]
+ if val is None:
+ continue
+ elif isinstance(val, unicode):
+ l.append(val)
+ else:
+ l.append(chr(val))
+ else:
+ l.append(c)
+ return ''.join(l)
+
+ def isprintable(self):
+ raise NotImplementedError('fixme')
+
+ def isidentifier(self):
+ raise NotImplementedError('fixme')
+
+ def format_map(self):
+ raise NotImplementedError('fixme')
+
+
+__all__ = ['newstr']
diff --git a/contrib/python/future/future/utils/__init__.py b/contrib/python/future/future/utils/__init__.py
index c17edb213c..46bd96def3 100644
--- a/contrib/python/future/future/utils/__init__.py
+++ b/contrib/python/future/future/utils/__init__.py
@@ -1,413 +1,413 @@
-"""
-A selection of cross-compatible functions for Python 2 and 3.
-
-This module exports useful functions for 2/3 compatible code:
-
- * bind_method: binds functions to classes
- * ``native_str_to_bytes`` and ``bytes_to_native_str``
- * ``native_str``: always equal to the native platform string object (because
- this may be shadowed by imports from future.builtins)
- * lists: lrange(), lmap(), lzip(), lfilter()
- * iterable method compatibility:
- - iteritems, iterkeys, itervalues
- - viewitems, viewkeys, viewvalues
-
- These use the original method if available, otherwise they use items,
- keys, values.
-
- * types:
-
- * text_type: unicode in Python 2, str in Python 3
- * string_types: basestring in Python 2, str in Python 3
+"""
+A selection of cross-compatible functions for Python 2 and 3.
+
+This module exports useful functions for 2/3 compatible code:
+
+ * bind_method: binds functions to classes
+ * ``native_str_to_bytes`` and ``bytes_to_native_str``
+ * ``native_str``: always equal to the native platform string object (because
+ this may be shadowed by imports from future.builtins)
+ * lists: lrange(), lmap(), lzip(), lfilter()
+ * iterable method compatibility:
+ - iteritems, iterkeys, itervalues
+ - viewitems, viewkeys, viewvalues
+
+ These use the original method if available, otherwise they use items,
+ keys, values.
+
+ * types:
+
+ * text_type: unicode in Python 2, str in Python 3
+ * string_types: basestring in Python 2, str in Python 3
* binary_type: str in Python 2, bytes in Python 3
* integer_types: (int, long) in Python 2, int in Python 3
* class_types: (type, types.ClassType) in Python 2, type in Python 3
-
- * bchr(c):
- Take an integer and make a 1-character byte string
- * bord(c)
- Take the result of indexing on a byte string and make an integer
- * tobytes(s)
- Take a text string, a byte string, or a sequence of characters taken
- from a byte string, and make a byte string.
-
- * raise_from()
- * raise_with_traceback()
-
-This module also defines these decorators:
-
- * ``python_2_unicode_compatible``
- * ``with_metaclass``
- * ``implements_iterator``
-
-Some of the functions in this module come from the following sources:
-
- * Jinja2 (BSD licensed: see
- https://github.com/mitsuhiko/jinja2/blob/master/LICENSE)
- * Pandas compatibility module pandas.compat
- * six.py by Benjamin Peterson
- * Django
-"""
-
-import types
-import sys
-import numbers
-import functools
-import copy
-import inspect
-
-
+
+ * bchr(c):
+ Take an integer and make a 1-character byte string
+ * bord(c)
+ Take the result of indexing on a byte string and make an integer
+ * tobytes(s)
+ Take a text string, a byte string, or a sequence of characters taken
+ from a byte string, and make a byte string.
+
+ * raise_from()
+ * raise_with_traceback()
+
+This module also defines these decorators:
+
+ * ``python_2_unicode_compatible``
+ * ``with_metaclass``
+ * ``implements_iterator``
+
+Some of the functions in this module come from the following sources:
+
+ * Jinja2 (BSD licensed: see
+ https://github.com/mitsuhiko/jinja2/blob/master/LICENSE)
+ * Pandas compatibility module pandas.compat
+ * six.py by Benjamin Peterson
+ * Django
+"""
+
+import types
+import sys
+import numbers
+import functools
+import copy
+import inspect
+
+
PY3 = sys.version_info[0] >= 3
PY34_PLUS = sys.version_info[0:2] >= (3, 4)
PY35_PLUS = sys.version_info[0:2] >= (3, 5)
PY36_PLUS = sys.version_info[0:2] >= (3, 6)
-PY2 = sys.version_info[0] == 2
-PY26 = sys.version_info[0:2] == (2, 6)
-PY27 = sys.version_info[0:2] == (2, 7)
-PYPY = hasattr(sys, 'pypy_translation_info')
-
-
-def python_2_unicode_compatible(cls):
- """
- A decorator that defines __unicode__ and __str__ methods under Python
- 2. Under Python 3, this decorator is a no-op.
-
- To support Python 2 and 3 with a single code base, define a __str__
- method returning unicode text and apply this decorator to the class, like
- this::
-
- >>> from future.utils import python_2_unicode_compatible
-
- >>> @python_2_unicode_compatible
- ... class MyClass(object):
- ... def __str__(self):
- ... return u'Unicode string: \u5b54\u5b50'
-
- >>> a = MyClass()
-
- Then, after this import:
-
- >>> from future.builtins import str
-
- the following is ``True`` on both Python 3 and 2::
-
- >>> str(a) == a.encode('utf-8').decode('utf-8')
- True
-
- and, on a Unicode-enabled terminal with the right fonts, these both print the
- Chinese characters for Confucius::
-
- >>> print(a)
- >>> print(str(a))
-
- The implementation comes from django.utils.encoding.
- """
- if not PY3:
- cls.__unicode__ = cls.__str__
- cls.__str__ = lambda self: self.__unicode__().encode('utf-8')
- return cls
-
-
-def with_metaclass(meta, *bases):
- """
- Function from jinja2/_compat.py. License: BSD.
-
- Use it like this::
-
- class BaseForm(object):
- pass
-
- class FormType(type):
- pass
-
- class Form(with_metaclass(FormType, BaseForm)):
- pass
-
- This requires a bit of explanation: the basic idea is to make a
- dummy metaclass for one level of class instantiation that replaces
- itself with the actual metaclass. Because of internal type checks
- we also need to make sure that we downgrade the custom metaclass
- for one level to something closer to type (that's why __call__ and
- __init__ comes back from type etc.).
-
- This has the advantage over six.with_metaclass of not introducing
- dummy classes into the final MRO.
- """
- class metaclass(meta):
- __call__ = type.__call__
- __init__ = type.__init__
- def __new__(cls, name, this_bases, d):
- if this_bases is None:
- return type.__new__(cls, name, (), d)
- return meta(name, bases, d)
- return metaclass('temporary_class', None, {})
-
-
-# Definitions from pandas.compat and six.py follow:
-if PY3:
- def bchr(s):
- return bytes([s])
- def bstr(s):
- if isinstance(s, str):
- return bytes(s, 'latin-1')
- else:
- return bytes(s)
- def bord(s):
- return s
-
- string_types = str,
- integer_types = int,
- class_types = type,
- text_type = str
- binary_type = bytes
-
-else:
- # Python 2
- def bchr(s):
- return chr(s)
- def bstr(s):
- return str(s)
- def bord(s):
- return ord(s)
-
- string_types = basestring,
- integer_types = (int, long)
- class_types = (type, types.ClassType)
- text_type = unicode
- binary_type = str
-
-###
-
-if PY3:
- def tobytes(s):
- if isinstance(s, bytes):
- return s
- else:
- if isinstance(s, str):
- return s.encode('latin-1')
- else:
- return bytes(s)
-else:
- # Python 2
- def tobytes(s):
- if isinstance(s, unicode):
- return s.encode('latin-1')
- else:
- return ''.join(s)
-
-tobytes.__doc__ = """
- Encodes to latin-1 (where the first 256 chars are the same as
- ASCII.)
- """
-
-if PY3:
- def native_str_to_bytes(s, encoding='utf-8'):
- return s.encode(encoding)
-
- def bytes_to_native_str(b, encoding='utf-8'):
- return b.decode(encoding)
-
- def text_to_native_str(t, encoding=None):
- return t
-else:
- # Python 2
- def native_str_to_bytes(s, encoding=None):
- from future.types import newbytes # to avoid a circular import
- return newbytes(s)
-
- def bytes_to_native_str(b, encoding=None):
- return native(b)
-
- def text_to_native_str(t, encoding='ascii'):
- """
- Use this to create a Py2 native string when "from __future__ import
- unicode_literals" is in effect.
- """
- return unicode(t).encode(encoding)
-
-native_str_to_bytes.__doc__ = """
- On Py3, returns an encoded string.
- On Py2, returns a newbytes type, ignoring the ``encoding`` argument.
- """
-
-if PY3:
- # list-producing versions of the major Python iterating functions
- def lrange(*args, **kwargs):
- return list(range(*args, **kwargs))
-
- def lzip(*args, **kwargs):
- return list(zip(*args, **kwargs))
-
- def lmap(*args, **kwargs):
- return list(map(*args, **kwargs))
-
- def lfilter(*args, **kwargs):
- return list(filter(*args, **kwargs))
-else:
- import __builtin__
- # Python 2-builtin ranges produce lists
- lrange = __builtin__.range
- lzip = __builtin__.zip
- lmap = __builtin__.map
- lfilter = __builtin__.filter
-
-
-def isidentifier(s, dotted=False):
- '''
- A function equivalent to the str.isidentifier method on Py3
- '''
- if dotted:
- return all(isidentifier(a) for a in s.split('.'))
- if PY3:
- return s.isidentifier()
- else:
- import re
- _name_re = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*$")
- return bool(_name_re.match(s))
-
-
-def viewitems(obj, **kwargs):
- """
- Function for iterating over dictionary items with the same set-like
- behaviour on Py2.7 as on Py3.
-
- Passes kwargs to method."""
- func = getattr(obj, "viewitems", None)
- if not func:
- func = obj.items
- return func(**kwargs)
-
-
-def viewkeys(obj, **kwargs):
- """
- Function for iterating over dictionary keys with the same set-like
- behaviour on Py2.7 as on Py3.
-
- Passes kwargs to method."""
- func = getattr(obj, "viewkeys", None)
- if not func:
- func = obj.keys
- return func(**kwargs)
-
-
-def viewvalues(obj, **kwargs):
- """
- Function for iterating over dictionary values with the same set-like
- behaviour on Py2.7 as on Py3.
-
- Passes kwargs to method."""
- func = getattr(obj, "viewvalues", None)
- if not func:
- func = obj.values
- return func(**kwargs)
-
-
-def iteritems(obj, **kwargs):
- """Use this only if compatibility with Python versions before 2.7 is
- required. Otherwise, prefer viewitems().
- """
- func = getattr(obj, "iteritems", None)
- if not func:
- func = obj.items
- return func(**kwargs)
-
-
-def iterkeys(obj, **kwargs):
- """Use this only if compatibility with Python versions before 2.7 is
- required. Otherwise, prefer viewkeys().
- """
- func = getattr(obj, "iterkeys", None)
- if not func:
- func = obj.keys
- return func(**kwargs)
-
-
-def itervalues(obj, **kwargs):
- """Use this only if compatibility with Python versions before 2.7 is
- required. Otherwise, prefer viewvalues().
- """
- func = getattr(obj, "itervalues", None)
- if not func:
- func = obj.values
- return func(**kwargs)
-
-
-def bind_method(cls, name, func):
- """Bind a method to class, python 2 and python 3 compatible.
-
- Parameters
- ----------
-
- cls : type
- class to receive bound method
- name : basestring
- name of method on class instance
- func : function
- function to be bound as method
-
- Returns
- -------
- None
- """
- # only python 2 has an issue with bound/unbound methods
- if not PY3:
- setattr(cls, name, types.MethodType(func, None, cls))
- else:
- setattr(cls, name, func)
-
-
-def getexception():
- return sys.exc_info()[1]
-
-
-def _get_caller_globals_and_locals():
- """
- Returns the globals and locals of the calling frame.
-
- Is there an alternative to frame hacking here?
- """
- caller_frame = inspect.stack()[2]
- myglobals = caller_frame[0].f_globals
- mylocals = caller_frame[0].f_locals
- return myglobals, mylocals
-
-
-def _repr_strip(mystring):
- """
- Returns the string without any initial or final quotes.
- """
- r = repr(mystring)
- if r.startswith("'") and r.endswith("'"):
- return r[1:-1]
- else:
- return r
-
-
-if PY3:
- def raise_from(exc, cause):
- """
- Equivalent to:
-
- raise EXCEPTION from CAUSE
-
- on Python 3. (See PEP 3134).
- """
- myglobals, mylocals = _get_caller_globals_and_locals()
-
- # We pass the exception and cause along with other globals
- # when we exec():
- myglobals = myglobals.copy()
- myglobals['__python_future_raise_from_exc'] = exc
- myglobals['__python_future_raise_from_cause'] = cause
- execstr = "raise __python_future_raise_from_exc from __python_future_raise_from_cause"
- exec(execstr, myglobals, mylocals)
-
- def raise_(tp, value=None, tb=None):
- """
- A function that matches the Python 2.x ``raise`` statement. This
- allows re-raising exceptions with the cls value and traceback on
- Python 2 and 3.
- """
+PY2 = sys.version_info[0] == 2
+PY26 = sys.version_info[0:2] == (2, 6)
+PY27 = sys.version_info[0:2] == (2, 7)
+PYPY = hasattr(sys, 'pypy_translation_info')
+
+
+def python_2_unicode_compatible(cls):
+ """
+ A decorator that defines __unicode__ and __str__ methods under Python
+ 2. Under Python 3, this decorator is a no-op.
+
+ To support Python 2 and 3 with a single code base, define a __str__
+ method returning unicode text and apply this decorator to the class, like
+ this::
+
+ >>> from future.utils import python_2_unicode_compatible
+
+ >>> @python_2_unicode_compatible
+ ... class MyClass(object):
+ ... def __str__(self):
+ ... return u'Unicode string: \u5b54\u5b50'
+
+ >>> a = MyClass()
+
+ Then, after this import:
+
+ >>> from future.builtins import str
+
+ the following is ``True`` on both Python 3 and 2::
+
+ >>> str(a) == a.encode('utf-8').decode('utf-8')
+ True
+
+ and, on a Unicode-enabled terminal with the right fonts, these both print the
+ Chinese characters for Confucius::
+
+ >>> print(a)
+ >>> print(str(a))
+
+ The implementation comes from django.utils.encoding.
+ """
+ if not PY3:
+ cls.__unicode__ = cls.__str__
+ cls.__str__ = lambda self: self.__unicode__().encode('utf-8')
+ return cls
+
+
+def with_metaclass(meta, *bases):
+ """
+ Function from jinja2/_compat.py. License: BSD.
+
+ Use it like this::
+
+ class BaseForm(object):
+ pass
+
+ class FormType(type):
+ pass
+
+ class Form(with_metaclass(FormType, BaseForm)):
+ pass
+
+ This requires a bit of explanation: the basic idea is to make a
+ dummy metaclass for one level of class instantiation that replaces
+ itself with the actual metaclass. Because of internal type checks
+ we also need to make sure that we downgrade the custom metaclass
+ for one level to something closer to type (that's why __call__ and
+ __init__ comes back from type etc.).
+
+ This has the advantage over six.with_metaclass of not introducing
+ dummy classes into the final MRO.
+ """
+ class metaclass(meta):
+ __call__ = type.__call__
+ __init__ = type.__init__
+ def __new__(cls, name, this_bases, d):
+ if this_bases is None:
+ return type.__new__(cls, name, (), d)
+ return meta(name, bases, d)
+ return metaclass('temporary_class', None, {})
+
+
+# Definitions from pandas.compat and six.py follow:
+if PY3:
+ def bchr(s):
+ return bytes([s])
+ def bstr(s):
+ if isinstance(s, str):
+ return bytes(s, 'latin-1')
+ else:
+ return bytes(s)
+ def bord(s):
+ return s
+
+ string_types = str,
+ integer_types = int,
+ class_types = type,
+ text_type = str
+ binary_type = bytes
+
+else:
+ # Python 2
+ def bchr(s):
+ return chr(s)
+ def bstr(s):
+ return str(s)
+ def bord(s):
+ return ord(s)
+
+ string_types = basestring,
+ integer_types = (int, long)
+ class_types = (type, types.ClassType)
+ text_type = unicode
+ binary_type = str
+
+###
+
+if PY3:
+ def tobytes(s):
+ if isinstance(s, bytes):
+ return s
+ else:
+ if isinstance(s, str):
+ return s.encode('latin-1')
+ else:
+ return bytes(s)
+else:
+ # Python 2
+ def tobytes(s):
+ if isinstance(s, unicode):
+ return s.encode('latin-1')
+ else:
+ return ''.join(s)
+
+tobytes.__doc__ = """
+ Encodes to latin-1 (where the first 256 chars are the same as
+ ASCII.)
+ """
+
+if PY3:
+ def native_str_to_bytes(s, encoding='utf-8'):
+ return s.encode(encoding)
+
+ def bytes_to_native_str(b, encoding='utf-8'):
+ return b.decode(encoding)
+
+ def text_to_native_str(t, encoding=None):
+ return t
+else:
+ # Python 2
+ def native_str_to_bytes(s, encoding=None):
+ from future.types import newbytes # to avoid a circular import
+ return newbytes(s)
+
+ def bytes_to_native_str(b, encoding=None):
+ return native(b)
+
+ def text_to_native_str(t, encoding='ascii'):
+ """
+ Use this to create a Py2 native string when "from __future__ import
+ unicode_literals" is in effect.
+ """
+ return unicode(t).encode(encoding)
+
+native_str_to_bytes.__doc__ = """
+ On Py3, returns an encoded string.
+ On Py2, returns a newbytes type, ignoring the ``encoding`` argument.
+ """
+
+if PY3:
+ # list-producing versions of the major Python iterating functions
+ def lrange(*args, **kwargs):
+ return list(range(*args, **kwargs))
+
+ def lzip(*args, **kwargs):
+ return list(zip(*args, **kwargs))
+
+ def lmap(*args, **kwargs):
+ return list(map(*args, **kwargs))
+
+ def lfilter(*args, **kwargs):
+ return list(filter(*args, **kwargs))
+else:
+ import __builtin__
+ # Python 2-builtin ranges produce lists
+ lrange = __builtin__.range
+ lzip = __builtin__.zip
+ lmap = __builtin__.map
+ lfilter = __builtin__.filter
+
+
+def isidentifier(s, dotted=False):
+ '''
+ A function equivalent to the str.isidentifier method on Py3
+ '''
+ if dotted:
+ return all(isidentifier(a) for a in s.split('.'))
+ if PY3:
+ return s.isidentifier()
+ else:
+ import re
+ _name_re = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*$")
+ return bool(_name_re.match(s))
+
+
+def viewitems(obj, **kwargs):
+ """
+ Function for iterating over dictionary items with the same set-like
+ behaviour on Py2.7 as on Py3.
+
+ Passes kwargs to method."""
+ func = getattr(obj, "viewitems", None)
+ if not func:
+ func = obj.items
+ return func(**kwargs)
+
+
+def viewkeys(obj, **kwargs):
+ """
+ Function for iterating over dictionary keys with the same set-like
+ behaviour on Py2.7 as on Py3.
+
+ Passes kwargs to method."""
+ func = getattr(obj, "viewkeys", None)
+ if not func:
+ func = obj.keys
+ return func(**kwargs)
+
+
+def viewvalues(obj, **kwargs):
+ """
+ Function for iterating over dictionary values with the same set-like
+ behaviour on Py2.7 as on Py3.
+
+ Passes kwargs to method."""
+ func = getattr(obj, "viewvalues", None)
+ if not func:
+ func = obj.values
+ return func(**kwargs)
+
+
+def iteritems(obj, **kwargs):
+ """Use this only if compatibility with Python versions before 2.7 is
+ required. Otherwise, prefer viewitems().
+ """
+ func = getattr(obj, "iteritems", None)
+ if not func:
+ func = obj.items
+ return func(**kwargs)
+
+
+def iterkeys(obj, **kwargs):
+ """Use this only if compatibility with Python versions before 2.7 is
+ required. Otherwise, prefer viewkeys().
+ """
+ func = getattr(obj, "iterkeys", None)
+ if not func:
+ func = obj.keys
+ return func(**kwargs)
+
+
+def itervalues(obj, **kwargs):
+ """Use this only if compatibility with Python versions before 2.7 is
+ required. Otherwise, prefer viewvalues().
+ """
+ func = getattr(obj, "itervalues", None)
+ if not func:
+ func = obj.values
+ return func(**kwargs)
+
+
+def bind_method(cls, name, func):
+ """Bind a method to class, python 2 and python 3 compatible.
+
+ Parameters
+ ----------
+
+ cls : type
+ class to receive bound method
+ name : basestring
+ name of method on class instance
+ func : function
+ function to be bound as method
+
+ Returns
+ -------
+ None
+ """
+ # only python 2 has an issue with bound/unbound methods
+ if not PY3:
+ setattr(cls, name, types.MethodType(func, None, cls))
+ else:
+ setattr(cls, name, func)
+
+
+def getexception():
+ return sys.exc_info()[1]
+
+
+def _get_caller_globals_and_locals():
+ """
+ Returns the globals and locals of the calling frame.
+
+ Is there an alternative to frame hacking here?
+ """
+ caller_frame = inspect.stack()[2]
+ myglobals = caller_frame[0].f_globals
+ mylocals = caller_frame[0].f_locals
+ return myglobals, mylocals
+
+
+def _repr_strip(mystring):
+ """
+ Returns the string without any initial or final quotes.
+ """
+ r = repr(mystring)
+ if r.startswith("'") and r.endswith("'"):
+ return r[1:-1]
+ else:
+ return r
+
+
+if PY3:
+ def raise_from(exc, cause):
+ """
+ Equivalent to:
+
+ raise EXCEPTION from CAUSE
+
+ on Python 3. (See PEP 3134).
+ """
+ myglobals, mylocals = _get_caller_globals_and_locals()
+
+ # We pass the exception and cause along with other globals
+ # when we exec():
+ myglobals = myglobals.copy()
+ myglobals['__python_future_raise_from_exc'] = exc
+ myglobals['__python_future_raise_from_cause'] = cause
+ execstr = "raise __python_future_raise_from_exc from __python_future_raise_from_cause"
+ exec(execstr, myglobals, mylocals)
+
+ def raise_(tp, value=None, tb=None):
+ """
+ A function that matches the Python 2.x ``raise`` statement. This
+ allows re-raising exceptions with the cls value and traceback on
+ Python 2 and 3.
+ """
if isinstance(tp, BaseException):
# If the first object is an instance, the type of the exception
# is the class of the instance, the instance itself is the value,
@@ -419,7 +419,7 @@ if PY3:
# If the first object is a class, it becomes the type of the
# exception.
raise TypeError("class must derive from BaseException, not %s" % tp.__name__)
- else:
+ else:
# The second object is used to determine the exception value: If it
# is an instance of the class, the instance becomes the exception
# value. If the second object is a tuple, it is used as the argument
@@ -436,322 +436,322 @@ if PY3:
else:
exc = tp(value)
- if exc.__traceback__ is not tb:
- raise exc.with_traceback(tb)
- raise exc
-
- def raise_with_traceback(exc, traceback=Ellipsis):
- if traceback == Ellipsis:
- _, _, traceback = sys.exc_info()
- raise exc.with_traceback(traceback)
-
-else:
- def raise_from(exc, cause):
- """
- Equivalent to:
-
- raise EXCEPTION from CAUSE
-
- on Python 3. (See PEP 3134).
- """
- # Is either arg an exception class (e.g. IndexError) rather than
- # instance (e.g. IndexError('my message here')? If so, pass the
- # name of the class undisturbed through to "raise ... from ...".
- if isinstance(exc, type) and issubclass(exc, Exception):
- e = exc()
- # exc = exc.__name__
- # execstr = "e = " + _repr_strip(exc) + "()"
- # myglobals, mylocals = _get_caller_globals_and_locals()
- # exec(execstr, myglobals, mylocals)
- else:
- e = exc
- e.__suppress_context__ = False
- if isinstance(cause, type) and issubclass(cause, Exception):
- e.__cause__ = cause()
+ if exc.__traceback__ is not tb:
+ raise exc.with_traceback(tb)
+ raise exc
+
+ def raise_with_traceback(exc, traceback=Ellipsis):
+ if traceback == Ellipsis:
+ _, _, traceback = sys.exc_info()
+ raise exc.with_traceback(traceback)
+
+else:
+ def raise_from(exc, cause):
+ """
+ Equivalent to:
+
+ raise EXCEPTION from CAUSE
+
+ on Python 3. (See PEP 3134).
+ """
+ # Is either arg an exception class (e.g. IndexError) rather than
+ # instance (e.g. IndexError('my message here')? If so, pass the
+ # name of the class undisturbed through to "raise ... from ...".
+ if isinstance(exc, type) and issubclass(exc, Exception):
+ e = exc()
+ # exc = exc.__name__
+ # execstr = "e = " + _repr_strip(exc) + "()"
+ # myglobals, mylocals = _get_caller_globals_and_locals()
+ # exec(execstr, myglobals, mylocals)
+ else:
+ e = exc
+ e.__suppress_context__ = False
+ if isinstance(cause, type) and issubclass(cause, Exception):
+ e.__cause__ = cause()
e.__cause__.__traceback__ = sys.exc_info()[2]
- e.__suppress_context__ = True
- elif cause is None:
- e.__cause__ = None
- e.__suppress_context__ = True
- elif isinstance(cause, BaseException):
- e.__cause__ = cause
+ e.__suppress_context__ = True
+ elif cause is None:
+ e.__cause__ = None
+ e.__suppress_context__ = True
+ elif isinstance(cause, BaseException):
+ e.__cause__ = cause
object.__setattr__(e.__cause__, '__traceback__', sys.exc_info()[2])
- e.__suppress_context__ = True
- else:
- raise TypeError("exception causes must derive from BaseException")
- e.__context__ = sys.exc_info()[1]
- raise e
-
- exec('''
-def raise_(tp, value=None, tb=None):
- raise tp, value, tb
-
-def raise_with_traceback(exc, traceback=Ellipsis):
- if traceback == Ellipsis:
- _, _, traceback = sys.exc_info()
- raise exc, None, traceback
-'''.strip())
-
-
-raise_with_traceback.__doc__ = (
-"""Raise exception with existing traceback.
-If traceback is not passed, uses sys.exc_info() to get traceback."""
-)
-
-
-# Deprecated alias for backward compatibility with ``future`` versions < 0.11:
-reraise = raise_
-
-
-def implements_iterator(cls):
- '''
- From jinja2/_compat.py. License: BSD.
-
- Use as a decorator like this::
-
- @implements_iterator
- class UppercasingIterator(object):
- def __init__(self, iterable):
- self._iter = iter(iterable)
- def __iter__(self):
- return self
- def __next__(self):
- return next(self._iter).upper()
-
- '''
- if PY3:
- return cls
- else:
- cls.next = cls.__next__
- del cls.__next__
- return cls
-
-if PY3:
- get_next = lambda x: x.next
-else:
- get_next = lambda x: x.__next__
-
-
-def encode_filename(filename):
- if PY3:
- return filename
- else:
- if isinstance(filename, unicode):
- return filename.encode('utf-8')
- return filename
-
-
-def is_new_style(cls):
- """
- Python 2.7 has both new-style and old-style classes. Old-style classes can
- be pesky in some circumstances, such as when using inheritance. Use this
- function to test for whether a class is new-style. (Python 3 only has
- new-style classes.)
- """
- return hasattr(cls, '__class__') and ('__dict__' in dir(cls)
- or hasattr(cls, '__slots__'))
-
-# The native platform string and bytes types. Useful because ``str`` and
-# ``bytes`` are redefined on Py2 by ``from future.builtins import *``.
-native_str = str
-native_bytes = bytes
-
-
-def istext(obj):
- """
- Deprecated. Use::
- >>> isinstance(obj, str)
- after this import:
- >>> from future.builtins import str
- """
- return isinstance(obj, type(u''))
-
-
-def isbytes(obj):
- """
- Deprecated. Use::
- >>> isinstance(obj, bytes)
- after this import:
- >>> from future.builtins import bytes
- """
- return isinstance(obj, type(b''))
-
-
-def isnewbytes(obj):
- """
+ e.__suppress_context__ = True
+ else:
+ raise TypeError("exception causes must derive from BaseException")
+ e.__context__ = sys.exc_info()[1]
+ raise e
+
+ exec('''
+def raise_(tp, value=None, tb=None):
+ raise tp, value, tb
+
+def raise_with_traceback(exc, traceback=Ellipsis):
+ if traceback == Ellipsis:
+ _, _, traceback = sys.exc_info()
+ raise exc, None, traceback
+'''.strip())
+
+
+raise_with_traceback.__doc__ = (
+"""Raise exception with existing traceback.
+If traceback is not passed, uses sys.exc_info() to get traceback."""
+)
+
+
+# Deprecated alias for backward compatibility with ``future`` versions < 0.11:
+reraise = raise_
+
+
+def implements_iterator(cls):
+ '''
+ From jinja2/_compat.py. License: BSD.
+
+ Use as a decorator like this::
+
+ @implements_iterator
+ class UppercasingIterator(object):
+ def __init__(self, iterable):
+ self._iter = iter(iterable)
+ def __iter__(self):
+ return self
+ def __next__(self):
+ return next(self._iter).upper()
+
+ '''
+ if PY3:
+ return cls
+ else:
+ cls.next = cls.__next__
+ del cls.__next__
+ return cls
+
+if PY3:
+ get_next = lambda x: x.next
+else:
+ get_next = lambda x: x.__next__
+
+
+def encode_filename(filename):
+ if PY3:
+ return filename
+ else:
+ if isinstance(filename, unicode):
+ return filename.encode('utf-8')
+ return filename
+
+
+def is_new_style(cls):
+ """
+ Python 2.7 has both new-style and old-style classes. Old-style classes can
+ be pesky in some circumstances, such as when using inheritance. Use this
+ function to test for whether a class is new-style. (Python 3 only has
+ new-style classes.)
+ """
+ return hasattr(cls, '__class__') and ('__dict__' in dir(cls)
+ or hasattr(cls, '__slots__'))
+
+# The native platform string and bytes types. Useful because ``str`` and
+# ``bytes`` are redefined on Py2 by ``from future.builtins import *``.
+native_str = str
+native_bytes = bytes
+
+
+def istext(obj):
+ """
+ Deprecated. Use::
+ >>> isinstance(obj, str)
+ after this import:
+ >>> from future.builtins import str
+ """
+ return isinstance(obj, type(u''))
+
+
+def isbytes(obj):
+ """
+ Deprecated. Use::
+ >>> isinstance(obj, bytes)
+ after this import:
+ >>> from future.builtins import bytes
+ """
+ return isinstance(obj, type(b''))
+
+
+def isnewbytes(obj):
+ """
Equivalent to the result of ``type(obj) == type(newbytes)``
in other words, it is REALLY a newbytes instance, not a Py2 native str
- object?
+ object?
Note that this does not cover subclasses of newbytes, and it is not
equivalent to ininstance(obj, newbytes)
- """
+ """
return type(obj).__name__ == 'newbytes'
-
-
-def isint(obj):
- """
- Deprecated. Tests whether an object is a Py3 ``int`` or either a Py2 ``int`` or
- ``long``.
-
- Instead of using this function, you can use:
-
- >>> from future.builtins import int
- >>> isinstance(obj, int)
-
- The following idiom is equivalent:
-
- >>> from numbers import Integral
- >>> isinstance(obj, Integral)
- """
-
- return isinstance(obj, numbers.Integral)
-
-
-def native(obj):
- """
- On Py3, this is a no-op: native(obj) -> obj
-
- On Py2, returns the corresponding native Py2 types that are
- superclasses for backported objects from Py3:
-
- >>> from builtins import str, bytes, int
-
- >>> native(str(u'ABC'))
- u'ABC'
- >>> type(native(str(u'ABC')))
- unicode
-
- >>> native(bytes(b'ABC'))
- b'ABC'
- >>> type(native(bytes(b'ABC')))
- bytes
-
- >>> native(int(10**20))
- 100000000000000000000L
- >>> type(native(int(10**20)))
- long
-
- Existing native types on Py2 will be returned unchanged:
-
- >>> type(native(u'ABC'))
- unicode
- """
- if hasattr(obj, '__native__'):
- return obj.__native__()
- else:
- return obj
-
-
-# Implementation of exec_ is from ``six``:
-if PY3:
- import builtins
- exec_ = getattr(builtins, "exec")
-else:
- def exec_(code, globs=None, locs=None):
- """Execute code in a namespace."""
- if globs is None:
- frame = sys._getframe(1)
- globs = frame.f_globals
- if locs is None:
- locs = frame.f_locals
- del frame
- elif locs is None:
- locs = globs
- exec("""exec code in globs, locs""")
-
-
-# Defined here for backward compatibility:
-def old_div(a, b):
- """
- DEPRECATED: import ``old_div`` from ``past.utils`` instead.
-
- Equivalent to ``a / b`` on Python 2 without ``from __future__ import
- division``.
-
- TODO: generalize this to other objects (like arrays etc.)
- """
- if isinstance(a, numbers.Integral) and isinstance(b, numbers.Integral):
- return a // b
- else:
- return a / b
-
-
-def as_native_str(encoding='utf-8'):
- '''
- A decorator to turn a function or method call that returns text, i.e.
- unicode, into one that returns a native platform str.
-
- Use it as a decorator like this::
-
- from __future__ import unicode_literals
-
- class MyClass(object):
- @as_native_str(encoding='ascii')
- def __repr__(self):
- return next(self._iter).upper()
- '''
- if PY3:
- return lambda f: f
- else:
- def encoder(f):
- @functools.wraps(f)
- def wrapper(*args, **kwargs):
- return f(*args, **kwargs).encode(encoding=encoding)
- return wrapper
- return encoder
-
-# listvalues and listitems definitions from Nick Coghlan's (withdrawn)
-# PEP 496:
-try:
- dict.iteritems
-except AttributeError:
- # Python 3
- def listvalues(d):
- return list(d.values())
- def listitems(d):
- return list(d.items())
-else:
- # Python 2
- def listvalues(d):
- return d.values()
- def listitems(d):
- return d.items()
-
-if PY3:
- def ensure_new_type(obj):
- return obj
-else:
- def ensure_new_type(obj):
- from future.types.newbytes import newbytes
- from future.types.newstr import newstr
- from future.types.newint import newint
- from future.types.newdict import newdict
-
- native_type = type(native(obj))
-
- # Upcast only if the type is already a native (non-future) type
- if issubclass(native_type, type(obj)):
- # Upcast
- if native_type == str: # i.e. Py2 8-bit str
- return newbytes(obj)
- elif native_type == unicode:
- return newstr(obj)
- elif native_type == int:
- return newint(obj)
- elif native_type == long:
- return newint(obj)
- elif native_type == dict:
- return newdict(obj)
- else:
- return obj
- else:
- # Already a new type
- assert type(obj) in [newbytes, newstr]
- return obj
-
-
-__all__ = ['PY2', 'PY26', 'PY3', 'PYPY',
+
+
+def isint(obj):
+ """
+ Deprecated. Tests whether an object is a Py3 ``int`` or either a Py2 ``int`` or
+ ``long``.
+
+ Instead of using this function, you can use:
+
+ >>> from future.builtins import int
+ >>> isinstance(obj, int)
+
+ The following idiom is equivalent:
+
+ >>> from numbers import Integral
+ >>> isinstance(obj, Integral)
+ """
+
+ return isinstance(obj, numbers.Integral)
+
+
+def native(obj):
+ """
+ On Py3, this is a no-op: native(obj) -> obj
+
+ On Py2, returns the corresponding native Py2 types that are
+ superclasses for backported objects from Py3:
+
+ >>> from builtins import str, bytes, int
+
+ >>> native(str(u'ABC'))
+ u'ABC'
+ >>> type(native(str(u'ABC')))
+ unicode
+
+ >>> native(bytes(b'ABC'))
+ b'ABC'
+ >>> type(native(bytes(b'ABC')))
+ bytes
+
+ >>> native(int(10**20))
+ 100000000000000000000L
+ >>> type(native(int(10**20)))
+ long
+
+ Existing native types on Py2 will be returned unchanged:
+
+ >>> type(native(u'ABC'))
+ unicode
+ """
+ if hasattr(obj, '__native__'):
+ return obj.__native__()
+ else:
+ return obj
+
+
+# Implementation of exec_ is from ``six``:
+if PY3:
+ import builtins
+ exec_ = getattr(builtins, "exec")
+else:
+ def exec_(code, globs=None, locs=None):
+ """Execute code in a namespace."""
+ if globs is None:
+ frame = sys._getframe(1)
+ globs = frame.f_globals
+ if locs is None:
+ locs = frame.f_locals
+ del frame
+ elif locs is None:
+ locs = globs
+ exec("""exec code in globs, locs""")
+
+
+# Defined here for backward compatibility:
+def old_div(a, b):
+ """
+ DEPRECATED: import ``old_div`` from ``past.utils`` instead.
+
+ Equivalent to ``a / b`` on Python 2 without ``from __future__ import
+ division``.
+
+ TODO: generalize this to other objects (like arrays etc.)
+ """
+ if isinstance(a, numbers.Integral) and isinstance(b, numbers.Integral):
+ return a // b
+ else:
+ return a / b
+
+
+def as_native_str(encoding='utf-8'):
+ '''
+ A decorator to turn a function or method call that returns text, i.e.
+ unicode, into one that returns a native platform str.
+
+ Use it as a decorator like this::
+
+ from __future__ import unicode_literals
+
+ class MyClass(object):
+ @as_native_str(encoding='ascii')
+ def __repr__(self):
+ return next(self._iter).upper()
+ '''
+ if PY3:
+ return lambda f: f
+ else:
+ def encoder(f):
+ @functools.wraps(f)
+ def wrapper(*args, **kwargs):
+ return f(*args, **kwargs).encode(encoding=encoding)
+ return wrapper
+ return encoder
+
+# listvalues and listitems definitions from Nick Coghlan's (withdrawn)
+# PEP 496:
+try:
+ dict.iteritems
+except AttributeError:
+ # Python 3
+ def listvalues(d):
+ return list(d.values())
+ def listitems(d):
+ return list(d.items())
+else:
+ # Python 2
+ def listvalues(d):
+ return d.values()
+ def listitems(d):
+ return d.items()
+
+if PY3:
+ def ensure_new_type(obj):
+ return obj
+else:
+ def ensure_new_type(obj):
+ from future.types.newbytes import newbytes
+ from future.types.newstr import newstr
+ from future.types.newint import newint
+ from future.types.newdict import newdict
+
+ native_type = type(native(obj))
+
+ # Upcast only if the type is already a native (non-future) type
+ if issubclass(native_type, type(obj)):
+ # Upcast
+ if native_type == str: # i.e. Py2 8-bit str
+ return newbytes(obj)
+ elif native_type == unicode:
+ return newstr(obj)
+ elif native_type == int:
+ return newint(obj)
+ elif native_type == long:
+ return newint(obj)
+ elif native_type == dict:
+ return newdict(obj)
+ else:
+ return obj
+ else:
+ # Already a new type
+ assert type(obj) in [newbytes, newstr]
+ return obj
+
+
+__all__ = ['PY2', 'PY26', 'PY3', 'PYPY',
'as_native_str', 'binary_type', 'bind_method', 'bord', 'bstr',
'bytes_to_native_str', 'class_types', 'encode_filename',
'ensure_new_type', 'exec_', 'get_next', 'getexception',
@@ -759,8 +759,8 @@ __all__ = ['PY2', 'PY26', 'PY3', 'PYPY',
'isidentifier', 'isint', 'isnewbytes', 'istext', 'iteritems',
'iterkeys', 'itervalues', 'lfilter', 'listitems', 'listvalues',
'lmap', 'lrange', 'lzip', 'native', 'native_bytes', 'native_str',
- 'native_str_to_bytes', 'old_div',
- 'python_2_unicode_compatible', 'raise_',
+ 'native_str_to_bytes', 'old_div',
+ 'python_2_unicode_compatible', 'raise_',
'raise_with_traceback', 'reraise', 'string_types',
'text_to_native_str', 'text_type', 'tobytes', 'viewitems',
'viewkeys', 'viewvalues', 'with_metaclass'
diff --git a/contrib/python/future/future/utils/surrogateescape.py b/contrib/python/future/future/utils/surrogateescape.py
index d9b7982e81..0dcc9fa6e6 100644
--- a/contrib/python/future/future/utils/surrogateescape.py
+++ b/contrib/python/future/future/utils/surrogateescape.py
@@ -1,198 +1,198 @@
-"""
-This is Victor Stinner's pure-Python implementation of PEP 383: the "surrogateescape" error
-handler of Python 3.
-
-Source: misc/python/surrogateescape.py in https://bitbucket.org/haypo/misc
-"""
-
-# This code is released under the Python license and the BSD 2-clause license
-
-import codecs
-import sys
-
-from future import utils
-
-
-FS_ERRORS = 'surrogateescape'
-
-# # -- Python 2/3 compatibility -------------------------------------
-# FS_ERRORS = 'my_surrogateescape'
-
-def u(text):
- if utils.PY3:
- return text
- else:
- return text.decode('unicode_escape')
-
-def b(data):
- if utils.PY3:
- return data.encode('latin1')
- else:
- return data
-
-if utils.PY3:
- _unichr = chr
- bytes_chr = lambda code: bytes((code,))
-else:
- _unichr = unichr
- bytes_chr = chr
-
-def surrogateescape_handler(exc):
- """
- Pure Python implementation of the PEP 383: the "surrogateescape" error
- handler of Python 3. Undecodable bytes will be replaced by a Unicode
- character U+DCxx on decoding, and these are translated into the
- original bytes on encoding.
- """
- mystring = exc.object[exc.start:exc.end]
-
- try:
- if isinstance(exc, UnicodeDecodeError):
- # mystring is a byte-string in this case
- decoded = replace_surrogate_decode(mystring)
- elif isinstance(exc, UnicodeEncodeError):
- # In the case of u'\udcc3'.encode('ascii',
- # 'this_surrogateescape_handler'), both Python 2.x and 3.x raise an
- # exception anyway after this function is called, even though I think
- # it's doing what it should. It seems that the strict encoder is called
- # to encode the unicode string that this function returns ...
- decoded = replace_surrogate_encode(mystring)
- else:
- raise exc
- except NotASurrogateError:
- raise exc
- return (decoded, exc.end)
-
-
-class NotASurrogateError(Exception):
- pass
-
-
-def replace_surrogate_encode(mystring):
- """
- Returns a (unicode) string, not the more logical bytes, because the codecs
- register_error functionality expects this.
- """
- decoded = []
- for ch in mystring:
- # if utils.PY3:
- # code = ch
- # else:
- code = ord(ch)
-
- # The following magic comes from Py3.3's Python/codecs.c file:
- if not 0xD800 <= code <= 0xDCFF:
- # Not a surrogate. Fail with the original exception.
+"""
+This is Victor Stinner's pure-Python implementation of PEP 383: the "surrogateescape" error
+handler of Python 3.
+
+Source: misc/python/surrogateescape.py in https://bitbucket.org/haypo/misc
+"""
+
+# This code is released under the Python license and the BSD 2-clause license
+
+import codecs
+import sys
+
+from future import utils
+
+
+FS_ERRORS = 'surrogateescape'
+
+# # -- Python 2/3 compatibility -------------------------------------
+# FS_ERRORS = 'my_surrogateescape'
+
+def u(text):
+ if utils.PY3:
+ return text
+ else:
+ return text.decode('unicode_escape')
+
+def b(data):
+ if utils.PY3:
+ return data.encode('latin1')
+ else:
+ return data
+
+if utils.PY3:
+ _unichr = chr
+ bytes_chr = lambda code: bytes((code,))
+else:
+ _unichr = unichr
+ bytes_chr = chr
+
+def surrogateescape_handler(exc):
+ """
+ Pure Python implementation of the PEP 383: the "surrogateescape" error
+ handler of Python 3. Undecodable bytes will be replaced by a Unicode
+ character U+DCxx on decoding, and these are translated into the
+ original bytes on encoding.
+ """
+ mystring = exc.object[exc.start:exc.end]
+
+ try:
+ if isinstance(exc, UnicodeDecodeError):
+ # mystring is a byte-string in this case
+ decoded = replace_surrogate_decode(mystring)
+ elif isinstance(exc, UnicodeEncodeError):
+ # In the case of u'\udcc3'.encode('ascii',
+ # 'this_surrogateescape_handler'), both Python 2.x and 3.x raise an
+ # exception anyway after this function is called, even though I think
+ # it's doing what it should. It seems that the strict encoder is called
+ # to encode the unicode string that this function returns ...
+ decoded = replace_surrogate_encode(mystring)
+ else:
+ raise exc
+ except NotASurrogateError:
+ raise exc
+ return (decoded, exc.end)
+
+
+class NotASurrogateError(Exception):
+ pass
+
+
+def replace_surrogate_encode(mystring):
+ """
+ Returns a (unicode) string, not the more logical bytes, because the codecs
+ register_error functionality expects this.
+ """
+ decoded = []
+ for ch in mystring:
+ # if utils.PY3:
+ # code = ch
+ # else:
+ code = ord(ch)
+
+ # The following magic comes from Py3.3's Python/codecs.c file:
+ if not 0xD800 <= code <= 0xDCFF:
+ # Not a surrogate. Fail with the original exception.
raise NotASurrogateError
- # mybytes = [0xe0 | (code >> 12),
- # 0x80 | ((code >> 6) & 0x3f),
- # 0x80 | (code & 0x3f)]
- # Is this a good idea?
- if 0xDC00 <= code <= 0xDC7F:
- decoded.append(_unichr(code - 0xDC00))
- elif code <= 0xDCFF:
- decoded.append(_unichr(code - 0xDC00))
- else:
- raise NotASurrogateError
- return str().join(decoded)
-
-
-def replace_surrogate_decode(mybytes):
- """
- Returns a (unicode) string
- """
- decoded = []
- for ch in mybytes:
- # We may be parsing newbytes (in which case ch is an int) or a native
- # str on Py2
- if isinstance(ch, int):
- code = ch
- else:
- code = ord(ch)
- if 0x80 <= code <= 0xFF:
- decoded.append(_unichr(0xDC00 + code))
- elif code <= 0x7F:
- decoded.append(_unichr(code))
- else:
- # # It may be a bad byte
- # # Try swallowing it.
- # continue
- # print("RAISE!")
- raise NotASurrogateError
- return str().join(decoded)
-
-
-def encodefilename(fn):
- if FS_ENCODING == 'ascii':
- # ASCII encoder of Python 2 expects that the error handler returns a
- # Unicode string encodable to ASCII, whereas our surrogateescape error
- # handler has to return bytes in 0x80-0xFF range.
- encoded = []
- for index, ch in enumerate(fn):
- code = ord(ch)
- if code < 128:
- ch = bytes_chr(code)
- elif 0xDC80 <= code <= 0xDCFF:
- ch = bytes_chr(code - 0xDC00)
- else:
- raise UnicodeEncodeError(FS_ENCODING,
- fn, index, index+1,
- 'ordinal not in range(128)')
- encoded.append(ch)
- return bytes().join(encoded)
- elif FS_ENCODING == 'utf-8':
- # UTF-8 encoder of Python 2 encodes surrogates, so U+DC80-U+DCFF
- # doesn't go through our error handler
- encoded = []
- for index, ch in enumerate(fn):
- code = ord(ch)
- if 0xD800 <= code <= 0xDFFF:
- if 0xDC80 <= code <= 0xDCFF:
- ch = bytes_chr(code - 0xDC00)
- encoded.append(ch)
- else:
- raise UnicodeEncodeError(
- FS_ENCODING,
- fn, index, index+1, 'surrogates not allowed')
- else:
- ch_utf8 = ch.encode('utf-8')
- encoded.append(ch_utf8)
- return bytes().join(encoded)
- else:
- return fn.encode(FS_ENCODING, FS_ERRORS)
-
-def decodefilename(fn):
- return fn.decode(FS_ENCODING, FS_ERRORS)
-
-FS_ENCODING = 'ascii'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]')
-# FS_ENCODING = 'cp932'; fn = b('[abc\x81\x00]'); encoded = u('[abc\udc81\x00]')
-# FS_ENCODING = 'UTF-8'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]')
-
-
-# normalize the filesystem encoding name.
-# For example, we expect "utf-8", not "UTF8".
-FS_ENCODING = codecs.lookup(FS_ENCODING).name
-
-
-def register_surrogateescape():
- """
- Registers the surrogateescape error handler on Python 2 (only)
- """
- if utils.PY3:
- return
- try:
- codecs.lookup_error(FS_ERRORS)
- except LookupError:
- codecs.register_error(FS_ERRORS, surrogateescape_handler)
-
-
-if __name__ == '__main__':
- pass
- # # Tests:
- # register_surrogateescape()
-
- # b = decodefilename(fn)
- # assert b == encoded, "%r != %r" % (b, encoded)
- # c = encodefilename(b)
- # assert c == fn, '%r != %r' % (c, fn)
- # # print("ok")
+ # mybytes = [0xe0 | (code >> 12),
+ # 0x80 | ((code >> 6) & 0x3f),
+ # 0x80 | (code & 0x3f)]
+ # Is this a good idea?
+ if 0xDC00 <= code <= 0xDC7F:
+ decoded.append(_unichr(code - 0xDC00))
+ elif code <= 0xDCFF:
+ decoded.append(_unichr(code - 0xDC00))
+ else:
+ raise NotASurrogateError
+ return str().join(decoded)
+
+
+def replace_surrogate_decode(mybytes):
+ """
+ Returns a (unicode) string
+ """
+ decoded = []
+ for ch in mybytes:
+ # We may be parsing newbytes (in which case ch is an int) or a native
+ # str on Py2
+ if isinstance(ch, int):
+ code = ch
+ else:
+ code = ord(ch)
+ if 0x80 <= code <= 0xFF:
+ decoded.append(_unichr(0xDC00 + code))
+ elif code <= 0x7F:
+ decoded.append(_unichr(code))
+ else:
+ # # It may be a bad byte
+ # # Try swallowing it.
+ # continue
+ # print("RAISE!")
+ raise NotASurrogateError
+ return str().join(decoded)
+
+
+def encodefilename(fn):
+ if FS_ENCODING == 'ascii':
+ # ASCII encoder of Python 2 expects that the error handler returns a
+ # Unicode string encodable to ASCII, whereas our surrogateescape error
+ # handler has to return bytes in 0x80-0xFF range.
+ encoded = []
+ for index, ch in enumerate(fn):
+ code = ord(ch)
+ if code < 128:
+ ch = bytes_chr(code)
+ elif 0xDC80 <= code <= 0xDCFF:
+ ch = bytes_chr(code - 0xDC00)
+ else:
+ raise UnicodeEncodeError(FS_ENCODING,
+ fn, index, index+1,
+ 'ordinal not in range(128)')
+ encoded.append(ch)
+ return bytes().join(encoded)
+ elif FS_ENCODING == 'utf-8':
+ # UTF-8 encoder of Python 2 encodes surrogates, so U+DC80-U+DCFF
+ # doesn't go through our error handler
+ encoded = []
+ for index, ch in enumerate(fn):
+ code = ord(ch)
+ if 0xD800 <= code <= 0xDFFF:
+ if 0xDC80 <= code <= 0xDCFF:
+ ch = bytes_chr(code - 0xDC00)
+ encoded.append(ch)
+ else:
+ raise UnicodeEncodeError(
+ FS_ENCODING,
+ fn, index, index+1, 'surrogates not allowed')
+ else:
+ ch_utf8 = ch.encode('utf-8')
+ encoded.append(ch_utf8)
+ return bytes().join(encoded)
+ else:
+ return fn.encode(FS_ENCODING, FS_ERRORS)
+
+def decodefilename(fn):
+ return fn.decode(FS_ENCODING, FS_ERRORS)
+
+FS_ENCODING = 'ascii'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]')
+# FS_ENCODING = 'cp932'; fn = b('[abc\x81\x00]'); encoded = u('[abc\udc81\x00]')
+# FS_ENCODING = 'UTF-8'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]')
+
+
+# normalize the filesystem encoding name.
+# For example, we expect "utf-8", not "UTF8".
+FS_ENCODING = codecs.lookup(FS_ENCODING).name
+
+
+def register_surrogateescape():
+ """
+ Registers the surrogateescape error handler on Python 2 (only)
+ """
+ if utils.PY3:
+ return
+ try:
+ codecs.lookup_error(FS_ERRORS)
+ except LookupError:
+ codecs.register_error(FS_ERRORS, surrogateescape_handler)
+
+
+if __name__ == '__main__':
+ pass
+ # # Tests:
+ # register_surrogateescape()
+
+ # b = decodefilename(fn)
+ # assert b == encoded, "%r != %r" % (b, encoded)
+ # c = encodefilename(b)
+ # assert c == fn, '%r != %r' % (c, fn)
+ # # print("ok")
diff --git a/contrib/python/future/html/__init__.py b/contrib/python/future/html/__init__.py
index 5ba852e022..e957e74570 100644
--- a/contrib/python/future/html/__init__.py
+++ b/contrib/python/future/html/__init__.py
@@ -1,9 +1,9 @@
-from __future__ import absolute_import
-import sys
-
-if sys.version_info[0] < 3:
- from future.moves.html import *
-else:
- raise ImportError('This package should not be accessible on Python 3. '
- 'Either you are trying to run from the python-future src folder '
- 'or your installation of python-future is corrupted.')
+from __future__ import absolute_import
+import sys
+
+if sys.version_info[0] < 3:
+ from future.moves.html import *
+else:
+ raise ImportError('This package should not be accessible on Python 3. '
+ 'Either you are trying to run from the python-future src folder '
+ 'or your installation of python-future is corrupted.')
diff --git a/contrib/python/future/html/entities.py b/contrib/python/future/html/entities.py
index 33e6e4c67f..211649e531 100644
--- a/contrib/python/future/html/entities.py
+++ b/contrib/python/future/html/entities.py
@@ -1,7 +1,7 @@
-from __future__ import absolute_import
-from future.utils import PY3
-
-if PY3:
- from html.entities import *
-else:
- from future.moves.html.entities import *
+from __future__ import absolute_import
+from future.utils import PY3
+
+if PY3:
+ from html.entities import *
+else:
+ from future.moves.html.entities import *
diff --git a/contrib/python/future/html/parser.py b/contrib/python/future/html/parser.py
index 7095e94d18..e39488797e 100644
--- a/contrib/python/future/html/parser.py
+++ b/contrib/python/future/html/parser.py
@@ -1,8 +1,8 @@
-from __future__ import absolute_import
-import sys
-__future_module__ = True
-
+from __future__ import absolute_import
+import sys
+__future_module__ = True
+
if sys.version_info[0] >= 3:
- raise ImportError('Cannot import module from python-future source folder')
-else:
- from future.moves.html.parser import *
+ raise ImportError('Cannot import module from python-future source folder')
+else:
+ from future.moves.html.parser import *
diff --git a/contrib/python/future/http/__init__.py b/contrib/python/future/http/__init__.py
index dba271f0f8..e4f853e53c 100644
--- a/contrib/python/future/http/__init__.py
+++ b/contrib/python/future/http/__init__.py
@@ -1,9 +1,9 @@
-from __future__ import absolute_import
-import sys
-
-if sys.version_info[0] < 3:
- pass
-else:
- raise ImportError('This package should not be accessible on Python 3. '
- 'Either you are trying to run from the python-future src folder '
- 'or your installation of python-future is corrupted.')
+from __future__ import absolute_import
+import sys
+
+if sys.version_info[0] < 3:
+ pass
+else:
+ raise ImportError('This package should not be accessible on Python 3. '
+ 'Either you are trying to run from the python-future src folder '
+ 'or your installation of python-future is corrupted.')
diff --git a/contrib/python/future/http/client.py b/contrib/python/future/http/client.py
index 6153229d5e..a6a31006bd 100644
--- a/contrib/python/future/http/client.py
+++ b/contrib/python/future/http/client.py
@@ -1,90 +1,90 @@
-from __future__ import absolute_import
-import sys
-
-assert sys.version_info[0] < 3
-
-from httplib import *
-from httplib import HTTPMessage
-
-# These constants aren't included in __all__ in httplib.py:
-
-from httplib import (HTTP_PORT,
- HTTPS_PORT,
-
- CONTINUE,
- SWITCHING_PROTOCOLS,
- PROCESSING,
-
- OK,
- CREATED,
- ACCEPTED,
- NON_AUTHORITATIVE_INFORMATION,
- NO_CONTENT,
- RESET_CONTENT,
- PARTIAL_CONTENT,
- MULTI_STATUS,
- IM_USED,
-
- MULTIPLE_CHOICES,
- MOVED_PERMANENTLY,
- FOUND,
- SEE_OTHER,
- NOT_MODIFIED,
- USE_PROXY,
- TEMPORARY_REDIRECT,
-
- BAD_REQUEST,
- UNAUTHORIZED,
- PAYMENT_REQUIRED,
- FORBIDDEN,
- NOT_FOUND,
- METHOD_NOT_ALLOWED,
- NOT_ACCEPTABLE,
- PROXY_AUTHENTICATION_REQUIRED,
- REQUEST_TIMEOUT,
- CONFLICT,
- GONE,
- LENGTH_REQUIRED,
- PRECONDITION_FAILED,
- REQUEST_ENTITY_TOO_LARGE,
- REQUEST_URI_TOO_LONG,
- UNSUPPORTED_MEDIA_TYPE,
- REQUESTED_RANGE_NOT_SATISFIABLE,
- EXPECTATION_FAILED,
- UNPROCESSABLE_ENTITY,
- LOCKED,
- FAILED_DEPENDENCY,
- UPGRADE_REQUIRED,
-
- INTERNAL_SERVER_ERROR,
- NOT_IMPLEMENTED,
- BAD_GATEWAY,
- SERVICE_UNAVAILABLE,
- GATEWAY_TIMEOUT,
- HTTP_VERSION_NOT_SUPPORTED,
- INSUFFICIENT_STORAGE,
- NOT_EXTENDED,
-
- MAXAMOUNT,
- )
-
-# These are not available on Python 2.6.x:
-try:
- from httplib import LineTooLong, LineAndFileWrapper
-except ImportError:
- pass
-
-# These may not be available on all versions of Python 2.6.x or 2.7.x
-try:
- from httplib import (
+from __future__ import absolute_import
+import sys
+
+assert sys.version_info[0] < 3
+
+from httplib import *
+from httplib import HTTPMessage
+
+# These constants aren't included in __all__ in httplib.py:
+
+from httplib import (HTTP_PORT,
+ HTTPS_PORT,
+
+ CONTINUE,
+ SWITCHING_PROTOCOLS,
+ PROCESSING,
+
+ OK,
+ CREATED,
+ ACCEPTED,
+ NON_AUTHORITATIVE_INFORMATION,
+ NO_CONTENT,
+ RESET_CONTENT,
+ PARTIAL_CONTENT,
+ MULTI_STATUS,
+ IM_USED,
+
+ MULTIPLE_CHOICES,
+ MOVED_PERMANENTLY,
+ FOUND,
+ SEE_OTHER,
+ NOT_MODIFIED,
+ USE_PROXY,
+ TEMPORARY_REDIRECT,
+
+ BAD_REQUEST,
+ UNAUTHORIZED,
+ PAYMENT_REQUIRED,
+ FORBIDDEN,
+ NOT_FOUND,
+ METHOD_NOT_ALLOWED,
+ NOT_ACCEPTABLE,
+ PROXY_AUTHENTICATION_REQUIRED,
+ REQUEST_TIMEOUT,
+ CONFLICT,
+ GONE,
+ LENGTH_REQUIRED,
+ PRECONDITION_FAILED,
+ REQUEST_ENTITY_TOO_LARGE,
+ REQUEST_URI_TOO_LONG,
+ UNSUPPORTED_MEDIA_TYPE,
+ REQUESTED_RANGE_NOT_SATISFIABLE,
+ EXPECTATION_FAILED,
+ UNPROCESSABLE_ENTITY,
+ LOCKED,
+ FAILED_DEPENDENCY,
+ UPGRADE_REQUIRED,
+
+ INTERNAL_SERVER_ERROR,
+ NOT_IMPLEMENTED,
+ BAD_GATEWAY,
+ SERVICE_UNAVAILABLE,
+ GATEWAY_TIMEOUT,
+ HTTP_VERSION_NOT_SUPPORTED,
+ INSUFFICIENT_STORAGE,
+ NOT_EXTENDED,
+
+ MAXAMOUNT,
+ )
+
+# These are not available on Python 2.6.x:
+try:
+ from httplib import LineTooLong, LineAndFileWrapper
+except ImportError:
+ pass
+
+# These may not be available on all versions of Python 2.6.x or 2.7.x
+try:
+ from httplib import (
_CS_IDLE,
_CS_REQ_STARTED,
_CS_REQ_SENT,
- _MAXLINE,
- _MAXHEADERS,
- _is_legal_header_name,
- _is_illegal_header_value,
- _METHODS_EXPECTING_BODY
- )
-except ImportError:
- pass
+ _MAXLINE,
+ _MAXHEADERS,
+ _is_legal_header_name,
+ _is_illegal_header_value,
+ _METHODS_EXPECTING_BODY
+ )
+except ImportError:
+ pass
diff --git a/contrib/python/future/http/cookiejar.py b/contrib/python/future/http/cookiejar.py
index b53fa56663..d847b2bf2d 100644
--- a/contrib/python/future/http/cookiejar.py
+++ b/contrib/python/future/http/cookiejar.py
@@ -1,6 +1,6 @@
-from __future__ import absolute_import
-import sys
-
-assert sys.version_info[0] < 3
-
-from cookielib import *
+from __future__ import absolute_import
+import sys
+
+assert sys.version_info[0] < 3
+
+from cookielib import *
diff --git a/contrib/python/future/http/cookies.py b/contrib/python/future/http/cookies.py
index d2bba70b34..eb2a82388b 100644
--- a/contrib/python/future/http/cookies.py
+++ b/contrib/python/future/http/cookies.py
@@ -1,7 +1,7 @@
-from __future__ import absolute_import
-import sys
-
-assert sys.version_info[0] < 3
-
-from Cookie import *
-from Cookie import Morsel # left out of __all__ on Py2.7!
+from __future__ import absolute_import
+import sys
+
+assert sys.version_info[0] < 3
+
+from Cookie import *
+from Cookie import Morsel # left out of __all__ on Py2.7!
diff --git a/contrib/python/future/http/server.py b/contrib/python/future/http/server.py
index 755155c3d7..2971055782 100644
--- a/contrib/python/future/http/server.py
+++ b/contrib/python/future/http/server.py
@@ -1,18 +1,18 @@
-from __future__ import absolute_import
-import sys
-
-assert sys.version_info[0] < 3
-
-from BaseHTTPServer import *
-from CGIHTTPServer import *
-from SimpleHTTPServer import *
-try:
- from CGIHTTPServer import _url_collapse_path # needed for a test
-except ImportError:
- try:
- # Python 2.7.0 to 2.7.3
- from CGIHTTPServer import (
- _url_collapse_path_split as _url_collapse_path)
- except ImportError:
- # Doesn't exist on Python 2.6.x. Ignore it.
- pass
+from __future__ import absolute_import
+import sys
+
+assert sys.version_info[0] < 3
+
+from BaseHTTPServer import *
+from CGIHTTPServer import *
+from SimpleHTTPServer import *
+try:
+ from CGIHTTPServer import _url_collapse_path # needed for a test
+except ImportError:
+ try:
+ # Python 2.7.0 to 2.7.3
+ from CGIHTTPServer import (
+ _url_collapse_path_split as _url_collapse_path)
+ except ImportError:
+ # Doesn't exist on Python 2.6.x. Ignore it.
+ pass
diff --git a/contrib/python/future/queue/__init__.py b/contrib/python/future/queue/__init__.py
index 6c81bdfcd2..22bd296b63 100644
--- a/contrib/python/future/queue/__init__.py
+++ b/contrib/python/future/queue/__init__.py
@@ -1,10 +1,10 @@
-from __future__ import absolute_import
-import sys
-__future_module__ = True
-
-if sys.version_info[0] < 3:
- from Queue import *
-else:
- raise ImportError('This package should not be accessible on Python 3. '
- 'Either you are trying to run from the python-future src folder '
- 'or your installation of python-future is corrupted.')
+from __future__ import absolute_import
+import sys
+__future_module__ = True
+
+if sys.version_info[0] < 3:
+ from Queue import *
+else:
+ raise ImportError('This package should not be accessible on Python 3. '
+ 'Either you are trying to run from the python-future src folder '
+ 'or your installation of python-future is corrupted.')
diff --git a/contrib/python/future/reprlib/__init__.py b/contrib/python/future/reprlib/__init__.py
index b3b5cc2bdd..6ccf9c006f 100644
--- a/contrib/python/future/reprlib/__init__.py
+++ b/contrib/python/future/reprlib/__init__.py
@@ -1,9 +1,9 @@
-from __future__ import absolute_import
-import sys
-
-if sys.version_info[0] < 3:
- from repr import *
-else:
- raise ImportError('This package should not be accessible on Python 3. '
- 'Either you are trying to run from the python-future src folder '
- 'or your installation of python-future is corrupted.')
+from __future__ import absolute_import
+import sys
+
+if sys.version_info[0] < 3:
+ from repr import *
+else:
+ raise ImportError('This package should not be accessible on Python 3. '
+ 'Either you are trying to run from the python-future src folder '
+ 'or your installation of python-future is corrupted.')
diff --git a/contrib/python/future/socketserver/__init__.py b/contrib/python/future/socketserver/__init__.py
index a4ef9b4812..c5b8c9c28b 100644
--- a/contrib/python/future/socketserver/__init__.py
+++ b/contrib/python/future/socketserver/__init__.py
@@ -1,9 +1,9 @@
-from __future__ import absolute_import
-import sys
-
-if sys.version_info[0] < 3:
- from SocketServer import *
-else:
- raise ImportError('This package should not be accessible on Python 3. '
- 'Either you are trying to run from the python-future src folder '
- 'or your installation of python-future is corrupted.')
+from __future__ import absolute_import
+import sys
+
+if sys.version_info[0] < 3:
+ from SocketServer import *
+else:
+ raise ImportError('This package should not be accessible on Python 3. '
+ 'Either you are trying to run from the python-future src folder '
+ 'or your installation of python-future is corrupted.')
diff --git a/contrib/python/future/winreg/__init__.py b/contrib/python/future/winreg/__init__.py
index 350d251746..97243bbb8f 100644
--- a/contrib/python/future/winreg/__init__.py
+++ b/contrib/python/future/winreg/__init__.py
@@ -1,10 +1,10 @@
-from __future__ import absolute_import
-import sys
-__future_module__ = True
-
-if sys.version_info[0] < 3:
- from _winreg import *
-else:
- raise ImportError('This package should not be accessible on Python 3. '
- 'Either you are trying to run from the python-future src folder '
- 'or your installation of python-future is corrupted.')
+from __future__ import absolute_import
+import sys
+__future_module__ = True
+
+if sys.version_info[0] < 3:
+ from _winreg import *
+else:
+ raise ImportError('This package should not be accessible on Python 3. '
+ 'Either you are trying to run from the python-future src folder '
+ 'or your installation of python-future is corrupted.')
diff --git a/contrib/python/future/xmlrpc/__init__.py b/contrib/python/future/xmlrpc/__init__.py
index dba271f0f8..e4f853e53c 100644
--- a/contrib/python/future/xmlrpc/__init__.py
+++ b/contrib/python/future/xmlrpc/__init__.py
@@ -1,9 +1,9 @@
-from __future__ import absolute_import
-import sys
-
-if sys.version_info[0] < 3:
- pass
-else:
- raise ImportError('This package should not be accessible on Python 3. '
- 'Either you are trying to run from the python-future src folder '
- 'or your installation of python-future is corrupted.')
+from __future__ import absolute_import
+import sys
+
+if sys.version_info[0] < 3:
+ pass
+else:
+ raise ImportError('This package should not be accessible on Python 3. '
+ 'Either you are trying to run from the python-future src folder '
+ 'or your installation of python-future is corrupted.')
diff --git a/contrib/python/future/xmlrpc/client.py b/contrib/python/future/xmlrpc/client.py
index 233bfe8b87..a8d0827e9b 100644
--- a/contrib/python/future/xmlrpc/client.py
+++ b/contrib/python/future/xmlrpc/client.py
@@ -1,5 +1,5 @@
-from __future__ import absolute_import
-import sys
-
-assert sys.version_info[0] < 3
-from xmlrpclib import *
+from __future__ import absolute_import
+import sys
+
+assert sys.version_info[0] < 3
+from xmlrpclib import *
diff --git a/contrib/python/future/xmlrpc/server.py b/contrib/python/future/xmlrpc/server.py
index 233bfe8b87..a8d0827e9b 100644
--- a/contrib/python/future/xmlrpc/server.py
+++ b/contrib/python/future/xmlrpc/server.py
@@ -1,5 +1,5 @@
-from __future__ import absolute_import
-import sys
-
-assert sys.version_info[0] < 3
-from xmlrpclib import *
+from __future__ import absolute_import
+import sys
+
+assert sys.version_info[0] < 3
+from xmlrpclib import *
diff --git a/contrib/python/future/ya.make b/contrib/python/future/ya.make
index 539508451d..ba24f13341 100644
--- a/contrib/python/future/ya.make
+++ b/contrib/python/future/ya.make
@@ -1,131 +1,131 @@
PY23_LIBRARY()
-
+
LICENSE(MIT)
-OWNER(g:python-contrib)
-
+OWNER(g:python-contrib)
+
VERSION(0.18.2)
-
-NO_CHECK_IMPORTS(
- future.backports.email.policy # email backport is incomplete in v0.16.0.
- future.moves.dbm.ndbm
-)
-
-NO_LINT()
+
+NO_CHECK_IMPORTS(
+ future.backports.email.policy # email backport is incomplete in v0.16.0.
+ future.moves.dbm.ndbm
+)
+
+NO_LINT()
NO_EXTENDED_SOURCE_SEARCH()
-
-PY_SRCS(
- TOP_LEVEL
- future/backports/__init__.py
- future/backports/_markupbase.py
- future/backports/datetime.py
- future/backports/email/__init__.py
- future/backports/email/_encoded_words.py
- future/backports/email/_header_value_parser.py
- future/backports/email/_parseaddr.py
- future/backports/email/_policybase.py
- future/backports/email/base64mime.py
- future/backports/email/charset.py
- future/backports/email/encoders.py
- future/backports/email/errors.py
- future/backports/email/feedparser.py
- future/backports/email/generator.py
- future/backports/email/header.py
- future/backports/email/headerregistry.py
- future/backports/email/iterators.py
- future/backports/email/message.py
- future/backports/email/mime/__init__.py
- future/backports/email/mime/application.py
- future/backports/email/mime/audio.py
- future/backports/email/mime/base.py
- future/backports/email/mime/image.py
- future/backports/email/mime/message.py
- future/backports/email/mime/multipart.py
- future/backports/email/mime/nonmultipart.py
- future/backports/email/mime/text.py
- future/backports/email/parser.py
- future/backports/email/policy.py
- future/backports/email/quoprimime.py
- future/backports/email/utils.py
- future/backports/html/__init__.py
- future/backports/html/entities.py
- future/backports/html/parser.py
- future/backports/http/__init__.py
- future/backports/http/client.py
- future/backports/http/cookiejar.py
- future/backports/http/cookies.py
- future/backports/http/server.py
- future/backports/misc.py
- future/backports/socket.py
- future/backports/socketserver.py
- future/backports/total_ordering.py
- future/backports/urllib/__init__.py
- future/backports/urllib/error.py
- future/backports/urllib/parse.py
- future/backports/urllib/request.py
- future/backports/urllib/response.py
- future/backports/urllib/robotparser.py
- future/backports/xmlrpc/__init__.py
- future/backports/xmlrpc/client.py
- future/backports/xmlrpc/server.py
- future/builtins/__init__.py
- future/builtins/disabled.py
- future/builtins/iterators.py
- future/builtins/misc.py
+
+PY_SRCS(
+ TOP_LEVEL
+ future/backports/__init__.py
+ future/backports/_markupbase.py
+ future/backports/datetime.py
+ future/backports/email/__init__.py
+ future/backports/email/_encoded_words.py
+ future/backports/email/_header_value_parser.py
+ future/backports/email/_parseaddr.py
+ future/backports/email/_policybase.py
+ future/backports/email/base64mime.py
+ future/backports/email/charset.py
+ future/backports/email/encoders.py
+ future/backports/email/errors.py
+ future/backports/email/feedparser.py
+ future/backports/email/generator.py
+ future/backports/email/header.py
+ future/backports/email/headerregistry.py
+ future/backports/email/iterators.py
+ future/backports/email/message.py
+ future/backports/email/mime/__init__.py
+ future/backports/email/mime/application.py
+ future/backports/email/mime/audio.py
+ future/backports/email/mime/base.py
+ future/backports/email/mime/image.py
+ future/backports/email/mime/message.py
+ future/backports/email/mime/multipart.py
+ future/backports/email/mime/nonmultipart.py
+ future/backports/email/mime/text.py
+ future/backports/email/parser.py
+ future/backports/email/policy.py
+ future/backports/email/quoprimime.py
+ future/backports/email/utils.py
+ future/backports/html/__init__.py
+ future/backports/html/entities.py
+ future/backports/html/parser.py
+ future/backports/http/__init__.py
+ future/backports/http/client.py
+ future/backports/http/cookiejar.py
+ future/backports/http/cookies.py
+ future/backports/http/server.py
+ future/backports/misc.py
+ future/backports/socket.py
+ future/backports/socketserver.py
+ future/backports/total_ordering.py
+ future/backports/urllib/__init__.py
+ future/backports/urllib/error.py
+ future/backports/urllib/parse.py
+ future/backports/urllib/request.py
+ future/backports/urllib/response.py
+ future/backports/urllib/robotparser.py
+ future/backports/xmlrpc/__init__.py
+ future/backports/xmlrpc/client.py
+ future/backports/xmlrpc/server.py
+ future/builtins/__init__.py
+ future/builtins/disabled.py
+ future/builtins/iterators.py
+ future/builtins/misc.py
future/builtins/new_min_max.py
- future/builtins/newnext.py
- future/builtins/newround.py
- future/builtins/newsuper.py
- future/moves/__init__.py
- future/moves/_dummy_thread.py
- future/moves/_markupbase.py
- future/moves/_thread.py
- future/moves/builtins.py
- future/moves/collections.py
- future/moves/configparser.py
- future/moves/copyreg.py
- future/moves/dbm/__init__.py
- future/moves/dbm/dumb.py
- future/moves/dbm/ndbm.py
- future/moves/html/__init__.py
- future/moves/html/entities.py
- future/moves/html/parser.py
- future/moves/http/__init__.py
- future/moves/http/client.py
- future/moves/http/cookiejar.py
- future/moves/http/cookies.py
- future/moves/http/server.py
- future/moves/itertools.py
- future/moves/pickle.py
- future/moves/queue.py
- future/moves/reprlib.py
- future/moves/socketserver.py
- future/moves/subprocess.py
- future/moves/sys.py
- future/moves/urllib/__init__.py
- future/moves/urllib/error.py
- future/moves/urllib/parse.py
- future/moves/urllib/request.py
- future/moves/urllib/response.py
- future/moves/urllib/robotparser.py
- future/moves/xmlrpc/__init__.py
- future/moves/xmlrpc/client.py
- future/moves/xmlrpc/server.py
- future/standard_library/__init__.py
- future/tests/__init__.py
- future/tests/base.py
- future/types/__init__.py
- future/types/newbytes.py
- future/types/newdict.py
- future/types/newint.py
- future/types/newlist.py
- future/types/newmemoryview.py
- future/types/newobject.py
- future/types/newopen.py
- future/types/newrange.py
- future/types/newstr.py
- future/utils/__init__.py
- future/utils/surrogateescape.py
+ future/builtins/newnext.py
+ future/builtins/newround.py
+ future/builtins/newsuper.py
+ future/moves/__init__.py
+ future/moves/_dummy_thread.py
+ future/moves/_markupbase.py
+ future/moves/_thread.py
+ future/moves/builtins.py
+ future/moves/collections.py
+ future/moves/configparser.py
+ future/moves/copyreg.py
+ future/moves/dbm/__init__.py
+ future/moves/dbm/dumb.py
+ future/moves/dbm/ndbm.py
+ future/moves/html/__init__.py
+ future/moves/html/entities.py
+ future/moves/html/parser.py
+ future/moves/http/__init__.py
+ future/moves/http/client.py
+ future/moves/http/cookiejar.py
+ future/moves/http/cookies.py
+ future/moves/http/server.py
+ future/moves/itertools.py
+ future/moves/pickle.py
+ future/moves/queue.py
+ future/moves/reprlib.py
+ future/moves/socketserver.py
+ future/moves/subprocess.py
+ future/moves/sys.py
+ future/moves/urllib/__init__.py
+ future/moves/urllib/error.py
+ future/moves/urllib/parse.py
+ future/moves/urllib/request.py
+ future/moves/urllib/response.py
+ future/moves/urllib/robotparser.py
+ future/moves/xmlrpc/__init__.py
+ future/moves/xmlrpc/client.py
+ future/moves/xmlrpc/server.py
+ future/standard_library/__init__.py
+ future/tests/__init__.py
+ future/tests/base.py
+ future/types/__init__.py
+ future/types/newbytes.py
+ future/types/newdict.py
+ future/types/newint.py
+ future/types/newlist.py
+ future/types/newmemoryview.py
+ future/types/newobject.py
+ future/types/newopen.py
+ future/types/newrange.py
+ future/types/newstr.py
+ future/utils/__init__.py
+ future/utils/surrogateescape.py
past/builtins/__init__.py
past/builtins/misc.py
past/builtins/noniterators.py
@@ -135,8 +135,8 @@ PY_SRCS(
past/types/olddict.py
past/types/oldstr.py
past/utils/__init__.py
-)
-
+)
+
IF (MODULE_TAG == "PY2")
PY_SRCS(
TOP_LEVEL
@@ -164,12 +164,12 @@ IF (MODULE_TAG == "PY2")
)
ENDIF()
-IF (OS_WINDOWS)
- PY_SRCS(
- TOP_LEVEL
- future/moves/winreg.py
- winreg/__init__.py
- )
-ENDIF()
-
-END()
+IF (OS_WINDOWS)
+ PY_SRCS(
+ TOP_LEVEL
+ future/moves/winreg.py
+ winreg/__init__.py
+ )
+ENDIF()
+
+END()