diff options
author | vitalyisaev <vitalyisaev@ydb.tech> | 2023-11-30 13:26:22 +0300 |
---|---|---|
committer | vitalyisaev <vitalyisaev@ydb.tech> | 2023-11-30 15:44:45 +0300 |
commit | 0a98fece5a9b54f16afeb3a94b3eb3105e9c3962 (patch) | |
tree | 291d72dbd7e9865399f668c84d11ed86fb190bbf /contrib/python/toolz | |
parent | cb2c8d75065e5b3c47094067cb4aa407d4813298 (diff) | |
download | ydb-0a98fece5a9b54f16afeb3a94b3eb3105e9c3962.tar.gz |
YQ Connector: Use docker-compose in integration tests
Diffstat (limited to 'contrib/python/toolz')
45 files changed, 8544 insertions, 0 deletions
diff --git a/contrib/python/toolz/py2/.dist-info/METADATA b/contrib/python/toolz/py2/.dist-info/METADATA new file mode 100644 index 0000000000..c43bc308d4 --- /dev/null +++ b/contrib/python/toolz/py2/.dist-info/METADATA @@ -0,0 +1,159 @@ +Metadata-Version: 2.1 +Name: toolz +Version: 0.10.0 +Summary: List processing tools and functional utilities +Home-page: https://github.com/pytoolz/toolz/ +Author: https://raw.github.com/pytoolz/toolz/master/AUTHORS.md +Maintainer: Matthew Rocklin +Maintainer-email: mrocklin@gmail.com +License: BSD +Keywords: functional utility itertools functools +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: License :: OSI Approved :: BSD License +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Requires-Python: >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.* + +Toolz +===== + +|Build Status| |Coverage Status| |Version Status| + +A set of utility functions for iterators, functions, and dictionaries. + +See the PyToolz documentation at https://toolz.readthedocs.io + +LICENSE +------- + +New BSD. See `License File <https://github.com/pytoolz/toolz/blob/master/LICENSE.txt>`__. + +Install +------- + +``toolz`` is on the Python Package Index (PyPI): + +:: + + pip install toolz + +Structure and Heritage +---------------------- + +``toolz`` is implemented in three parts: + +|literal itertoolz|_, for operations on iterables. Examples: ``groupby``, +``unique``, ``interpose``, + +|literal functoolz|_, for higher-order functions. 
Examples: ``memoize``, +``curry``, ``compose``, + +|literal dicttoolz|_, for operations on dictionaries. Examples: ``assoc``, +``update-in``, ``merge``. + +.. |literal itertoolz| replace:: ``itertoolz`` +.. _literal itertoolz: https://github.com/pytoolz/toolz/blob/master/toolz/itertoolz.py + +.. |literal functoolz| replace:: ``functoolz`` +.. _literal functoolz: https://github.com/pytoolz/toolz/blob/master/toolz/functoolz.py + +.. |literal dicttoolz| replace:: ``dicttoolz`` +.. _literal dicttoolz: https://github.com/pytoolz/toolz/blob/master/toolz/dicttoolz.py + +These functions come from the legacy of functional languages for list +processing. They interoperate well to accomplish common complex tasks. + +Read our `API +Documentation <https://toolz.readthedocs.io/en/latest/api.html>`__ for +more details. + +Example +------- + +This builds a standard wordcount function from pieces within ``toolz``: + +.. code:: python + + >>> def stem(word): + ... """ Stem word to primitive form """ + ... return word.lower().rstrip(",.!:;'-\"").lstrip("'\"") + + >>> from toolz import compose, frequencies, partial + >>> from toolz.curried import map + >>> wordcount = compose(frequencies, map(stem), str.split) + + >>> sentence = "This cat jumped over this other cat!" + >>> wordcount(sentence) + {'this': 2, 'cat': 2, 'jumped': 1, 'over': 1, 'other': 1} + +Dependencies +------------ + +``toolz`` supports Python 2.7 and Python 3.4+ with a common codebase. +It is pure Python and requires no dependencies beyond the standard +library. + +It is, in short, a lightweight dependency. + + +CyToolz +------- + +The ``toolz`` project has been reimplemented in `Cython <http://cython.org>`__. +The ``cytoolz`` project is a drop-in replacement for the Pure Python +implementation. +See `CyToolz GitHub Page <https://github.com/pytoolz/cytoolz/>`__ for more +details. 
+ +See Also +-------- + +- `Underscore.js <https://underscorejs.org/>`__: A similar library for + JavaScript +- `Enumerable <https://ruby-doc.org/core-2.0.0/Enumerable.html>`__: A + similar library for Ruby +- `Clojure <https://clojure.org/>`__: A functional language whose + standard library has several counterparts in ``toolz`` +- `itertools <https://docs.python.org/2/library/itertools.html>`__: The + Python standard library for iterator tools +- `functools <https://docs.python.org/2/library/functools.html>`__: The + Python standard library for function tools + +Contributions Welcome +--------------------- + +``toolz`` aims to be a repository for utility functions, particularly +those that come from the functional programming and list processing +traditions. We welcome contributions that fall within this scope. + +We also try to keep the API small to keep ``toolz`` manageable. The ideal +contribution is significantly different from existing functions and has +precedent in a few other functional systems. + +Please take a look at our +`issue page <https://github.com/pytoolz/toolz/issues>`__ +for contribution ideas. + +Community +--------- + +See our `mailing list <https://groups.google.com/forum/#!forum/pytoolz>`__. +We're friendly. + +.. |Build Status| image:: https://travis-ci.org/pytoolz/toolz.svg?branch=master + :target: https://travis-ci.org/pytoolz/toolz +.. |Coverage Status| image:: https://coveralls.io/repos/pytoolz/toolz/badge.svg?branch=master + :target: https://coveralls.io/r/pytoolz/toolz +.. 
|Version Status| image:: https://badge.fury.io/py/toolz.svg + :target: https://badge.fury.io/py/toolz + + diff --git a/contrib/python/toolz/py2/.dist-info/top_level.txt b/contrib/python/toolz/py2/.dist-info/top_level.txt new file mode 100644 index 0000000000..e58ef014ac --- /dev/null +++ b/contrib/python/toolz/py2/.dist-info/top_level.txt @@ -0,0 +1,2 @@ +tlz +toolz diff --git a/contrib/python/toolz/py2/LICENSE.txt b/contrib/python/toolz/py2/LICENSE.txt new file mode 100644 index 0000000000..eeb91b202c --- /dev/null +++ b/contrib/python/toolz/py2/LICENSE.txt @@ -0,0 +1,28 @@ +Copyright (c) 2013 Matthew Rocklin + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + a. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + b. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + c. Neither the name of toolz nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +DAMAGE. diff --git a/contrib/python/toolz/py2/README.rst b/contrib/python/toolz/py2/README.rst new file mode 100644 index 0000000000..099c3ff807 --- /dev/null +++ b/contrib/python/toolz/py2/README.rst @@ -0,0 +1,132 @@ +Toolz +===== + +|Build Status| |Coverage Status| |Version Status| + +A set of utility functions for iterators, functions, and dictionaries. + +See the PyToolz documentation at https://toolz.readthedocs.io + +LICENSE +------- + +New BSD. See `License File <https://github.com/pytoolz/toolz/blob/master/LICENSE.txt>`__. + +Install +------- + +``toolz`` is on the Python Package Index (PyPI): + +:: + + pip install toolz + +Structure and Heritage +---------------------- + +``toolz`` is implemented in three parts: + +|literal itertoolz|_, for operations on iterables. Examples: ``groupby``, +``unique``, ``interpose``, + +|literal functoolz|_, for higher-order functions. Examples: ``memoize``, +``curry``, ``compose``, + +|literal dicttoolz|_, for operations on dictionaries. Examples: ``assoc``, +``update-in``, ``merge``. + +.. |literal itertoolz| replace:: ``itertoolz`` +.. _literal itertoolz: https://github.com/pytoolz/toolz/blob/master/toolz/itertoolz.py + +.. |literal functoolz| replace:: ``functoolz`` +.. _literal functoolz: https://github.com/pytoolz/toolz/blob/master/toolz/functoolz.py + +.. |literal dicttoolz| replace:: ``dicttoolz`` +.. 
_literal dicttoolz: https://github.com/pytoolz/toolz/blob/master/toolz/dicttoolz.py + +These functions come from the legacy of functional languages for list +processing. They interoperate well to accomplish common complex tasks. + +Read our `API +Documentation <https://toolz.readthedocs.io/en/latest/api.html>`__ for +more details. + +Example +------- + +This builds a standard wordcount function from pieces within ``toolz``: + +.. code:: python + + >>> def stem(word): + ... """ Stem word to primitive form """ + ... return word.lower().rstrip(",.!:;'-\"").lstrip("'\"") + + >>> from toolz import compose, frequencies, partial + >>> from toolz.curried import map + >>> wordcount = compose(frequencies, map(stem), str.split) + + >>> sentence = "This cat jumped over this other cat!" + >>> wordcount(sentence) + {'this': 2, 'cat': 2, 'jumped': 1, 'over': 1, 'other': 1} + +Dependencies +------------ + +``toolz`` supports Python 2.7 and Python 3.4+ with a common codebase. +It is pure Python and requires no dependencies beyond the standard +library. + +It is, in short, a lightweight dependency. + + +CyToolz +------- + +The ``toolz`` project has been reimplemented in `Cython <http://cython.org>`__. +The ``cytoolz`` project is a drop-in replacement for the Pure Python +implementation. +See `CyToolz GitHub Page <https://github.com/pytoolz/cytoolz/>`__ for more +details. 
+ +See Also +-------- + +- `Underscore.js <https://underscorejs.org/>`__: A similar library for + JavaScript +- `Enumerable <https://ruby-doc.org/core-2.0.0/Enumerable.html>`__: A + similar library for Ruby +- `Clojure <https://clojure.org/>`__: A functional language whose + standard library has several counterparts in ``toolz`` +- `itertools <https://docs.python.org/2/library/itertools.html>`__: The + Python standard library for iterator tools +- `functools <https://docs.python.org/2/library/functools.html>`__: The + Python standard library for function tools + +Contributions Welcome +--------------------- + +``toolz`` aims to be a repository for utility functions, particularly +those that come from the functional programming and list processing +traditions. We welcome contributions that fall within this scope. + +We also try to keep the API small to keep ``toolz`` manageable. The ideal +contribution is significantly different from existing functions and has +precedent in a few other functional systems. + +Please take a look at our +`issue page <https://github.com/pytoolz/toolz/issues>`__ +for contribution ideas. + +Community +--------- + +See our `mailing list <https://groups.google.com/forum/#!forum/pytoolz>`__. +We're friendly. + +.. |Build Status| image:: https://travis-ci.org/pytoolz/toolz.svg?branch=master + :target: https://travis-ci.org/pytoolz/toolz +.. |Coverage Status| image:: https://coveralls.io/repos/pytoolz/toolz/badge.svg?branch=master + :target: https://coveralls.io/r/pytoolz/toolz +.. |Version Status| image:: https://badge.fury.io/py/toolz.svg + :target: https://badge.fury.io/py/toolz diff --git a/contrib/python/toolz/py2/tlz/__init__.py b/contrib/python/toolz/py2/tlz/__init__.py new file mode 100644 index 0000000000..9c9c84afe1 --- /dev/null +++ b/contrib/python/toolz/py2/tlz/__init__.py @@ -0,0 +1,9 @@ +"""``tlz`` mirrors the ``toolz`` API and uses ``cytoolz`` if possible. + +The ``tlz`` package is installed when ``toolz`` is installed. 
It provides +a convenient way to use functions from ``cytoolz``--a faster Cython +implementation of ``toolz``--if it is installed, otherwise it uses +functions from ``toolz``. +""" + +from . import _build_tlz diff --git a/contrib/python/toolz/py2/tlz/_build_tlz.py b/contrib/python/toolz/py2/tlz/_build_tlz.py new file mode 100644 index 0000000000..3c017a542c --- /dev/null +++ b/contrib/python/toolz/py2/tlz/_build_tlz.py @@ -0,0 +1,100 @@ +import sys +import types +import toolz +from importlib import import_module + + +class TlzLoader(object): + """ Finds and loads ``tlz`` modules when added to sys.meta_path""" + def __init__(self): + self.always_from_toolz = { + toolz.pipe, + } + + def _load_toolz(self, fullname): + rv = {} + package, dot, submodules = fullname.partition('.') + try: + module_name = ''.join(['cytoolz', dot, submodules]) + rv['cytoolz'] = import_module(module_name) + except ImportError: + pass + try: + module_name = ''.join(['toolz', dot, submodules]) + rv['toolz'] = import_module(module_name) + except ImportError: + pass + if not rv: + raise ImportError(fullname) + return rv + + def find_module(self, fullname, path=None): # pragma: py3 no cover + package, dot, submodules = fullname.partition('.') + if package == 'tlz': + return self + + def load_module(self, fullname): # pragma: py3 no cover + if fullname in sys.modules: # pragma: no cover + return sys.modules[fullname] + spec = TlzSpec(fullname, self) + module = self.create_module(spec) + sys.modules[fullname] = module + self.exec_module(module) + return module + + def find_spec(self, fullname, path, target=None): # pragma: no cover + package, dot, submodules = fullname.partition('.') + if package == 'tlz': + return TlzSpec(fullname, self) + + def create_module(self, spec): + return types.ModuleType(spec.name) + + def exec_module(self, module): + toolz_mods = self._load_toolz(module.__name__) + fast_mod = toolz_mods.get('cytoolz') or toolz_mods['toolz'] + slow_mod = toolz_mods.get('toolz') or 
toolz_mods['cytoolz'] + module.__dict__.update(toolz.merge(fast_mod.__dict__, module.__dict__)) + package = fast_mod.__package__ + if package is not None: + package, dot, submodules = package.partition('.') + module.__package__ = ''.join(['tlz', dot, submodules]) + if not module.__doc__: + module.__doc__ = fast_mod.__doc__ + + # show file from toolz during introspection + module.__file__ = slow_mod.__file__ + + for k, v in fast_mod.__dict__.items(): + tv = slow_mod.__dict__.get(k) + try: + hash(tv) + except TypeError: + tv = None + if tv in self.always_from_toolz: + module.__dict__[k] = tv + elif ( + isinstance(v, types.ModuleType) + and v.__package__ == fast_mod.__name__ + ): + package, dot, submodules = v.__name__.partition('.') + module_name = ''.join(['tlz', dot, submodules]) + submodule = import_module(module_name) + module.__dict__[k] = submodule + + +class TlzSpec(object): + def __init__(self, name, loader): + self.name = name + self.loader = loader + self.origin = None + self.submodule_search_locations = [] + self.loader_state = None + self.cached = None + self.parent = None + self.has_location = False + + +tlz_loader = TlzLoader() +sys.meta_path.append(tlz_loader) +tlz_loader.exec_module(sys.modules['tlz']) diff --git a/contrib/python/toolz/py2/toolz/__init__.py b/contrib/python/toolz/py2/toolz/__init__.py new file mode 100644 index 0000000000..7fa86ab473 --- /dev/null +++ b/contrib/python/toolz/py2/toolz/__init__.py @@ -0,0 +1,22 @@ +from .itertoolz import * + +from .functoolz import * + +from .dicttoolz import * + +from .recipes import * + +from .compatibility import map, filter + +from functools import partial, reduce + +sorted = sorted + +# Aliases +comp = compose + +from . 
import curried, sandbox + +functoolz._sigs.create_signature_registry() + +__version__ = '0.10.0' diff --git a/contrib/python/toolz/py2/toolz/_signatures.py b/contrib/python/toolz/py2/toolz/_signatures.py new file mode 100644 index 0000000000..c55a778b3b --- /dev/null +++ b/contrib/python/toolz/py2/toolz/_signatures.py @@ -0,0 +1,832 @@ +"""Internal module for better introspection of builtins. + +The main functions are ``is_builtin_valid_args``, ``is_builtin_partial_args``, +and ``has_unknown_args``. Other functions in this module support these three. + +Notably, we create a ``signatures`` registry to enable introspection of +builtin functions in any Python version. This includes builtins that +have more than one valid signature. Currently, the registry includes +builtins from ``builtins``, ``functools``, ``itertools``, and ``operator`` +modules. More can be added as requested. We don't guarantee full coverage. + +Everything in this module should be regarded as implementation details. +Users should try to not use this module directly. +""" +import functools +import inspect +import itertools +import operator +from importlib import import_module + +from .compatibility import PY3 +from .functoolz import (is_partial_args, is_arity, has_varargs, + has_keywords, num_required_args) + +if PY3: # pragma: py2 no cover + import builtins +else: # pragma: py3 no cover + import __builtin__ as builtins + +# We mock builtin callables using lists of tuples with lambda functions. +# +# The tuple spec is (num_position_args, lambda_func, keyword_only_args). +# +# num_position_args: +# - The number of positional-only arguments. If not specified, +# all positional arguments are considered positional-only. +# +# lambda_func: +# - lambda function that matches a signature of a builtin, but does +# not include keyword-only arguments. +# +# keyword_only_args: (optional) +# - Tuple of keyword-only arguments. 
+ +module_info = {} + +module_info[builtins] = dict( + abs=[ + lambda x: None], + all=[ + lambda iterable: None], + any=[ + lambda iterable: None], + apply=[ + lambda object: None, + lambda object, args: None, + lambda object, args, kwargs: None], + ascii=[ + lambda obj: None], + bin=[ + lambda number: None], + bool=[ + lambda x=False: None], + buffer=[ + lambda object: None, + lambda object, offset: None, + lambda object, offset, size: None], + bytearray=[ + lambda: None, + lambda int: None, + lambda string, encoding='utf8', errors='strict': None], + callable=[ + lambda obj: None], + chr=[ + lambda i: None], + classmethod=[ + lambda function: None], + cmp=[ + lambda x, y: None], + coerce=[ + lambda x, y: None], + complex=[ + lambda real=0, imag=0: None], + delattr=[ + lambda obj, name: None], + dict=[ + lambda **kwargs: None, + lambda mapping, **kwargs: None], + dir=[ + lambda: None, + lambda object: None], + divmod=[ + lambda x, y: None], + enumerate=[ + (0, lambda iterable, start=0: None)], + eval=[ + lambda source: None, + lambda source, globals: None, + lambda source, globals, locals: None], + execfile=[ + lambda filename: None, + lambda filename, globals: None, + lambda filename, globals, locals: None], + file=[ + (0, lambda name, mode='r', buffering=-1: None)], + filter=[ + lambda function, iterable: None], + float=[ + lambda x=0.0: None], + format=[ + lambda value: None, + lambda value, format_spec: None], + frozenset=[ + lambda: None, + lambda iterable: None], + getattr=[ + lambda object, name: None, + lambda object, name, default: None], + globals=[ + lambda: None], + hasattr=[ + lambda obj, name: None], + hash=[ + lambda obj: None], + hex=[ + lambda number: None], + id=[ + lambda obj: None], + input=[ + lambda: None, + lambda prompt: None], + int=[ + lambda x=0: None, + (0, lambda x, base=10: None)], + intern=[ + lambda string: None], + isinstance=[ + lambda obj, class_or_tuple: None], + issubclass=[ + lambda cls, class_or_tuple: None], + iter=[ + lambda 
iterable: None, + lambda callable, sentinel: None], + len=[ + lambda obj: None], + list=[ + lambda: None, + lambda iterable: None], + locals=[ + lambda: None], + long=[ + lambda x=0: None, + (0, lambda x, base=10: None)], + map=[ + lambda func, sequence, *iterables: None], + memoryview=[ + (0, lambda object: None)], + next=[ + lambda iterator: None, + lambda iterator, default: None], + object=[ + lambda: None], + oct=[ + lambda number: None], + ord=[ + lambda c: None], + pow=[ + lambda x, y: None, + lambda x, y, z: None], + property=[ + lambda fget=None, fset=None, fdel=None, doc=None: None], + range=[ + lambda stop: None, + lambda start, stop: None, + lambda start, stop, step: None], + raw_input=[ + lambda: None, + lambda prompt: None], + reduce=[ + lambda function, sequence: None, + lambda function, sequence, initial: None], + reload=[ + lambda module: None], + repr=[ + lambda obj: None], + reversed=[ + lambda sequence: None], + round=[ + (0, lambda number, ndigits=0: None)], + set=[ + lambda: None, + lambda iterable: None], + setattr=[ + lambda obj, name, value: None], + slice=[ + lambda stop: None, + lambda start, stop: None, + lambda start, stop, step: None], + staticmethod=[ + lambda function: None], + sum=[ + lambda iterable: None, + lambda iterable, start: None], + super=[ + lambda type: None, + lambda type, obj: None], + tuple=[ + lambda: None, + lambda iterable: None], + type=[ + lambda object: None, + lambda name, bases, dict: None], + unichr=[ + lambda i: None], + unicode=[ + lambda object: None, + lambda string='', encoding='utf8', errors='strict': None], + vars=[ + lambda: None, + lambda object: None], + xrange=[ + lambda stop: None, + lambda start, stop: None, + lambda start, stop, step: None], + zip=[ + lambda *iterables: None], + __build_class__=[ + (2, lambda func, name, *bases, **kwds: None, ('metaclass',))], + __import__=[ + (0, lambda name, globals=None, locals=None, fromlist=None, + level=None: None)], +) +module_info[builtins]['exec'] = [ + 
lambda source: None, + lambda source, globals: None, + lambda source, globals, locals: None] + +if PY3: # pragma: py2 no cover + module_info[builtins].update( + breakpoint=[ + lambda *args, **kws: None], + bytes=[ + lambda: None, + lambda int: None, + lambda string, encoding='utf8', errors='strict': None], + compile=[ + (0, lambda source, filename, mode, flags=0, + dont_inherit=False, optimize=-1: None)], + max=[ + (1, lambda iterable: None, ('default', 'key',)), + (1, lambda arg1, arg2, *args: None, ('key',))], + min=[ + (1, lambda iterable: None, ('default', 'key',)), + (1, lambda arg1, arg2, *args: None, ('key',))], + open=[ + (0, lambda file, mode='r', buffering=-1, encoding=None, + errors=None, newline=None, closefd=True, opener=None: None)], + sorted=[ + (1, lambda iterable: None, ('key', 'reverse'))], + str=[ + lambda object='', encoding='utf', errors='strict': None], + ) + module_info[builtins]['print'] = [ + (0, lambda *args: None, ('sep', 'end', 'file', 'flush',))] + +else: # pragma: py3 no cover + module_info[builtins].update( + bytes=[ + lambda object='': None], + compile=[ + (0, lambda source, filename, mode, flags=0, + dont_inherit=False: None)], + max=[ + (1, lambda iterable, *args: None, ('key',))], + min=[ + (1, lambda iterable, *args: None, ('key',))], + open=[ + (0, lambda file, mode='r', buffering=-1: None)], + sorted=[ + lambda iterable, cmp=None, key=None, reverse=False: None], + str=[ + lambda object='': None], + ) + module_info[builtins]['print'] = [ + (0, lambda *args: None, ('sep', 'end', 'file',))] + +module_info[functools] = dict( + cmp_to_key=[ + (0, lambda mycmp: None)], + partial=[ + lambda func, *args, **kwargs: None], + partialmethod=[ + lambda func, *args, **kwargs: None], + reduce=[ + lambda function, sequence: None, + lambda function, sequence, initial: None], +) + +module_info[itertools] = dict( + accumulate=[ + (0, lambda iterable, func=None: None)], + chain=[ + lambda *iterables: None], + combinations=[ + (0, lambda iterable, 
r: None)], + combinations_with_replacement=[ + (0, lambda iterable, r: None)], + compress=[ + (0, lambda data, selectors: None)], + count=[ + lambda start=0, step=1: None], + cycle=[ + lambda iterable: None], + dropwhile=[ + lambda predicate, iterable: None], + filterfalse=[ + lambda function, sequence: None], + groupby=[ + (0, lambda iterable, key=None: None)], + ifilter=[ + lambda function, sequence: None], + ifilterfalse=[ + lambda function, sequence: None], + imap=[ + lambda func, sequence, *iterables: None], + islice=[ + lambda iterable, stop: None, + lambda iterable, start, stop: None, + lambda iterable, start, stop, step: None], + izip=[ + lambda *iterables: None], + izip_longest=[ + (0, lambda *iterables: None, ('fillvalue',))], + permutations=[ + (0, lambda iterable, r=0: None)], + repeat=[ + (0, lambda object, times=0: None)], + starmap=[ + lambda function, sequence: None], + takewhile=[ + lambda predicate, iterable: None], + tee=[ + lambda iterable: None, + lambda iterable, n: None], + zip_longest=[ + (0, lambda *iterables: None, ('fillvalue',))], +) + +if PY3: # pragma: py2 no cover + module_info[itertools].update( + product=[ + (0, lambda *iterables: None, ('repeat',))], + ) +else: # pragma: py3 no cover + module_info[itertools].update( + product=[ + lambda *iterables: None], + ) + +module_info[operator] = dict( + __abs__=[ + lambda a: None], + __add__=[ + lambda a, b: None], + __and__=[ + lambda a, b: None], + __concat__=[ + lambda a, b: None], + __contains__=[ + lambda a, b: None], + __delitem__=[ + lambda a, b: None], + __delslice__=[ + lambda a, b, c: None], + __div__=[ + lambda a, b: None], + __eq__=[ + lambda a, b: None], + __floordiv__=[ + lambda a, b: None], + __ge__=[ + lambda a, b: None], + __getitem__=[ + lambda a, b: None], + __getslice__=[ + lambda a, b, c: None], + __gt__=[ + lambda a, b: None], + __iadd__=[ + lambda a, b: None], + __iand__=[ + lambda a, b: None], + __iconcat__=[ + lambda a, b: None], + __idiv__=[ + lambda a, b: None], + 
__ifloordiv__=[ + lambda a, b: None], + __ilshift__=[ + lambda a, b: None], + __imatmul__=[ + lambda a, b: None], + __imod__=[ + lambda a, b: None], + __imul__=[ + lambda a, b: None], + __index__=[ + lambda a: None], + __inv__=[ + lambda a: None], + __invert__=[ + lambda a: None], + __ior__=[ + lambda a, b: None], + __ipow__=[ + lambda a, b: None], + __irepeat__=[ + lambda a, b: None], + __irshift__=[ + lambda a, b: None], + __isub__=[ + lambda a, b: None], + __itruediv__=[ + lambda a, b: None], + __ixor__=[ + lambda a, b: None], + __le__=[ + lambda a, b: None], + __lshift__=[ + lambda a, b: None], + __lt__=[ + lambda a, b: None], + __matmul__=[ + lambda a, b: None], + __mod__=[ + lambda a, b: None], + __mul__=[ + lambda a, b: None], + __ne__=[ + lambda a, b: None], + __neg__=[ + lambda a: None], + __not__=[ + lambda a: None], + __or__=[ + lambda a, b: None], + __pos__=[ + lambda a: None], + __pow__=[ + lambda a, b: None], + __repeat__=[ + lambda a, b: None], + __rshift__=[ + lambda a, b: None], + __setitem__=[ + lambda a, b, c: None], + __setslice__=[ + lambda a, b, c, d: None], + __sub__=[ + lambda a, b: None], + __truediv__=[ + lambda a, b: None], + __xor__=[ + lambda a, b: None], + _abs=[ + lambda x: None], + _compare_digest=[ + lambda a, b: None], + abs=[ + lambda a: None], + add=[ + lambda a, b: None], + and_=[ + lambda a, b: None], + attrgetter=[ + lambda attr, *args: None], + concat=[ + lambda a, b: None], + contains=[ + lambda a, b: None], + countOf=[ + lambda a, b: None], + delitem=[ + lambda a, b: None], + delslice=[ + lambda a, b, c: None], + div=[ + lambda a, b: None], + eq=[ + lambda a, b: None], + floordiv=[ + lambda a, b: None], + ge=[ + lambda a, b: None], + getitem=[ + lambda a, b: None], + getslice=[ + lambda a, b, c: None], + gt=[ + lambda a, b: None], + iadd=[ + lambda a, b: None], + iand=[ + lambda a, b: None], + iconcat=[ + lambda a, b: None], + idiv=[ + lambda a, b: None], + ifloordiv=[ + lambda a, b: None], + ilshift=[ + lambda a, b: None], 
+ imatmul=[ + lambda a, b: None], + imod=[ + lambda a, b: None], + imul=[ + lambda a, b: None], + index=[ + lambda a: None], + indexOf=[ + lambda a, b: None], + inv=[ + lambda a: None], + invert=[ + lambda a: None], + ior=[ + lambda a, b: None], + ipow=[ + lambda a, b: None], + irepeat=[ + lambda a, b: None], + irshift=[ + lambda a, b: None], + is_=[ + lambda a, b: None], + is_not=[ + lambda a, b: None], + isCallable=[ + lambda a: None], + isMappingType=[ + lambda a: None], + isNumberType=[ + lambda a: None], + isSequenceType=[ + lambda a: None], + isub=[ + lambda a, b: None], + itemgetter=[ + lambda item, *args: None], + itruediv=[ + lambda a, b: None], + ixor=[ + lambda a, b: None], + le=[ + lambda a, b: None], + length_hint=[ + lambda obj: None, + lambda obj, default: None], + lshift=[ + lambda a, b: None], + lt=[ + lambda a, b: None], + matmul=[ + lambda a, b: None], + methodcaller=[ + lambda name, *args, **kwargs: None], + mod=[ + lambda a, b: None], + mul=[ + lambda a, b: None], + ne=[ + lambda a, b: None], + neg=[ + lambda a: None], + not_=[ + lambda a: None], + or_=[ + lambda a, b: None], + pos=[ + lambda a: None], + pow=[ + lambda a, b: None], + repeat=[ + lambda a, b: None], + rshift=[ + lambda a, b: None], + sequenceIncludes=[ + lambda a, b: None], + setitem=[ + lambda a, b, c: None], + setslice=[ + lambda a, b, c, d: None], + sub=[ + lambda a, b: None], + truediv=[ + lambda a, b: None], + truth=[ + lambda a: None], + xor=[ + lambda a, b: None], +) + +module_info['toolz'] = dict( + curry=[ + (0, lambda *args, **kwargs: None)], + excepts=[ + (0, lambda exc, func, handler=None: None)], + flip=[ + (0, lambda func=None, a=None, b=None: None)], + juxt=[ + (0, lambda *funcs: None)], + memoize=[ + (0, lambda func=None, cache=None, key=None: None)], +) + +module_info['toolz.functoolz'] = dict( + Compose=[ + (0, lambda funcs: None)], + InstanceProperty=[ + (0, lambda fget=None, fset=None, fdel=None, doc=None, + classval=None: None)], +) + +if PY3: # pragma: py2 
no cover + def num_pos_args(sigspec): + """ Return the number of positional arguments. ``f(x, y=1)`` has 1""" + return sum(1 for x in sigspec.parameters.values() + if x.kind == x.POSITIONAL_OR_KEYWORD + and x.default is x.empty) + + def get_exclude_keywords(num_pos_only, sigspec): + """ Return the names of position-only arguments if func has **kwargs""" + if num_pos_only == 0: + return () + has_kwargs = any(x.kind == x.VAR_KEYWORD + for x in sigspec.parameters.values()) + if not has_kwargs: + return () + pos_args = list(sigspec.parameters.values())[:num_pos_only] + return tuple(x.name for x in pos_args) + + def signature_or_spec(func): + try: + return inspect.signature(func) + except (ValueError, TypeError): + return None + +else: # pragma: py3 no cover + def num_pos_args(sigspec): + """ Return the number of positional arguments. ``f(x, y=1)`` has 1""" + if sigspec.defaults: + return len(sigspec.args) - len(sigspec.defaults) + return len(sigspec.args) + + def get_exclude_keywords(num_pos_only, sigspec): + """ Return the names of position-only arguments if func has **kwargs""" + if num_pos_only == 0: + return () + has_kwargs = sigspec.keywords is not None + if not has_kwargs: + return () + return tuple(sigspec.args[:num_pos_only]) + + def signature_or_spec(func): + try: + return inspect.getargspec(func) + except TypeError: + return None + + +def expand_sig(sig): + """ Convert the signature spec in ``module_info`` to add to ``signatures`` + + The input signature spec is one of: + - ``lambda_func`` + - ``(num_position_args, lambda_func)`` + - ``(num_position_args, lambda_func, keyword_only_args)`` + + The output signature spec is: + ``(num_position_args, lambda_func, keyword_exclude, sigspec)`` + + where ``keyword_exclude`` includes keyword only arguments and, if variadic + keywords is present, the names of position-only argument. 
def check_valid(sig, args, kwargs):
    """ Like ``is_valid_args`` for the given signature spec

    ``sig`` is unpacked as ``(num_pos_only, func, keyword_exclude, sigspec)``.
    The call is rejected outright when fewer than ``num_pos_only`` positional
    arguments are supplied.  Otherwise every keyword named in
    ``keyword_exclude`` is left out of a copy of ``kwargs`` (the caller's
    mapping is never modified) and ``func`` is called with what remains; a
    ``TypeError`` raised by that call means the arguments are not valid.
    """
    required_positional, probe, excluded_names, _ = sig
    if len(args) < required_positional:
        return False
    if excluded_names:
        # Build a filtered copy instead of popping from the caller's dict.
        kwargs = {name: value for name, value in kwargs.items()
                  if name not in excluded_names}
    try:
        probe(*args, **kwargs)
    except TypeError:
        return False
    return True
def _is_arity(n, func):
    """ Like ``is_arity`` for builtins in our ``signatures`` registry

    Returns ``None`` when ``func`` is not registered.  Otherwise every
    registered signature spec is passed to ``check_arity``: the answer is
    ``True`` when all of the checks are truthy, ``None`` when only some of
    them are, and ``False`` when none of them are.
    """
    if func not in signatures:
        return None
    outcomes = [check_arity(n, spec) for spec in signatures[func]]
    if all(outcomes):
        return True
    # Mixed results cannot be decided either way.
    return None if any(outcomes) else False
""" Names that behave the same on Python 2 and Python 3

``map``, ``filter``, ``zip`` and ``range`` are bound to the builtins on
Python 3 and to their ``itertools`` / ``xrange`` counterparts on Python 2.
``iteritems``, ``iterkeys`` and ``itervalues`` are ``operator.methodcaller``
objects that call the matching dictionary method for the running interpreter.
"""
import operator
import sys

PY3 = sys.version_info >= (3,)
PY34 = sys.version_info[:2] == (3, 4)
PYPY = hasattr(sys, 'pypy_version_info')

__all__ = ('map', 'filter', 'range', 'zip', 'reduce', 'zip_longest',
           'iteritems', 'iterkeys', 'itervalues', 'filterfalse',
           'PY3', 'PY34', 'PYPY')

if not PY3:
    from collections import Sequence
    from itertools import ifilter as filter
    from itertools import ifilterfalse as filterfalse
    from itertools import imap as map
    from itertools import izip as zip
    from itertools import izip_longest as zip_longest
    range = xrange
    reduce = reduce
    iteritems = operator.methodcaller('iteritems')
    iterkeys = operator.methodcaller('iterkeys')
    itervalues = operator.methodcaller('itervalues')
else:
    from collections.abc import Sequence
    from functools import reduce
    from itertools import filterfalse
    from itertools import zip_longest
    # Re-bind the builtins so that they exist as attributes of this module
    # and can be imported from it by name.
    map = map
    filter = filter
    range = range
    zip = zip
    iteritems = operator.methodcaller('items')
    iterkeys = operator.methodcaller('keys')
    itervalues = operator.methodcaller('values')
simple expression of partial evaluation + >>> list(map(get(0), data)) + [1, 11, 111] + +See Also: + toolz.functoolz.curry +""" +import toolz +from . import operator +from toolz import ( + apply, + comp, + complement, + compose, + compose_left, + concat, + concatv, + count, + curry, + diff, + first, + flip, + frequencies, + identity, + interleave, + isdistinct, + isiterable, + juxt, + last, + memoize, + merge_sorted, + peek, + pipe, + second, + thread_first, + thread_last, +) +from .exceptions import merge, merge_with + +accumulate = toolz.curry(toolz.accumulate) +assoc = toolz.curry(toolz.assoc) +assoc_in = toolz.curry(toolz.assoc_in) +cons = toolz.curry(toolz.cons) +countby = toolz.curry(toolz.countby) +dissoc = toolz.curry(toolz.dissoc) +do = toolz.curry(toolz.do) +drop = toolz.curry(toolz.drop) +excepts = toolz.curry(toolz.excepts) +filter = toolz.curry(toolz.filter) +get = toolz.curry(toolz.get) +get_in = toolz.curry(toolz.get_in) +groupby = toolz.curry(toolz.groupby) +interpose = toolz.curry(toolz.interpose) +itemfilter = toolz.curry(toolz.itemfilter) +itemmap = toolz.curry(toolz.itemmap) +iterate = toolz.curry(toolz.iterate) +join = toolz.curry(toolz.join) +keyfilter = toolz.curry(toolz.keyfilter) +keymap = toolz.curry(toolz.keymap) +map = toolz.curry(toolz.map) +mapcat = toolz.curry(toolz.mapcat) +nth = toolz.curry(toolz.nth) +partial = toolz.curry(toolz.partial) +partition = toolz.curry(toolz.partition) +partition_all = toolz.curry(toolz.partition_all) +partitionby = toolz.curry(toolz.partitionby) +peekn = toolz.curry(toolz.peekn) +pluck = toolz.curry(toolz.pluck) +random_sample = toolz.curry(toolz.random_sample) +reduce = toolz.curry(toolz.reduce) +reduceby = toolz.curry(toolz.reduceby) +remove = toolz.curry(toolz.remove) +sliding_window = toolz.curry(toolz.sliding_window) +sorted = toolz.curry(toolz.sorted) +tail = toolz.curry(toolz.tail) +take = toolz.curry(toolz.take) +take_nth = toolz.curry(toolz.take_nth) +topk = toolz.curry(toolz.topk) +unique = 
def should_curry(f):
    """ Decide whether ``f`` should be wrapped in ``curry``

    True when ``num_required_args`` cannot tell how many arguments ``f``
    requires (it returns ``None``), when ``f`` requires more than one
    argument, or when it requires exactly one and ``has_keywords`` does not
    return ``False`` for it.
    """
    required = num_required_args(f)
    if required is None or required > 1:
        return True
    # ``has_keywords`` is only consulted for single-argument callables.
    return required == 1 and has_keywords(f) is not False
def _get_factory(f, kwargs):
    """ Pop the ``factory`` option out of ``kwargs`` (default ``dict``)

    Any keyword left over afterwards is reported with a ``TypeError`` that
    names ``f``, mirroring the error of a normal keyword-only signature.
    """
    factory = kwargs.pop('factory', dict)
    if kwargs:
        raise TypeError("{}() got an unexpected keyword argument "
                        "'{}'".format(f.__name__, kwargs.popitem()[0]))
    return factory


def merge(*dicts, **kwargs):
    """ Merge a collection of dictionaries

    >>> merge({1: 'one'}, {2: 'two'})
    {1: 'one', 2: 'two'}

    Later dictionaries have precedence

    >>> merge({1: 2, 3: 4}, {3: 3, 4: 4})
    {1: 2, 3: 3, 4: 4}

    A single argument that is not a mapping is treated as an iterable of
    dictionaries

    >>> merge([{1: 2}, {3: 4}])
    {1: 2, 3: 4}

    The ``factory`` keyword (default ``dict``) is called without arguments to
    create the result.  Any other keyword raises ``TypeError``.

    See Also:
        merge_with
    """
    # Imported locally so the check works on both Python lines without
    # touching the module's import block.
    try:
        from collections.abc import Mapping
    except ImportError:  # Python 2
        from collections import Mapping

    # Test against the Mapping ABC rather than ``dict``: a lone mapping that
    # does not subclass ``dict`` is one dictionary to merge, not a collection
    # whose elements (its keys) are dictionaries.
    if len(dicts) == 1 and not isinstance(dicts[0], Mapping):
        dicts = dicts[0]
    factory = _get_factory(merge, kwargs)

    rv = factory()
    for d in dicts:
        rv.update(d)
    return rv
def valfilter(predicate, d, factory=dict):
    """ Keep the items of a dictionary whose value satisfies ``predicate``

    The result is built in a fresh ``factory()`` object; ``d`` is only read.

    >>> iseven = lambda x: x % 2 == 0
    >>> d = {1: 2, 2: 3, 3: 4, 4: 5}
    >>> valfilter(iseven, d)
    {1: 2, 3: 4}

    See Also:
        keyfilter
        itemfilter
        valmap
    """
    selected = factory()
    for key, value in iteritems(d):
        if not predicate(value):
            continue
        selected[key] = value
    return selected
def dissoc(d, *keys, **kwargs):
    """ Return a new dict with the given key(s) removed.

    New dict has d[key] deleted for each supplied key.
    Does not modify the initial dictionary.  The remaining items are inserted
    in the order in which ``d`` iterates over them.

    The ``factory`` keyword (default ``dict``) is called without arguments to
    create the result.

    >>> dissoc({'x': 1, 'y': 2}, 'y')
    {'x': 1}
    >>> dissoc({'x': 1, 'y': 2}, 'y', 'x')
    {}
    >>> dissoc({'x': 1}, 'y')  # Ignores missing keys
    {'x': 1}
    """
    factory = _get_factory(dissoc, kwargs)
    d2 = factory()

    if len(keys) < len(d) * .6:
        # Few removals: copy everything, then delete the unwanted keys.
        d2.update(d)
        for key in keys:
            if key in d2:
                del d2[key]
    else:
        # Many removals: copy only the survivors.  Walk ``d`` itself instead
        # of a set of its keys so the survivors keep ``d``'s iteration order,
        # exactly as they do in the branch above.
        removed = set(keys)
        for k in d:
            if k not in removed:
                d2[k] = d[k]
    return d2
def update_in(d, keys, func, default=None, factory=dict):
    """ Update value in a (potentially) nested dictionary

    inputs:
    d - dictionary on which to operate
    keys - list or tuple giving the location of the value to be changed in d
    func - function to operate on that value

    The dictionaries along the path ``keys`` are copied into new ``factory()``
    objects, so the input is never mutated.  When the full path exists, the
    value found there is replaced by ``func(value)``.  Wherever the path is
    missing, empty ``factory()`` dictionaries are created down to the last
    key, which is set to ``func(default)``.

    >>> inc = lambda x: x + 1
    >>> update_in({'a': 0}, ['a'], inc)
    {'a': 1}

    >>> # updating a value when k0 is not in d
    >>> update_in({}, [1, 2, 3], str, default="bar")
    {1: {2: {3: 'bar'}}}
    >>> update_in({1: 'foo'}, [2, 3, 4], inc, 0)
    {1: 'foo', 2: {3: {4: 1}}}
    """
    key_iter = iter(keys)
    current_key = next(key_iter)

    result = factory()
    result.update(d)
    cursor = result  # copy of the level that ``current_key`` belongs to

    for upcoming_key in key_iter:
        if current_key in d:
            # Descend into the existing level and work on a copy of it.
            d = d[current_key]
            level = factory()
            level.update(d)
        else:
            # Nothing to copy: from here on the source and the copy are the
            # same fresh, empty object.
            level = factory()
            d = level

        cursor[current_key] = level
        cursor = level
        current_key = upcoming_key

    if current_key in d:
        seed = d[current_key]
    else:
        seed = default
    cursor[current_key] = func(seed)
    return result
def apply(*func_and_args, **kwargs):
    """ Call a function with the given arguments and return its result

    The first positional argument is the function; the remaining positional
    arguments and all keyword arguments are passed on to it.  Calling
    ``apply`` without a function raises ``TypeError``.

    >>> def double(x): return 2*x
    >>> def inc(x):    return x + 1
    >>> apply(double, 5)
    10

    >>> tuple(map(apply, [double, inc, double], [10, 500, 8000]))
    (20, 501, 16000)
    """
    if func_and_args:
        return func_and_args[0](*func_and_args[1:], **kwargs)
    raise TypeError('func argument is required')
def thread_last(val, *forms):
    """ Thread value through a sequence of functions/forms

    Each form is applied to the result of the previous one.  A callable form
    is called with that result as its only argument.  A tuple form
    ``(func, a, b)`` is called as ``func(a, b, result)``, i.e. the running
    value is appended as the last argument.

    >>> def double(x): return 2*x
    >>> def inc(x):    return x + 1
    >>> thread_last(1, inc, double)
    4

    >>> def add(x, y): return x + y
    >>> def pow(x, y): return x**y
    >>> thread_last(1, (add, 4), (pow, 2))  # pow(2, add(4, 1))
    32

    >>> def iseven(x):
    ...     return x % 2 == 0
    >>> list(thread_last([1, 2, 3], (map, inc), (filter, iseven)))
    [2, 4]

    See Also:
        thread_first
    """
    result = val
    for form in forms:
        if callable(form):
            result = form(result)
        elif isinstance(form, tuple):
            result = form[0](*(form[1:] + (result,)))
        else:
            # A form that is neither callable nor a tuple yields None.
            result = None
    return result
class InstanceProperty(property):
    """ A ``property`` that yields ``classval`` when read from the class

    Reading the attribute on an instance behaves like a normal property;
    reading it on the owning class returns ``classval`` (``None`` unless one
    was given).

    Should not be used directly.  Use ``instanceproperty`` instead.
    """
    def __init__(self, fget=None, fset=None, fdel=None, doc=None,
                 classval=None):
        property.__init__(self, fget=fget, fset=fset, fdel=fdel, doc=doc)
        self.classval = classval

    def __get__(self, obj, type=None):
        # ``obj`` is None when the attribute is looked up on the class.
        return (self.classval if obj is None
                else property.__get__(self, obj, type))

    def __reduce__(self):
        # Rebuild by calling the constructor with the same five arguments.
        return InstanceProperty, (self.fget, self.fset, self.fdel,
                                  self.__doc__, self.classval)
Unpack and merge arguments + if ( + hasattr(func, 'func') + and hasattr(func, 'args') + and hasattr(func, 'keywords') + and isinstance(func.args, tuple) + ): + _kwargs = {} + if func.keywords: + _kwargs.update(func.keywords) + _kwargs.update(kwargs) + kwargs = _kwargs + args = func.args + args + func = func.func + + if kwargs: + self._partial = partial(func, *args, **kwargs) + else: + self._partial = partial(func, *args) + + self.__doc__ = getattr(func, '__doc__', None) + self.__name__ = getattr(func, '__name__', '<curry>') + self.__module__ = getattr(func, '__module__', None) + self.__qualname__ = getattr(func, '__qualname__', None) + self._sigspec = None + self._has_unknown_args = None + + @instanceproperty + def func(self): + return self._partial.func + + if PY3: # pragma: py2 no cover + @instanceproperty + def __signature__(self): + sig = inspect.signature(self.func) + args = self.args or () + keywords = self.keywords or {} + if is_partial_args(self.func, args, keywords, sig) is False: + raise TypeError('curry object has incorrect arguments') + + params = list(sig.parameters.values()) + skip = 0 + for param in params[:len(args)]: + if param.kind == param.VAR_POSITIONAL: + break + skip += 1 + + kwonly = False + newparams = [] + for param in params[skip:]: + kind = param.kind + default = param.default + if kind == param.VAR_KEYWORD: + pass + elif kind == param.VAR_POSITIONAL: + if kwonly: + continue + elif param.name in keywords: + default = keywords[param.name] + kind = param.KEYWORD_ONLY + kwonly = True + else: + if kwonly: + kind = param.KEYWORD_ONLY + if default is param.empty: + default = no_default + newparams.append(param.replace(default=default, kind=kind)) + + return sig.replace(parameters=newparams) + + @instanceproperty + def args(self): + return self._partial.args + + @instanceproperty + def keywords(self): + return self._partial.keywords + + @instanceproperty + def func_name(self): + return self.__name__ + + def __str__(self): + return str(self.func) 
+ + def __repr__(self): + return repr(self.func) + + def __hash__(self): + return hash((self.func, self.args, + frozenset(self.keywords.items()) if self.keywords + else None)) + + def __eq__(self, other): + return (isinstance(other, curry) and self.func == other.func and + self.args == other.args and self.keywords == other.keywords) + + def __ne__(self, other): + return not self.__eq__(other) + + def __call__(self, *args, **kwargs): + try: + return self._partial(*args, **kwargs) + except TypeError as exc: + if self._should_curry(args, kwargs, exc): + return self.bind(*args, **kwargs) + raise + + def _should_curry(self, args, kwargs, exc=None): + func = self.func + args = self.args + args + if self.keywords: + kwargs = dict(self.keywords, **kwargs) + if self._sigspec is None: + sigspec = self._sigspec = _sigs.signature_or_spec(func) + self._has_unknown_args = has_varargs(func, sigspec) is not False + else: + sigspec = self._sigspec + + if is_partial_args(func, args, kwargs, sigspec) is False: + # Nothing can make the call valid + return False + elif self._has_unknown_args: + # The call may be valid and raised a TypeError, but we curry + # anyway because the function may have `*args`. This is useful + # for decorators with signature `func(*args, **kwargs)`. 
@curry
def memoize(func, cache=None, key=None):
    """ Cache a function's result for speedy future evaluation

    Considerations:
        Trades memory for speed.
        Only use on pure functions.

    >>> def add(x, y):  return x + y
    >>> add = memoize(add)

    Or use as a decorator

    >>> @memoize
    ... def add(x, y):
    ...     return x + y

    Use the ``cache`` keyword to provide a dict-like object as an initial cache

    >>> @memoize(cache={(1, 2): 3})
    ... def add(x, y):
    ...     return x + y

    Note that the above works as a decorator because ``memoize`` is curried.

    It is also possible to provide a ``key(args, kwargs)`` function that
    calculates keys used for the cache, which receives an ``args`` tuple and
    ``kwargs`` dict as input, and must return a hashable value.  However,
    the default key function should be sufficient most of the time.

    >>> # Use key function that ignores extraneous keyword arguments
    >>> @memoize(key=lambda args, kwargs: args)
    ... def add(x, y, verbose=False):
    ...     if verbose:
    ...         print('Calculating %s + %s' % (x, y))
    ...     return x + y

    The returned wrapper carries ``func``'s ``__name__`` (when it has one) and
    ``__doc__``, and exposes the original callable as ``__wrapped__``.
    """
    # A fresh dict per decorated function unless the caller supplies a cache.
    if cache is None:
        cache = {}

    try:
        # Anything other than a definite ``False`` is treated as "may accept
        # keyword arguments".
        may_have_kwargs = has_keywords(func) is not False
        # Is unary function (single arg, no variadic argument or keywords)?
        is_unary = is_arity(1, func)
    except TypeError:  # pragma: no cover
        # Introspection failed: fall back to the most general key function.
        may_have_kwargs = True
        is_unary = False

    if key is None:
        if is_unary:
            # The single argument itself is the cache key.
            # NOTE: this reads ``args[0]``, so the argument has to be passed
            # positionally.
            def key(args, kwargs):
                return args[0]
        elif may_have_kwargs:
            # Keyword arguments are frozen into a hashable set of items;
            # empty args/kwargs are normalised to None.
            def key(args, kwargs):
                return (
                    args or None,
                    frozenset(kwargs.items()) if kwargs else None,
                )
        else:
            def key(args, kwargs):
                return args

    def memof(*args, **kwargs):
        k = key(args, kwargs)
        try:
            return cache[k]
        except TypeError:
            # Only the cache lookup is guarded here; a TypeError raised while
            # computing the key itself propagates unchanged.
            raise TypeError("Arguments to memoized function must be hashable")
        except KeyError:
            # Cache miss: compute, store, and return the result.
            cache[k] = result = func(*args, **kwargs)
            return result

    try:
        memof.__name__ = func.__name__
    except AttributeError:
        # Callables without a ``__name__`` keep the wrapper's own name.
        pass
    memof.__doc__ = func.__doc__
    memof.__wrapped__ = func
    return memof
+ return 'A composition of functions' + + @property + def __name__(self): + try: + return '_of_'.join( + (f.__name__ for f in reversed((self.first,) + self.funcs)) + ) + except AttributeError: + return type(self).__name__ + + def __repr__(self): + return '{.__class__.__name__}{!r}'.format( + self, tuple(reversed((self.first, ) + self.funcs))) + + def __eq__(self, other): + if isinstance(other, Compose): + return other.first == self.first and other.funcs == self.funcs + return NotImplemented + + def __ne__(self, other): + equality = self.__eq__(other) + return NotImplemented if equality is NotImplemented else not equality + + def __hash__(self): + return hash(self.first) ^ hash(self.funcs) + + # Mimic the descriptor behavior of python functions. + # i.e. let Compose be called as a method when bound to a class. + if PY3: # pragma: py2 no cover + # adapted from + # docs.python.org/3/howto/descriptor.html#functions-and-methods + def __get__(self, obj, objtype=None): + return self if obj is None else MethodType(self, obj) + else: # pragma: py3 no cover + # adapted from + # docs.python.org/2/howto/descriptor.html#functions-and-methods + def __get__(self, obj, objtype=None): + return self if obj is None else MethodType(self, obj, objtype) + + # introspection with Signature is only possible from py3.3+ + if PY3: # pragma: py2 no cover + @instanceproperty + def __signature__(self): + base = inspect.signature(self.first) + last = inspect.signature(self.funcs[-1]) + return base.replace(return_annotation=last.return_annotation) + + __wrapped__ = instanceproperty(attrgetter('first')) + + +def compose(*funcs): + """ Compose functions to operate in series. + + Returns a function that applies other functions in sequence. + + Functions are applied from right to left so that + ``compose(f, g, h)(x, y)`` is the same as ``f(g(h(x, y)))``. + + If no arguments are provided, the identity function (f(x) = x) is returned. 
+ + >>> inc = lambda i: i + 1 + >>> compose(str, inc)(3) + '4' + + See Also: + compose_left + pipe + """ + if not funcs: + return identity + if len(funcs) == 1: + return funcs[0] + else: + return Compose(funcs) + + +def compose_left(*funcs): + """ Compose functions to operate in series. + + Returns a function that applies other functions in sequence. + + Functions are applied from left to right so that + ``compose_left(f, g, h)(x, y)`` is the same as ``h(g(f(x, y)))``. + + If no arguments are provided, the identity function (f(x) = x) is returned. + + >>> inc = lambda i: i + 1 + >>> compose_left(inc, str)(3) + '4' + + See Also: + compose + pipe + """ + return compose(*reversed(funcs)) + + +def pipe(data, *funcs): + """ Pipe a value through a sequence of functions + + I.e. ``pipe(data, f, g, h)`` is equivalent to ``h(g(f(data)))`` + + We think of the value as progressing through a pipe of several + transformations, much like pipes in UNIX + + ``$ cat data | f | g | h`` + + >>> double = lambda i: 2 * i + >>> pipe(3, double, str) + '6' + + See Also: + compose + compose_left + thread_first + thread_last + """ + for func in funcs: + data = func(data) + return data + + +def complement(func): + """ Convert a predicate function to its logical complement. + + In other words, return a function that, for inputs that normally + yield True, yields False, and vice-versa. + + >>> def iseven(n): return n % 2 == 0 + >>> isodd = complement(iseven) + >>> iseven(2) + True + >>> isodd(2) + False + """ + return compose(operator.not_, func) + + +class juxt(object): + """ Creates a function that calls several functions with the same arguments + + Takes several functions and returns a function that applies its arguments + to each of those functions then returns a tuple of the results. + + Name comes from juxtaposition: the fact of two things being seen or placed + close together with contrasting effect. 
+ + >>> inc = lambda x: x + 1 + >>> double = lambda x: x * 2 + >>> juxt(inc, double)(10) + (11, 20) + >>> juxt([inc, double])(10) + (11, 20) + """ + __slots__ = ['funcs'] + + def __init__(self, *funcs): + if len(funcs) == 1 and not callable(funcs[0]): + funcs = funcs[0] + self.funcs = tuple(funcs) + + def __call__(self, *args, **kwargs): + return tuple(func(*args, **kwargs) for func in self.funcs) + + def __getstate__(self): + return self.funcs + + def __setstate__(self, state): + self.funcs = state + + +def do(func, x): + """ Runs ``func`` on ``x``, returns ``x`` + + Because the results of ``func`` are not returned, only the side + effects of ``func`` are relevant. + + Logging functions can be made by composing ``do`` with a storage function + like ``list.append`` or ``file.write`` + + >>> from toolz import compose + >>> from toolz.curried import do + + >>> log = [] + >>> inc = lambda x: x + 1 + >>> inc = compose(inc, do(log.append)) + >>> inc(1) + 2 + >>> inc(11) + 12 + >>> log + [1, 11] + """ + func(x) + return x + + +@curry +def flip(func, a, b): + """ Call the function call with the arguments flipped + + This function is curried. + + >>> def div(a, b): + ... return a // b + ... + >>> flip(div, 2, 6) + 3 + >>> div_by_two = flip(div, 2) + >>> div_by_two(4) + 2 + + This is particularly useful for built in functions and functions defined + in C extensions that accept positional only arguments. For example: + isinstance, issubclass. + + >>> data = [1, 'a', 'b', 2, 1.5, object(), 3] + >>> only_ints = list(filter(flip(isinstance, int), data)) + >>> only_ints + [1, 2, 3] + """ + return func(b, a) + + +def return_none(exc): + """ Returns None. + """ + return None + + +class excepts(object): + """A wrapper around a function to catch exceptions and + dispatch to a handler. + + This is like a functional try/except block, in the same way that + ifexprs are functional if/else blocks. + + Examples + -------- + >>> excepting = excepts( + ... ValueError, + ... 
lambda a: [1, 2].index(a), + ... lambda _: -1, + ... ) + >>> excepting(1) + 0 + >>> excepting(3) + -1 + + Multiple exceptions and default except clause. + >>> excepting = excepts((IndexError, KeyError), lambda a: a[0]) + >>> excepting([]) + >>> excepting([1]) + 1 + >>> excepting({}) + >>> excepting({0: 1}) + 1 + """ + def __init__(self, exc, func, handler=return_none): + self.exc = exc + self.func = func + self.handler = handler + + def __call__(self, *args, **kwargs): + try: + return self.func(*args, **kwargs) + except self.exc as e: + return self.handler(e) + + @instanceproperty(classval=__doc__) + def __doc__(self): + exc = self.exc + try: + if isinstance(exc, tuple): + exc_name = '(%s)' % ', '.join( + map(attrgetter('__name__'), exc), + ) + else: + exc_name = exc.__name__ + + return dedent( + """\ + A wrapper around {inst.func.__name__!r} that will except: + {exc} + and handle any exceptions with {inst.handler.__name__!r}. + + Docs for {inst.func.__name__!r}: + {inst.func.__doc__} + + Docs for {inst.handler.__name__!r}: + {inst.handler.__doc__} + """ + ).format( + inst=self, + exc=exc_name, + ) + except AttributeError: + return type(self).__doc__ + + @property + def __name__(self): + exc = self.exc + try: + if isinstance(exc, tuple): + exc_name = '_or_'.join(map(attrgetter('__name__'), exc)) + else: + exc_name = exc.__name__ + return '%s_excepting_%s' % (self.func.__name__, exc_name) + except AttributeError: + return 'excepting' + + +if PY3: # pragma: py2 no cover + def _check_sigspec(sigspec, func, builtin_func, *builtin_args): + if sigspec is None: + try: + sigspec = inspect.signature(func) + except (ValueError, TypeError) as e: + sigspec = e + if isinstance(sigspec, ValueError): + return None, builtin_func(*builtin_args) + elif not isinstance(sigspec, inspect.Signature): + if ( + func in _sigs.signatures + and (( + hasattr(func, '__signature__') + and hasattr(func.__signature__, '__get__') + )) + ): # pragma: no cover (not covered in Python 3.4) + val = 
builtin_func(*builtin_args) + return None, val + return None, False + return sigspec, None + +else: # pragma: py3 no cover + def _check_sigspec(sigspec, func, builtin_func, *builtin_args): + if sigspec is None: + try: + sigspec = inspect.getargspec(func) + except TypeError as e: + sigspec = e + if isinstance(sigspec, TypeError): + if not callable(func): + return None, False + return None, builtin_func(*builtin_args) + return sigspec, None + + +if PY34 or PYPY: # pragma: no cover + _check_sigspec_orig = _check_sigspec + + def _check_sigspec(sigspec, func, builtin_func, *builtin_args): + # Python 3.4 and PyPy may lie, so use our registry for builtins instead + if func in _sigs.signatures: + val = builtin_func(*builtin_args) + return None, val + return _check_sigspec_orig(sigspec, func, builtin_func, *builtin_args) + +_check_sigspec.__doc__ = """ \ +Private function to aid in introspection compatibly across Python versions. + +If a callable doesn't have a signature (Python 3) or an argspec (Python 2), +the signature registry in toolz._signatures is used. 
+""" + +if PY3: # pragma: py2 no cover + def num_required_args(func, sigspec=None): + sigspec, rv = _check_sigspec(sigspec, func, _sigs._num_required_args, + func) + if sigspec is None: + return rv + return sum(1 for p in sigspec.parameters.values() + if p.default is p.empty + and p.kind in (p.POSITIONAL_OR_KEYWORD, p.POSITIONAL_ONLY)) + + def has_varargs(func, sigspec=None): + sigspec, rv = _check_sigspec(sigspec, func, _sigs._has_varargs, func) + if sigspec is None: + return rv + return any(p.kind == p.VAR_POSITIONAL + for p in sigspec.parameters.values()) + + def has_keywords(func, sigspec=None): + sigspec, rv = _check_sigspec(sigspec, func, _sigs._has_keywords, func) + if sigspec is None: + return rv + return any(p.default is not p.empty + or p.kind in (p.KEYWORD_ONLY, p.VAR_KEYWORD) + for p in sigspec.parameters.values()) + + def is_valid_args(func, args, kwargs, sigspec=None): + sigspec, rv = _check_sigspec(sigspec, func, _sigs._is_valid_args, + func, args, kwargs) + if sigspec is None: + return rv + try: + sigspec.bind(*args, **kwargs) + except TypeError: + return False + return True + + def is_partial_args(func, args, kwargs, sigspec=None): + sigspec, rv = _check_sigspec(sigspec, func, _sigs._is_partial_args, + func, args, kwargs) + if sigspec is None: + return rv + try: + sigspec.bind_partial(*args, **kwargs) + except TypeError: + return False + return True + +else: # pragma: py3 no cover + def num_required_args(func, sigspec=None): + sigspec, rv = _check_sigspec(sigspec, func, _sigs._num_required_args, + func) + if sigspec is None: + return rv + num_defaults = len(sigspec.defaults) if sigspec.defaults else 0 + return len(sigspec.args) - num_defaults + + def has_varargs(func, sigspec=None): + sigspec, rv = _check_sigspec(sigspec, func, _sigs._has_varargs, func) + if sigspec is None: + return rv + return sigspec.varargs is not None + + def has_keywords(func, sigspec=None): + sigspec, rv = _check_sigspec(sigspec, func, _sigs._has_keywords, func) + if sigspec 
is None: + return rv + return sigspec.defaults is not None or sigspec.keywords is not None + + def is_valid_args(func, args, kwargs, sigspec=None): + sigspec, rv = _check_sigspec(sigspec, func, _sigs._is_valid_args, + func, args, kwargs) + if sigspec is None: + return rv + spec = sigspec + defaults = spec.defaults or () + num_pos = len(spec.args) - len(defaults) + missing_pos = spec.args[len(args):num_pos] + if any(arg not in kwargs for arg in missing_pos): + return False + + if spec.varargs is None: + num_extra_pos = max(0, len(args) - num_pos) + else: + num_extra_pos = 0 + + kwargs = dict(kwargs) + + # Add missing keyword arguments (unless already included in `args`) + missing_kwargs = spec.args[num_pos + num_extra_pos:] + kwargs.update(zip(missing_kwargs, defaults[num_extra_pos:])) + + # Convert call to use positional arguments + args = args + tuple(kwargs.pop(key) for key in spec.args[len(args):]) + + if ( + not spec.keywords and kwargs + or not spec.varargs and len(args) > len(spec.args) + or set(spec.args[:len(args)]) & set(kwargs) + ): + return False + else: + return True + + def is_partial_args(func, args, kwargs, sigspec=None): + sigspec, rv = _check_sigspec(sigspec, func, _sigs._is_partial_args, + func, args, kwargs) + if sigspec is None: + return rv + spec = sigspec + defaults = spec.defaults or () + num_pos = len(spec.args) - len(defaults) + if spec.varargs is None: + num_extra_pos = max(0, len(args) - num_pos) + else: + num_extra_pos = 0 + + kwargs = dict(kwargs) + + # Add missing keyword arguments (unless already included in `args`) + missing_kwargs = spec.args[num_pos + num_extra_pos:] + kwargs.update(zip(missing_kwargs, defaults[num_extra_pos:])) + + # Add missing position arguments as keywords (may already be in kwargs) + missing_args = spec.args[len(args):num_pos + num_extra_pos] + kwargs.update((x, None) for x in missing_args) + + # Convert call to use positional arguments + args = args + tuple(kwargs.pop(key) for key in spec.args[len(args):]) + 
+ if ( + not spec.keywords and kwargs + or not spec.varargs and len(args) > len(spec.args) + or set(spec.args[:len(args)]) & set(kwargs) + ): + return False + else: + return True + + +def is_arity(n, func, sigspec=None): + """ Does a function have only n positional arguments? + + This function relies on introspection and does not call the function. + Returns None if validity can't be determined. + + >>> def f(x): + ... return x + >>> is_arity(1, f) + True + >>> def g(x, y=1): + ... return x + y + >>> is_arity(1, g) + False + """ + sigspec, rv = _check_sigspec(sigspec, func, _sigs._is_arity, n, func) + if sigspec is None: + return rv + num = num_required_args(func, sigspec) + if num is not None: + num = num == n + if not num: + return False + varargs = has_varargs(func, sigspec) + if varargs: + return False + keywords = has_keywords(func, sigspec) + if keywords: + return False + if num is None or varargs is None or keywords is None: # pragma: no cover + return None + return True + + +num_required_args.__doc__ = """ \ +Number of required positional arguments + + This function relies on introspection and does not call the function. + Returns None if validity can't be determined. + + >>> def f(x, y, z=3): + ... return x + y + z + >>> num_required_args(f) + 2 + >>> def g(*args, **kwargs): + ... pass + >>> num_required_args(g) + 0 + """ + +has_varargs.__doc__ = """ \ +Does a function have variadic positional arguments? + + This function relies on introspection and does not call the function. + Returns None if validity can't be determined. + + >>> def f(*args): + ... return args + >>> has_varargs(f) + True + >>> def g(**kwargs): + ... return kwargs + >>> has_varargs(g) + False + """ + +has_keywords.__doc__ = """ \ +Does a function have keyword arguments? + + This function relies on introspection and does not call the function. + Returns None if validity can't be determined. + + >>> def f(x, y=0): + ... 
return x + y + + >>> has_keywords(f) + True + """ + +is_valid_args.__doc__ = """ \ +Is ``func(*args, **kwargs)`` a valid function call? + + This function relies on introspection and does not call the function. + Returns None if validity can't be determined. + + >>> def add(x, y): + ... return x + y + + >>> is_valid_args(add, (1,), {}) + False + >>> is_valid_args(add, (1, 2), {}) + True + >>> is_valid_args(map, (), {}) + False + + **Implementation notes** + Python 2 relies on ``inspect.getargspec``, which only works for + user-defined functions. Python 3 uses ``inspect.signature``, which + works for many more types of callables. + + Many builtins in the standard library are also supported. + """ + +is_partial_args.__doc__ = """ \ +Can partial(func, *args, **kwargs)(*args2, **kwargs2) be a valid call? + + Returns True *only* if the call is valid or if it is possible for the + call to become valid by adding more positional or keyword arguments. + + This function relies on introspection and does not call the function. + Returns None if validity can't be determined. + + >>> def add(x, y): + ... return x + y + + >>> is_partial_args(add, (1,), {}) + True + >>> is_partial_args(add, (1, 2), {}) + True + >>> is_partial_args(add, (1, 2, 3), {}) + False + >>> is_partial_args(map, (), {}) + True + + **Implementation notes** + Python 2 relies on ``inspect.getargspec``, which only works for + user-defined functions. Python 3 uses ``inspect.signature``, which + works for many more types of callables. + + Many builtins in the standard library are also supported. + """ + +from . 
import _signatures as _sigs diff --git a/contrib/python/toolz/py2/toolz/itertoolz.py b/contrib/python/toolz/py2/toolz/itertoolz.py new file mode 100644 index 0000000000..e71f1eeef0 --- /dev/null +++ b/contrib/python/toolz/py2/toolz/itertoolz.py @@ -0,0 +1,1056 @@ +import itertools +import heapq +import collections +import operator +from functools import partial +from random import Random +from toolz.compatibility import (map, filterfalse, zip, zip_longest, iteritems, + filter, Sequence) +from toolz.utils import no_default + + +__all__ = ('remove', 'accumulate', 'groupby', 'merge_sorted', 'interleave', + 'unique', 'isiterable', 'isdistinct', 'take', 'drop', 'take_nth', + 'first', 'second', 'nth', 'last', 'get', 'concat', 'concatv', + 'mapcat', 'cons', 'interpose', 'frequencies', 'reduceby', 'iterate', + 'sliding_window', 'partition', 'partition_all', 'count', 'pluck', + 'join', 'tail', 'diff', 'topk', 'peek', 'peekn', 'random_sample') + + +def remove(predicate, seq): + """ Return those items of sequence for which predicate(item) is False + + >>> def iseven(x): + ... return x % 2 == 0 + >>> list(remove(iseven, [1, 2, 3, 4])) + [1, 3] + """ + return filterfalse(predicate, seq) + + +def accumulate(binop, seq, initial=no_default): + """ Repeatedly apply binary function to a sequence, accumulating results + + >>> from operator import add, mul + >>> list(accumulate(add, [1, 2, 3, 4, 5])) + [1, 3, 6, 10, 15] + >>> list(accumulate(mul, [1, 2, 3, 4, 5])) + [1, 2, 6, 24, 120] + + Accumulate is similar to ``reduce`` and is good for making functions like + cumulative sum: + + >>> from functools import partial, reduce + >>> sum = partial(reduce, add) + >>> cumsum = partial(accumulate, add) + + Accumulate also takes an optional argument that will be used as the first + value. This is similar to reduce. 
+ + >>> list(accumulate(add, [1, 2, 3], -1)) + [-1, 0, 2, 5] + >>> list(accumulate(add, [], 1)) + [1] + + See Also: + itertools.accumulate : In standard itertools for Python 3.2+ + """ + seq = iter(seq) + if initial == no_default: + try: + result = next(seq) + except StopIteration: + return + else: + result = initial + yield result + for elem in seq: + result = binop(result, elem) + yield result + + +def groupby(key, seq): + """ Group a collection by a key function + + >>> names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank'] + >>> groupby(len, names) # doctest: +SKIP + {3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']} + + >>> iseven = lambda x: x % 2 == 0 + >>> groupby(iseven, [1, 2, 3, 4, 5, 6, 7, 8]) # doctest: +SKIP + {False: [1, 3, 5, 7], True: [2, 4, 6, 8]} + + Non-callable keys imply grouping on a member. + + >>> groupby('gender', [{'name': 'Alice', 'gender': 'F'}, + ... {'name': 'Bob', 'gender': 'M'}, + ... {'name': 'Charlie', 'gender': 'M'}]) # doctest:+SKIP + {'F': [{'gender': 'F', 'name': 'Alice'}], + 'M': [{'gender': 'M', 'name': 'Bob'}, + {'gender': 'M', 'name': 'Charlie'}]} + + Not to be confused with ``itertools.groupby`` + + See Also: + countby + """ + if not callable(key): + key = getter(key) + d = collections.defaultdict(lambda: [].append) + for item in seq: + d[key(item)](item) + rv = {} + for k, v in iteritems(d): + rv[k] = v.__self__ + return rv + + +def merge_sorted(*seqs, **kwargs): + """ Merge and sort a collection of sorted collections + + This works lazily and only keeps one value from each iterable in memory. + + >>> list(merge_sorted([1, 3, 5], [2, 4, 6])) + [1, 2, 3, 4, 5, 6] + + >>> ''.join(merge_sorted('abc', 'abc', 'abc')) + 'aaabbbccc' + + The "key" function used to sort the input may be passed as a keyword. 
+ + >>> list(merge_sorted([2, 3], [1, 3], key=lambda x: x // 3)) + [2, 1, 3, 3] + """ + if len(seqs) == 0: + return iter([]) + elif len(seqs) == 1: + return iter(seqs[0]) + + key = kwargs.get('key', None) + if key is None: + return _merge_sorted_binary(seqs) + else: + return _merge_sorted_binary_key(seqs, key) + + +def _merge_sorted_binary(seqs): + mid = len(seqs) // 2 + L1 = seqs[:mid] + if len(L1) == 1: + seq1 = iter(L1[0]) + else: + seq1 = _merge_sorted_binary(L1) + L2 = seqs[mid:] + if len(L2) == 1: + seq2 = iter(L2[0]) + else: + seq2 = _merge_sorted_binary(L2) + + try: + val2 = next(seq2) + except StopIteration: + for val1 in seq1: + yield val1 + return + + for val1 in seq1: + if val2 < val1: + yield val2 + for val2 in seq2: + if val2 < val1: + yield val2 + else: + yield val1 + break + else: + break + else: + yield val1 + else: + yield val2 + for val2 in seq2: + yield val2 + return + yield val1 + for val1 in seq1: + yield val1 + + +def _merge_sorted_binary_key(seqs, key): + mid = len(seqs) // 2 + L1 = seqs[:mid] + if len(L1) == 1: + seq1 = iter(L1[0]) + else: + seq1 = _merge_sorted_binary_key(L1, key) + L2 = seqs[mid:] + if len(L2) == 1: + seq2 = iter(L2[0]) + else: + seq2 = _merge_sorted_binary_key(L2, key) + + try: + val2 = next(seq2) + except StopIteration: + for val1 in seq1: + yield val1 + return + key2 = key(val2) + + for val1 in seq1: + key1 = key(val1) + if key2 < key1: + yield val2 + for val2 in seq2: + key2 = key(val2) + if key2 < key1: + yield val2 + else: + yield val1 + break + else: + break + else: + yield val1 + else: + yield val2 + for val2 in seq2: + yield val2 + return + yield val1 + for val1 in seq1: + yield val1 + + +def interleave(seqs): + """ Interleave a sequence of sequences + + >>> list(interleave([[1, 2], [3, 4]])) + [1, 3, 2, 4] + + >>> ''.join(interleave(('ABC', 'XY'))) + 'AXBYC' + + Both the individual sequences and the sequence of sequences may be infinite + + Returns a lazy iterator + """ + iters = itertools.cycle(map(iter, seqs)) 
+ while True: + try: + for itr in iters: + yield next(itr) + return + except StopIteration: + predicate = partial(operator.is_not, itr) + iters = itertools.cycle(itertools.takewhile(predicate, iters)) + + +def unique(seq, key=None): + """ Return only unique elements of a sequence + + >>> tuple(unique((1, 2, 3))) + (1, 2, 3) + >>> tuple(unique((1, 2, 1, 3))) + (1, 2, 3) + + Uniqueness can be defined by key keyword + + >>> tuple(unique(['cat', 'mouse', 'dog', 'hen'], key=len)) + ('cat', 'mouse') + """ + seen = set() + seen_add = seen.add + if key is None: + for item in seq: + if item not in seen: + seen_add(item) + yield item + else: # calculate key + for item in seq: + val = key(item) + if val not in seen: + seen_add(val) + yield item + + +def isiterable(x): + """ Is x iterable? + + >>> isiterable([1, 2, 3]) + True + >>> isiterable('abc') + True + >>> isiterable(5) + False + """ + try: + iter(x) + return True + except TypeError: + return False + + +def isdistinct(seq): + """ All values in sequence are distinct + + >>> isdistinct([1, 2, 3]) + True + >>> isdistinct([1, 2, 1]) + False + + >>> isdistinct("Hello") + False + >>> isdistinct("World") + True + """ + if iter(seq) is seq: + seen = set() + seen_add = seen.add + for item in seq: + if item in seen: + return False + seen_add(item) + return True + else: + return len(seq) == len(set(seq)) + + +def take(n, seq): + """ The first n elements of a sequence + + >>> list(take(2, [10, 20, 30, 40, 50])) + [10, 20] + + See Also: + drop + tail + """ + return itertools.islice(seq, n) + + +def tail(n, seq): + """ The last n elements of a sequence + + >>> tail(2, [10, 20, 30, 40, 50]) + [40, 50] + + See Also: + drop + take + """ + try: + return seq[-n:] + except (TypeError, KeyError): + return tuple(collections.deque(seq, n)) + + +def drop(n, seq): + """ The sequence following the first n elements + + >>> list(drop(2, [10, 20, 30, 40, 50])) + [30, 40, 50] + + See Also: + take + tail + """ + return itertools.islice(seq, n, None) + 
+ +def take_nth(n, seq): + """ Every nth item in seq + + >>> list(take_nth(2, [10, 20, 30, 40, 50])) + [10, 30, 50] + """ + return itertools.islice(seq, 0, None, n) + + +def first(seq): + """ The first element in a sequence + + >>> first('ABC') + 'A' + """ + return next(iter(seq)) + + +def second(seq): + """ The second element in a sequence + + >>> second('ABC') + 'B' + """ + seq = iter(seq) + next(seq) + return next(seq) + + +def nth(n, seq): + """ The nth element in a sequence + + >>> nth(1, 'ABC') + 'B' + """ + if isinstance(seq, (tuple, list, Sequence)): + return seq[n] + else: + return next(itertools.islice(seq, n, None)) + + +def last(seq): + """ The last element in a sequence + + >>> last('ABC') + 'C' + """ + return tail(1, seq)[0] + + +rest = partial(drop, 1) + + +def _get(ind, seq, default): + try: + return seq[ind] + except (KeyError, IndexError): + return default + + +def get(ind, seq, default=no_default): + """ Get element in a sequence or dict + + Provides standard indexing + + >>> get(1, 'ABC') # Same as 'ABC'[1] + 'B' + + Pass a list to get multiple values + + >>> get([1, 2], 'ABC') # ('ABC'[1], 'ABC'[2]) + ('B', 'C') + + Works on any value that supports indexing/getitem + For example here we see that it works with dictionaries + + >>> phonebook = {'Alice': '555-1234', + ... 'Bob': '555-5678', + ... 
'Charlie':'555-9999'} + >>> get('Alice', phonebook) + '555-1234' + + >>> get(['Alice', 'Bob'], phonebook) + ('555-1234', '555-5678') + + Provide a default for missing values + + >>> get(['Alice', 'Dennis'], phonebook, None) + ('555-1234', None) + + See Also: + pluck + """ + try: + return seq[ind] + except TypeError: # `ind` may be a list + if isinstance(ind, list): + if default == no_default: + if len(ind) > 1: + return operator.itemgetter(*ind)(seq) + elif ind: + return seq[ind[0]], + else: + return () + else: + return tuple(_get(i, seq, default) for i in ind) + elif default != no_default: + return default + else: + raise + except (KeyError, IndexError): # we know `ind` is not a list + if default == no_default: + raise + else: + return default + + +def concat(seqs): + """ Concatenate zero or more iterables, any of which may be infinite. + + An infinite sequence will prevent the rest of the arguments from + being included. + + We use chain.from_iterable rather than ``chain(*seqs)`` so that seqs + can be a generator. + + >>> list(concat([[], [1], [2, 3]])) + [1, 2, 3] + + See also: + itertools.chain.from_iterable equivalent + """ + return itertools.chain.from_iterable(seqs) + + +def concatv(*seqs): + """ Variadic version of concat + + >>> list(concatv([], ["a"], ["b", "c"])) + ['a', 'b', 'c'] + + See also: + itertools.chain + """ + return concat(seqs) + + +def mapcat(func, seqs): + """ Apply func to each sequence in seqs, concatenating results. + + >>> list(mapcat(lambda s: [c.upper() for c in s], + ... [["a", "b"], ["c", "d", "e"]])) + ['A', 'B', 'C', 'D', 'E'] + """ + return concat(map(func, seqs)) + + +def cons(el, seq): + """ Add el to beginning of (possibly infinite) sequence seq. 
+ + >>> list(cons(1, [2, 3])) + [1, 2, 3] + """ + return itertools.chain([el], seq) + + +def interpose(el, seq): + """ Introduce element between each pair of elements in seq + + >>> list(interpose("a", [1, 2, 3])) + [1, 'a', 2, 'a', 3] + """ + inposed = concat(zip(itertools.repeat(el), seq)) + next(inposed) + return inposed + + +def frequencies(seq): + """ Find number of occurrences of each value in seq + + >>> frequencies(['cat', 'cat', 'ox', 'pig', 'pig', 'cat']) #doctest: +SKIP + {'cat': 3, 'ox': 1, 'pig': 2} + + See Also: + countby + groupby + """ + d = collections.defaultdict(int) + for item in seq: + d[item] += 1 + return dict(d) + + +def reduceby(key, binop, seq, init=no_default): + """ Perform a simultaneous groupby and reduction + + The computation: + + >>> result = reduceby(key, binop, seq, init) # doctest: +SKIP + + is equivalent to the following: + + >>> def reduction(group): # doctest: +SKIP + ... return reduce(binop, group, init) # doctest: +SKIP + + >>> groups = groupby(key, seq) # doctest: +SKIP + >>> result = valmap(reduction, groups) # doctest: +SKIP + + But the former does not build the intermediate groups, allowing it to + operate in much less space. This makes it suitable for larger datasets + that do not fit comfortably in memory + + The ``init`` keyword argument is the default initialization of the + reduction. This can be either a constant value like ``0`` or a callable + like ``lambda : 0`` as might be used in ``defaultdict``. + + Simple Examples + --------------- + + >>> from operator import add, mul + >>> iseven = lambda x: x % 2 == 0 + + >>> data = [1, 2, 3, 4, 5] + + >>> reduceby(iseven, add, data) # doctest: +SKIP + {False: 9, True: 6} + + >>> reduceby(iseven, mul, data) # doctest: +SKIP + {False: 15, True: 8} + + Complex Example + --------------- + + >>> projects = [{'name': 'build roads', 'state': 'CA', 'cost': 1000000}, + ... {'name': 'fight crime', 'state': 'IL', 'cost': 100000}, + ... 
{'name': 'help farmers', 'state': 'IL', 'cost': 2000000}, + ... {'name': 'help farmers', 'state': 'CA', 'cost': 200000}] + + >>> reduceby('state', # doctest: +SKIP + ... lambda acc, x: acc + x['cost'], + ... projects, 0) + {'CA': 1200000, 'IL': 2100000} + + Example Using ``init`` + ---------------------- + + >>> def set_add(s, i): + ... s.add(i) + ... return s + + >>> reduceby(iseven, set_add, [1, 2, 3, 4, 1, 2, 3], set) # doctest: +SKIP + {True: set([2, 4]), + False: set([1, 3])} + """ + is_no_default = init == no_default + if not is_no_default and not callable(init): + _init = init + init = lambda: _init + if not callable(key): + key = getter(key) + d = {} + for item in seq: + k = key(item) + if k not in d: + if is_no_default: + d[k] = item + continue + else: + d[k] = init() + d[k] = binop(d[k], item) + return d + + +def iterate(func, x): + """ Repeatedly apply a function func onto an original input + + Yields x, then func(x), then func(func(x)), then func(func(func(x))), etc.. + + >>> def inc(x): return x + 1 + >>> counter = iterate(inc, 0) + >>> next(counter) + 0 + >>> next(counter) + 1 + >>> next(counter) + 2 + + >>> double = lambda x: x * 2 + >>> powers_of_two = iterate(double, 1) + >>> next(powers_of_two) + 1 + >>> next(powers_of_two) + 2 + >>> next(powers_of_two) + 4 + >>> next(powers_of_two) + 8 + """ + while True: + yield x + x = func(x) + + +def sliding_window(n, seq): + """ A sequence of overlapping subsequences + + >>> list(sliding_window(2, [1, 2, 3, 4])) + [(1, 2), (2, 3), (3, 4)] + + This function creates a sliding window suitable for transformations like + sliding means / smoothing + + >>> mean = lambda seq: float(sum(seq)) / len(seq) + >>> list(map(mean, sliding_window(2, [1, 2, 3, 4]))) + [1.5, 2.5, 3.5] + """ + return zip(*(collections.deque(itertools.islice(it, i), 0) or it + for i, it in enumerate(itertools.tee(seq, n)))) + + +no_pad = '__no__pad__' + + +def partition(n, seq, pad=no_pad): + """ Partition sequence into tuples of length n + + 
>>> list(partition(2, [1, 2, 3, 4])) + [(1, 2), (3, 4)] + + If the length of ``seq`` is not evenly divisible by ``n``, the final tuple + is dropped if ``pad`` is not specified, or filled to length ``n`` by pad: + + >>> list(partition(2, [1, 2, 3, 4, 5])) + [(1, 2), (3, 4)] + + >>> list(partition(2, [1, 2, 3, 4, 5], pad=None)) + [(1, 2), (3, 4), (5, None)] + + See Also: + partition_all + """ + args = [iter(seq)] * n + if pad is no_pad: + return zip(*args) + else: + return zip_longest(*args, fillvalue=pad) + + +def partition_all(n, seq): + """ Partition all elements of sequence into tuples of length at most n + + The final tuple may be shorter to accommodate extra elements. + + >>> list(partition_all(2, [1, 2, 3, 4])) + [(1, 2), (3, 4)] + + >>> list(partition_all(2, [1, 2, 3, 4, 5])) + [(1, 2), (3, 4), (5,)] + + See Also: + partition + """ + args = [iter(seq)] * n + it = zip_longest(*args, fillvalue=no_pad) + try: + prev = next(it) + except StopIteration: + return + for item in it: + yield prev + prev = item + if prev[-1] is no_pad: + try: + # If seq defines __len__, then + # we can quickly calculate where no_pad starts + yield prev[:len(seq) % n] + except TypeError: + # Get first index of no_pad without using .index() + # https://github.com/pytoolz/toolz/issues/387 + # Binary search from CPython's bisect module, + # modified for identity testing. + lo, hi = 0, n + while lo < hi: + mid = (lo + hi) // 2 + if prev[mid] is no_pad: + hi = mid + else: + lo = mid + 1 + yield prev[:lo] + else: + yield prev + + +def count(seq): + """ Count the number of items in seq + + Like the builtin ``len`` but works on lazy sequencies. + + Not to be confused with ``itertools.count`` + + See also: + len + """ + if hasattr(seq, '__len__'): + return len(seq) + return sum(1 for i in seq) + + +def pluck(ind, seqs, default=no_default): + """ plucks an element or several elements from each item in a sequence. 
+ + ``pluck`` maps ``itertoolz.get`` over a sequence and returns one or more + elements of each item in the sequence. + + This is equivalent to running `map(curried.get(ind), seqs)` + + ``ind`` can be either a single string/index or a list of strings/indices. + ``seqs`` should be sequence containing sequences or dicts. + + e.g. + + >>> data = [{'id': 1, 'name': 'Cheese'}, {'id': 2, 'name': 'Pies'}] + >>> list(pluck('name', data)) + ['Cheese', 'Pies'] + >>> list(pluck([0, 1], [[1, 2, 3], [4, 5, 7]])) + [(1, 2), (4, 5)] + + See Also: + get + map + """ + if default == no_default: + get = getter(ind) + return map(get, seqs) + elif isinstance(ind, list): + return (tuple(_get(item, seq, default) for item in ind) + for seq in seqs) + return (_get(ind, seq, default) for seq in seqs) + + +def getter(index): + if isinstance(index, list): + if len(index) == 1: + index = index[0] + return lambda x: (x[index],) + elif index: + return operator.itemgetter(*index) + else: + return lambda x: () + else: + return operator.itemgetter(index) + + +def join(leftkey, leftseq, rightkey, rightseq, + left_default=no_default, right_default=no_default): + """ Join two sequences on common attributes + + This is a semi-streaming operation. The LEFT sequence is fully evaluated + and placed into memory. The RIGHT sequence is evaluated lazily and so can + be arbitrarily large. + (Note: If right_default is defined, then unique keys of rightseq + will also be stored in memory.) + + >>> friends = [('Alice', 'Edith'), + ... ('Alice', 'Zhao'), + ... ('Edith', 'Alice'), + ... ('Zhao', 'Alice'), + ... ('Zhao', 'Edith')] + + >>> cities = [('Alice', 'NYC'), + ... ('Alice', 'Chicago'), + ... ('Dan', 'Syndey'), + ... ('Edith', 'Paris'), + ... ('Edith', 'Berlin'), + ... ('Zhao', 'Shanghai')] + + >>> # Vacation opportunities + >>> # In what cities do people have friends? + >>> result = join(second, friends, + ... first, cities) + >>> for ((a, b), (c, d)) in sorted(unique(result)): + ... 
print((a, d)) + ('Alice', 'Berlin') + ('Alice', 'Paris') + ('Alice', 'Shanghai') + ('Edith', 'Chicago') + ('Edith', 'NYC') + ('Zhao', 'Chicago') + ('Zhao', 'NYC') + ('Zhao', 'Berlin') + ('Zhao', 'Paris') + + Specify outer joins with keyword arguments ``left_default`` and/or + ``right_default``. Here is a full outer join in which unmatched elements + are paired with None. + + >>> identity = lambda x: x + >>> list(join(identity, [1, 2, 3], + ... identity, [2, 3, 4], + ... left_default=None, right_default=None)) + [(2, 2), (3, 3), (None, 4), (1, None)] + + Usually the key arguments are callables to be applied to the sequences. If + the keys are not obviously callable then it is assumed that indexing was + intended, e.g. the following is a legal change. + The join is implemented as a hash join and the keys of leftseq must be + hashable. Additionally, if right_default is defined, then keys of rightseq + must also be hashable. + + >>> # result = join(second, friends, first, cities) + >>> result = join(1, friends, 0, cities) # doctest: +SKIP + """ + if not callable(leftkey): + leftkey = getter(leftkey) + if not callable(rightkey): + rightkey = getter(rightkey) + + d = groupby(leftkey, leftseq) + + if left_default == no_default and right_default == no_default: + # Inner Join + for item in rightseq: + key = rightkey(item) + if key in d: + for left_match in d[key]: + yield (left_match, item) + elif left_default != no_default and right_default == no_default: + # Right Join + for item in rightseq: + key = rightkey(item) + if key in d: + for left_match in d[key]: + yield (left_match, item) + else: + yield (left_default, item) + elif right_default != no_default: + seen_keys = set() + seen = seen_keys.add + + if left_default == no_default: + # Left Join + for item in rightseq: + key = rightkey(item) + seen(key) + if key in d: + for left_match in d[key]: + yield (left_match, item) + else: + # Full Join + for item in rightseq: + key = rightkey(item) + seen(key) + if key in d: + for 
def topk(k, seq, key=None):
    """ Find the k largest elements of a sequence

    Operates lazily in ``n*log(k)`` time

    >>> topk(2, [1, 100, 10, 1000])
    (1000, 100)

    Use a key function to change sorted order

    >>> topk(2, ['Alice', 'Bob', 'Charlie', 'Dan'], key=len)
    ('Charlie', 'Alice')

    A ``key`` that is not callable is turned into an item lookup first.

    See also:
        heapq.nlargest
    """
    if key is None or callable(key):
        keyfunc = key
    else:
        # Anything that cannot be called is treated as an index.
        keyfunc = getter(key)
    largest = heapq.nlargest(k, seq, key=keyfunc)
    return tuple(largest)
def random_sample(prob, seq, random_state=None):
    """ Return elements from a sequence with probability of prob

    Returns a lazy iterator of random items from seq.

    Every item is judged on its own, without replacement, so the number of
    items returned varies from run to run: below, the first call returned
    13 items and the second returned 6.

    >>> seq = list(range(100))
    >>> list(random_sample(0.1, seq)) # doctest: +SKIP
    [6, 9, 19, 35, 45, 50, 58, 62, 68, 72, 78, 86, 95]
    >>> list(random_sample(0.1, seq)) # doctest: +SKIP
    [6, 44, 54, 61, 69, 94]

    Passing an integer seed as ``random_state`` makes the sampling
    deterministic: the same seed always gives the same sample.

    >>> list(random_sample(0.1, seq, random_state=2016))
    [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98]
    >>> list(random_sample(0.1, seq, random_state=2016))
    [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98]

    ``random_state`` may also be any object with a ``random`` method that
    returns floats between 0.0 and 1.0 (exclusive).

    >>> from random import Random
    >>> randobj = Random(2016)
    >>> list(random_sample(0.1, seq, random_state=randobj))
    [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98]
    """
    if hasattr(random_state, 'random'):
        rng = random_state
    else:
        # Anything without a ``random`` method is used as a seed.
        rng = Random(random_state)

    def keep(_item):
        # One fresh draw per item; the item's value is irrelevant.
        return rng.random() < prob

    return filter(keep, seq)
class EqualityHashKey(object):
    """ Create a hash key that uses equality comparisons between items.

    Wrapping a value lets otherwise unhashable types be placed in hash
    containers:

    >>> from toolz import curry
    >>> EqualityHashDefault = curry(EqualityHashKey, None)
    >>> set(map(EqualityHashDefault, [[], (), [1], [1]]))  # doctest: +SKIP
    {=[]=, =()=, =[1]=}

    **Caution:** adding N ``EqualityHashKey`` items to a hash container
    may require O(N**2) operations, not O(N) as for typical hashable types.
    A suitable key function such as ``tuple`` or ``frozenset`` is therefore
    usually preferable to ``EqualityHashKey`` when one is available.

    The ``key`` argument should be a function or index that returns a
    hashable object which effectively distinguishes unequal items.  This
    avoids the poor scaling of the default key.  For instance, the example
    above can be improved with a key that separates items by length or
    type:

    >>> EqualityHashLen = curry(EqualityHashKey, len)
    >>> EqualityHashType = curry(EqualityHashKey, type)  # this works too
    >>> set(map(EqualityHashLen, [[], (), [1], [1]]))  # doctest: +SKIP
    {=[]=, =()=, =[1]=}

    ``EqualityHashKey`` is handy when a suitable key function is
    complicated or unavailable.  The following returns all unique values
    based on equality:

    >>> from toolz import unique
    >>> vals = [[], [], (), [1], [1], [2], {}, {}, {}]
    >>> list(unique(vals, key=EqualityHashDefault))
    [[], (), [1], [2], {}]

    **Warning:** don't change the equality value of an item already in a
    hash container.  Unhashable types are unhashable for a reason.  For
    example:

    >>> L1 = [1] ; L2 = [2]
    >>> s = set(map(EqualityHashDefault, [L1, L2]))
    >>> s  # doctest: +SKIP
    {=[1]=, =[2]=}

    >>> L1[0] = 2  # Don't do this!  ``s`` now has duplicate items!
    >>> s  # doctest: +SKIP
    {=[2]=, =[2]=}

    Although this may appear problematic, immutable data types are a common
    idiom in functional programming, and ``EqualityHashKey`` easily allows
    the same idiom to be used by convention rather than strict requirement.

    See Also:
        identity
    """
    __slots__ = ['item', 'key']
    _default_hashkey = '__default__hashkey__'

    def __init__(self, key, item):
        # None selects the shared default key; a non-callable key is
        # treated as an index and turned into a lookup function.
        if key is None:
            key = self._default_hashkey
        elif not callable(key):
            key = getter(key)
        self.key = key
        self.item = item

    def __hash__(self):
        # With the default key every instance hashes alike, so the
        # container falls back on __eq__ to tell items apart.
        if self.key == self._default_hashkey:
            return hash(self.key)
        return hash(self.key(self.item))

    def __eq__(self, other):
        try:
            matches = self._default_hashkey == other._default_hashkey
            return matches and self.item == other.item
        except AttributeError:
            # ``other`` does not look like an EqualityHashKey.
            return False

    def __ne__(self, other):
        equal = self.__eq__(other)
        return not equal

    def __str__(self):
        text = str(self.item)
        return '=%s=' % text

    def __repr__(self):
        text = repr(self.item)
        return '=%s=' % text
def fold(binop, seq, default=no_default, map=map, chunksize=128, combine=None):
    """
    Reduce without guarantee of ordered reduction.

    inputs:

    ``binop``     - associative operator. The associative property allows us to
                    leverage a parallel map to perform reductions in parallel.
    ``seq``       - a sequence to be aggregated
    ``default``   - an identity element like 0 for ``add`` or 1 for mul.
                    It seeds the reduction of every chunk, and it is the
                    result when ``seq`` is empty.

    ``map``       - an implementation of ``map``. This may be parallel and
                    determines how work is distributed.
    ``chunksize`` - Number of elements of ``seq`` that should be handled
                    within a single function call.  Must be greater than 1.
    ``combine``   - Binary operator to combine two intermediate results.
                    If ``binop`` is of type (total, item) -> total
                    then ``combine`` is of type (total, total) -> total
                    Defaults to ``binop`` for common case of operators like add

    Fold chunks up the collection into blocks of size ``chunksize`` and then
    feeds each of these to calls to ``reduce``. This work is distributed
    with a call to ``map``, gathered back and then refolded to finish the
    computation. In this way ``fold`` specifies only how to chunk up data but
    leaves the distribution of this work to an externally provided ``map``
    function. This function can be sequential or rely on multithreading,
    multiprocessing, or even distributed solutions.

    If ``map`` intends to serialize functions it should be prepared to accept
    and serialize lambdas. Note that the standard ``pickle`` module fails
    here.

    Raises ``TypeError`` when ``seq`` is empty and no ``default`` is given,
    matching ``functools.reduce`` on an empty sequence.

    Example
    -------

    >>> # Provide a parallel map to accomplish a parallel sum
    >>> from operator import add
    >>> fold(add, [1, 2, 3, 4], chunksize=2, map=map)
    10
    """
    # A chunk of one element reduces to itself, so the intermediate results
    # would never get shorter and the recursion below would never finish.
    assert chunksize > 1

    if combine is None:
        combine = binop

    chunks = partition_all(chunksize, seq)

    # Evaluate sequence in chunks via map
    if default == no_default:
        reducer = functools.partial(_reduce, binop)
    else:
        reducer = functools.partial(_reduce, binop, initial=default)

    results = list(map(reducer, chunks))  # TODO: Support complete laziness

    if not results:
        # Empty input produces no chunks.  Recursing on the empty result
        # list would never terminate, so settle the answer here.
        if default == no_default:
            raise TypeError('fold() of empty sequence with no default value')
        return default

    if len(results) == 1:    # Return completed result
        return results[0]

    # Recurse to reaggregate intermediate results.  ``default`` is not
    # forwarded: every chunk has already been seeded with it.
    return fold(combine, results, map=map, chunksize=chunksize)
+ +PY2_LIBRARY() + +VERSION(0.10.0) + +LICENSE(BSD-3-Clause) + +NO_LINT() + +PY_SRCS( + TOP_LEVEL + tlz/__init__.py + tlz/_build_tlz.py + toolz/__init__.py + toolz/_signatures.py + toolz/compatibility.py + toolz/curried/__init__.py + toolz/curried/exceptions.py + toolz/curried/operator.py + toolz/dicttoolz.py + toolz/functoolz.py + toolz/itertoolz.py + toolz/recipes.py + toolz/sandbox/__init__.py + toolz/sandbox/core.py + toolz/sandbox/parallel.py + toolz/utils.py +) + +RESOURCE_FILES( + PREFIX contrib/python/toolz/py2/ + .dist-info/METADATA + .dist-info/top_level.txt +) + +END() + +RECURSE_FOR_TESTS( + tests +) diff --git a/contrib/python/toolz/py3/.dist-info/METADATA b/contrib/python/toolz/py3/.dist-info/METADATA new file mode 100644 index 0000000000..0af60db4cf --- /dev/null +++ b/contrib/python/toolz/py3/.dist-info/METADATA @@ -0,0 +1,159 @@ +Metadata-Version: 2.1 +Name: toolz +Version: 0.12.0 +Summary: List processing tools and functional utilities +Home-page: https://github.com/pytoolz/toolz/ +Author: https://raw.github.com/pytoolz/toolz/master/AUTHORS.md +Maintainer: Erik Welch +Maintainer-email: erik.n.welch@gmail.com +License: BSD +Keywords: functional utility itertools functools +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: License :: OSI Approved :: BSD License +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Requires-Python: >=3.5 + +Toolz +===== + +|Build Status| |Coverage Status| |Version Status| + +A set of utility 
functions for iterators, functions, and dictionaries. + +See the PyToolz documentation at https://toolz.readthedocs.io + +LICENSE +------- + +New BSD. See `License File <https://github.com/pytoolz/toolz/blob/master/LICENSE.txt>`__. + +Install +------- + +``toolz`` is on the Python Package Index (PyPI): + +:: + + pip install toolz + +Structure and Heritage +---------------------- + +``toolz`` is implemented in three parts: + +|literal itertoolz|_, for operations on iterables. Examples: ``groupby``, +``unique``, ``interpose``, + +|literal functoolz|_, for higher-order functions. Examples: ``memoize``, +``curry``, ``compose``, + +|literal dicttoolz|_, for operations on dictionaries. Examples: ``assoc``, +``update-in``, ``merge``. + +.. |literal itertoolz| replace:: ``itertoolz`` +.. _literal itertoolz: https://github.com/pytoolz/toolz/blob/master/toolz/itertoolz.py + +.. |literal functoolz| replace:: ``functoolz`` +.. _literal functoolz: https://github.com/pytoolz/toolz/blob/master/toolz/functoolz.py + +.. |literal dicttoolz| replace:: ``dicttoolz`` +.. _literal dicttoolz: https://github.com/pytoolz/toolz/blob/master/toolz/dicttoolz.py + +These functions come from the legacy of functional languages for list +processing. They interoperate well to accomplish common complex tasks. + +Read our `API +Documentation <https://toolz.readthedocs.io/en/latest/api.html>`__ for +more details. + +Example +------- + +This builds a standard wordcount function from pieces within ``toolz``: + +.. code:: python + + >>> def stem(word): + ... """ Stem word to primitive form """ + ... return word.lower().rstrip(",.!:;'-\"").lstrip("'\"") + + >>> from toolz import compose, frequencies + >>> from toolz.curried import map + >>> wordcount = compose(frequencies, map(stem), str.split) + + >>> sentence = "This cat jumped over this other cat!" 
+ >>> wordcount(sentence) + {'this': 2, 'cat': 2, 'jumped': 1, 'over': 1, 'other': 1} + +Dependencies +------------ + +``toolz`` supports Python 3.5+ with a common codebase. +It is pure Python and requires no dependencies beyond the standard +library. + +It is, in short, a lightweight dependency. + + +CyToolz +------- + +The ``toolz`` project has been reimplemented in `Cython <http://cython.org>`__. +The ``cytoolz`` project is a drop-in replacement for the Pure Python +implementation. +See `CyToolz GitHub Page <https://github.com/pytoolz/cytoolz/>`__ for more +details. + +See Also +-------- + +- `Underscore.js <https://underscorejs.org/>`__: A similar library for + JavaScript +- `Enumerable <https://ruby-doc.org/core-2.0.0/Enumerable.html>`__: A + similar library for Ruby +- `Clojure <https://clojure.org/>`__: A functional language whose + standard library has several counterparts in ``toolz`` +- `itertools <https://docs.python.org/2/library/itertools.html>`__: The + Python standard library for iterator tools +- `functools <https://docs.python.org/2/library/functools.html>`__: The + Python standard library for function tools + +Contributions Welcome +--------------------- + +``toolz`` aims to be a repository for utility functions, particularly +those that come from the functional programming and list processing +traditions. We welcome contributions that fall within this scope. + +We also try to keep the API small to keep ``toolz`` manageable. The ideal +contribution is significantly different from existing functions and has +precedent in a few other functional systems. + +Please take a look at our +`issue page <https://github.com/pytoolz/toolz/issues>`__ +for contribution ideas. + +Community +--------- + +See our `mailing list <https://groups.google.com/forum/#!forum/pytoolz>`__. +We're friendly. + +.. |Build Status| image:: https://github.com/pytoolz/toolz/workflows/Test/badge.svg + :target: https://github.com/pytoolz/toolz/actions +.. 
|Coverage Status| image:: https://coveralls.io/repos/pytoolz/toolz/badge.svg?branch=master + :target: https://coveralls.io/r/pytoolz/toolz +.. |Version Status| image:: https://badge.fury.io/py/toolz.svg + :target: https://badge.fury.io/py/toolz + + diff --git a/contrib/python/toolz/py3/.dist-info/top_level.txt b/contrib/python/toolz/py3/.dist-info/top_level.txt new file mode 100644 index 0000000000..e58ef014ac --- /dev/null +++ b/contrib/python/toolz/py3/.dist-info/top_level.txt @@ -0,0 +1,2 @@ +tlz +toolz diff --git a/contrib/python/toolz/py3/AUTHORS.md b/contrib/python/toolz/py3/AUTHORS.md new file mode 100644 index 0000000000..bd4a563d9b --- /dev/null +++ b/contrib/python/toolz/py3/AUTHORS.md @@ -0,0 +1,33 @@ +[Matthew Rocklin](http://matthewrocklin.com) [@mrocklin](http://github.com/mrocklin/) + +[John Jacobsen](http://eigenhombre.com) [@eigenhombre](http://github.com/eigenhombre/) + +Erik Welch [@eriknw](https://github.com/eriknw/) + +John Crichton [@jcrichton](https://github.com/jcrichton/) + +Han Semaj [@microamp](https://github.com/microamp/) + +[Graeme Coupar](https://twitter.com/obmarg) [@obmarg](https://github.com/obmarg/) + +[Leonid Shvechikov](http://brainstorage.me/shvechikov) [@shvechikov](https://github.com/shvechikov) + +Lars Buitinck [@larsmans](http://github.com/larsmans) + +José Ricardo [@josericardo](https://github.com/josericardo) + +Tom Prince [@tomprince](https://github.com/tomprince) + +Bart van Merriënboer [@bartvm](https://github.com/bartvm) + +Nikolaos-Digenis Karagiannis [@digenis](https://github.com/digenis/) + +[Antonio Lima](https://twitter.com/themiurgo) [@themiurgo](https://github.com/themiurgo/) + +Joe Jevnik [@llllllllll](https://github.com/llllllllll) + +Rory Kirchner [@roryk](https://github.com/roryk) + +[Steven Cutting](http://steven-cutting.github.io) [@steven_cutting](https://github.com/steven-cutting) + +Aric Coady [@coady](https://github.com/coady) diff --git a/contrib/python/toolz/py3/LICENSE.txt 
b/contrib/python/toolz/py3/LICENSE.txt new file mode 100644 index 0000000000..eeb91b202c --- /dev/null +++ b/contrib/python/toolz/py3/LICENSE.txt @@ -0,0 +1,28 @@ +Copyright (c) 2013 Matthew Rocklin + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + a. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + b. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + c. Neither the name of toolz nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +DAMAGE. 
diff --git a/contrib/python/toolz/py3/README.rst b/contrib/python/toolz/py3/README.rst new file mode 100644 index 0000000000..e62ac7917e --- /dev/null +++ b/contrib/python/toolz/py3/README.rst @@ -0,0 +1,132 @@ +Toolz +===== + +|Build Status| |Coverage Status| |Version Status| + +A set of utility functions for iterators, functions, and dictionaries. + +See the PyToolz documentation at https://toolz.readthedocs.io + +LICENSE +------- + +New BSD. See `License File <https://github.com/pytoolz/toolz/blob/master/LICENSE.txt>`__. + +Install +------- + +``toolz`` is on the Python Package Index (PyPI): + +:: + + pip install toolz + +Structure and Heritage +---------------------- + +``toolz`` is implemented in three parts: + +|literal itertoolz|_, for operations on iterables. Examples: ``groupby``, +``unique``, ``interpose``, + +|literal functoolz|_, for higher-order functions. Examples: ``memoize``, +``curry``, ``compose``, + +|literal dicttoolz|_, for operations on dictionaries. Examples: ``assoc``, +``update-in``, ``merge``. + +.. |literal itertoolz| replace:: ``itertoolz`` +.. _literal itertoolz: https://github.com/pytoolz/toolz/blob/master/toolz/itertoolz.py + +.. |literal functoolz| replace:: ``functoolz`` +.. _literal functoolz: https://github.com/pytoolz/toolz/blob/master/toolz/functoolz.py + +.. |literal dicttoolz| replace:: ``dicttoolz`` +.. _literal dicttoolz: https://github.com/pytoolz/toolz/blob/master/toolz/dicttoolz.py + +These functions come from the legacy of functional languages for list +processing. They interoperate well to accomplish common complex tasks. + +Read our `API +Documentation <https://toolz.readthedocs.io/en/latest/api.html>`__ for +more details. + +Example +------- + +This builds a standard wordcount function from pieces within ``toolz``: + +.. code:: python + + >>> def stem(word): + ... """ Stem word to primitive form """ + ... 
return word.lower().rstrip(",.!:;'-\"").lstrip("'\"") + + >>> from toolz import compose, frequencies + >>> from toolz.curried import map + >>> wordcount = compose(frequencies, map(stem), str.split) + + >>> sentence = "This cat jumped over this other cat!" + >>> wordcount(sentence) + {'this': 2, 'cat': 2, 'jumped': 1, 'over': 1, 'other': 1} + +Dependencies +------------ + +``toolz`` supports Python 3.5+ with a common codebase. +It is pure Python and requires no dependencies beyond the standard +library. + +It is, in short, a lightweight dependency. + + +CyToolz +------- + +The ``toolz`` project has been reimplemented in `Cython <http://cython.org>`__. +The ``cytoolz`` project is a drop-in replacement for the Pure Python +implementation. +See `CyToolz GitHub Page <https://github.com/pytoolz/cytoolz/>`__ for more +details. + +See Also +-------- + +- `Underscore.js <https://underscorejs.org/>`__: A similar library for + JavaScript +- `Enumerable <https://ruby-doc.org/core-2.0.0/Enumerable.html>`__: A + similar library for Ruby +- `Clojure <https://clojure.org/>`__: A functional language whose + standard library has several counterparts in ``toolz`` +- `itertools <https://docs.python.org/2/library/itertools.html>`__: The + Python standard library for iterator tools +- `functools <https://docs.python.org/2/library/functools.html>`__: The + Python standard library for function tools + +Contributions Welcome +--------------------- + +``toolz`` aims to be a repository for utility functions, particularly +those that come from the functional programming and list processing +traditions. We welcome contributions that fall within this scope. + +We also try to keep the API small to keep ``toolz`` manageable. The ideal +contribution is significantly different from existing functions and has +precedent in a few other functional systems. + +Please take a look at our +`issue page <https://github.com/pytoolz/toolz/issues>`__ +for contribution ideas. 
+ +Community +--------- + +See our `mailing list <https://groups.google.com/forum/#!forum/pytoolz>`__. +We're friendly. + +.. |Build Status| image:: https://github.com/pytoolz/toolz/workflows/Test/badge.svg + :target: https://github.com/pytoolz/toolz/actions +.. |Coverage Status| image:: https://coveralls.io/repos/pytoolz/toolz/badge.svg?branch=master + :target: https://coveralls.io/r/pytoolz/toolz +.. |Version Status| image:: https://badge.fury.io/py/toolz.svg + :target: https://badge.fury.io/py/toolz diff --git a/contrib/python/toolz/py3/tlz/__init__.py b/contrib/python/toolz/py3/tlz/__init__.py new file mode 100644 index 0000000000..9c9c84afe1 --- /dev/null +++ b/contrib/python/toolz/py3/tlz/__init__.py @@ -0,0 +1,9 @@ +"""``tlz`` mirrors the ``toolz`` API and uses ``cytoolz`` if possible. + +The ``tlz`` package is installed when ``toolz`` is installed. It provides +a convenient way to use functions from ``cytoolz``--a faster Cython +implementation of ``toolz``--if it is installed, otherwise it uses +functions from ``toolz``. +""" + +from . 
import _build_tlz diff --git a/contrib/python/toolz/py3/tlz/_build_tlz.py b/contrib/python/toolz/py3/tlz/_build_tlz.py new file mode 100644 index 0000000000..3ac783699e --- /dev/null +++ b/contrib/python/toolz/py3/tlz/_build_tlz.py @@ -0,0 +1,92 @@ +import sys +import types +import toolz +from importlib import import_module +from importlib.machinery import ModuleSpec + + +class TlzLoader: + """ Finds and loads ``tlz`` modules when added to sys.meta_path""" + def __init__(self): + self.always_from_toolz = { + toolz.pipe, + } + + def _load_toolz(self, fullname): + rv = {} + package, dot, submodules = fullname.partition('.') + try: + module_name = ''.join(['cytoolz', dot, submodules]) + rv['cytoolz'] = import_module(module_name) + except ImportError: + pass + try: + module_name = ''.join(['toolz', dot, submodules]) + rv['toolz'] = import_module(module_name) + except ImportError: + pass + if not rv: + raise ImportError(fullname) + return rv + + def find_module(self, fullname, path=None): # pragma: py3 no cover + package, dot, submodules = fullname.partition('.') + if package == 'tlz': + return self + + def load_module(self, fullname): # pragma: py3 no cover + if fullname in sys.modules: # pragma: no cover + return sys.modules[fullname] + spec = ModuleSpec(fullname, self) + module = self.create_module(spec) + sys.modules[fullname] = module + self.exec_module(module) + return module + + def find_spec(self, fullname, path, target=None): # pragma: no cover + package, dot, submodules = fullname.partition('.') + if package == 'tlz': + return ModuleSpec(fullname, self) + + def create_module(self, spec): + return types.ModuleType(spec.name) + + def exec_module(self, module): + toolz_mods = self._load_toolz(module.__name__) + fast_mod = toolz_mods.get('cytoolz') or toolz_mods['toolz'] + slow_mod = toolz_mods.get('toolz') or toolz_mods['cytoolz'] + module.__dict__.update(toolz.merge(fast_mod.__dict__, module.__dict__)) + package = fast_mod.__package__ + if package is not None: + 
package, dot, submodules = package.partition('.') + module.__package__ = ''.join(['tlz', dot, submodules]) + if not module.__doc__: + module.__doc__ = fast_mod.__doc__ + + # show file from toolz during introspection + try: + module.__file__ = slow_mod.__file__ + except AttributeError: + pass + + for k, v in fast_mod.__dict__.items(): + tv = slow_mod.__dict__.get(k) + try: + hash(tv) + except TypeError: + tv = None + if tv in self.always_from_toolz: + module.__dict__[k] = tv + elif ( + isinstance(v, types.ModuleType) + and v.__package__ == fast_mod.__name__ + ): + package, dot, submodules = v.__name__.partition('.') + module_name = ''.join(['tlz', dot, submodules]) + submodule = import_module(module_name) + module.__dict__[k] = submodule + + +tlz_loader = TlzLoader() +sys.meta_path.append(tlz_loader) +tlz_loader.exec_module(sys.modules['tlz']) diff --git a/contrib/python/toolz/py3/toolz/__init__.py b/contrib/python/toolz/py3/toolz/__init__.py new file mode 100644 index 0000000000..ba49a662fc --- /dev/null +++ b/contrib/python/toolz/py3/toolz/__init__.py @@ -0,0 +1,26 @@ +from .itertoolz import * + +from .functoolz import * + +from .dicttoolz import * + +from .recipes import * + +from functools import partial, reduce + +sorted = sorted + +map = map + +filter = filter + +# Aliases +comp = compose + +from . import curried, sandbox + +functoolz._sigs.create_signature_registry() + +from ._version import get_versions +__version__ = get_versions()['version'] +del get_versions diff --git a/contrib/python/toolz/py3/toolz/_signatures.py b/contrib/python/toolz/py3/toolz/_signatures.py new file mode 100644 index 0000000000..3ce1616a85 --- /dev/null +++ b/contrib/python/toolz/py3/toolz/_signatures.py @@ -0,0 +1,785 @@ +"""Internal module for better introspection of builtins. + +The main functions are ``is_builtin_valid_args``, ``is_builtin_partial_args``, +and ``has_unknown_args``. Other functions in this module support these three. 
+ +Notably, we create a ``signatures`` registry to enable introspection of +builtin functions in any Python version. This includes builtins that +have more than one valid signature. Currently, the registry includes +builtins from ``builtins``, ``functools``, ``itertools``, and ``operator`` +modules. More can be added as requested. We don't guarantee full coverage. + +Everything in this module should be regarded as implementation details. +Users should try to not use this module directly. +""" +import functools +import inspect +import itertools +import operator +from importlib import import_module + +from .functoolz import (is_partial_args, is_arity, has_varargs, + has_keywords, num_required_args) + +import builtins + +# We mock builtin callables using lists of tuples with lambda functions. +# +# The tuple spec is (num_position_args, lambda_func, keyword_only_args). +# +# num_position_args: +# - The number of positional-only arguments. If not specified, +# all positional arguments are considered positional-only. +# +# lambda_func: +# - lambda function that matches a signature of a builtin, but does +# not include keyword-only arguments. +# +# keyword_only_args: (optional) +# - Tuple of keyword-only arguments. 
+ +module_info = {} + +module_info[builtins] = dict( + abs=[ + lambda x: None], + all=[ + lambda iterable: None], + anext=[ + lambda aiterator: None, + lambda aiterator, default: None], + any=[ + lambda iterable: None], + apply=[ + lambda object: None, + lambda object, args: None, + lambda object, args, kwargs: None], + ascii=[ + lambda obj: None], + bin=[ + lambda number: None], + bool=[ + lambda x=False: None], + buffer=[ + lambda object: None, + lambda object, offset: None, + lambda object, offset, size: None], + bytearray=[ + lambda: None, + lambda int: None, + lambda string, encoding='utf8', errors='strict': None], + callable=[ + lambda obj: None], + chr=[ + lambda i: None], + classmethod=[ + lambda function: None], + cmp=[ + lambda x, y: None], + coerce=[ + lambda x, y: None], + complex=[ + lambda real=0, imag=0: None], + delattr=[ + lambda obj, name: None], + dict=[ + lambda **kwargs: None, + lambda mapping, **kwargs: None], + dir=[ + lambda: None, + lambda object: None], + divmod=[ + lambda x, y: None], + enumerate=[ + (0, lambda iterable, start=0: None)], + eval=[ + lambda source: None, + lambda source, globals: None, + lambda source, globals, locals: None], + execfile=[ + lambda filename: None, + lambda filename, globals: None, + lambda filename, globals, locals: None], + file=[ + (0, lambda name, mode='r', buffering=-1: None)], + filter=[ + lambda function, iterable: None], + float=[ + lambda x=0.0: None], + format=[ + lambda value: None, + lambda value, format_spec: None], + frozenset=[ + lambda: None, + lambda iterable: None], + getattr=[ + lambda object, name: None, + lambda object, name, default: None], + globals=[ + lambda: None], + hasattr=[ + lambda obj, name: None], + hash=[ + lambda obj: None], + hex=[ + lambda number: None], + id=[ + lambda obj: None], + input=[ + lambda: None, + lambda prompt: None], + int=[ + lambda x=0: None, + (0, lambda x, base=10: None)], + intern=[ + lambda string: None], + isinstance=[ + lambda obj, class_or_tuple: 
None], + issubclass=[ + lambda cls, class_or_tuple: None], + iter=[ + lambda iterable: None, + lambda callable, sentinel: None], + len=[ + lambda obj: None], + list=[ + lambda: None, + lambda iterable: None], + locals=[ + lambda: None], + long=[ + lambda x=0: None, + (0, lambda x, base=10: None)], + map=[ + lambda func, sequence, *iterables: None], + memoryview=[ + (0, lambda object: None)], + next=[ + lambda iterator: None, + lambda iterator, default: None], + object=[ + lambda: None], + oct=[ + lambda number: None], + ord=[ + lambda c: None], + pow=[ + lambda x, y: None, + lambda x, y, z: None], + property=[ + lambda fget=None, fset=None, fdel=None, doc=None: None], + range=[ + lambda stop: None, + lambda start, stop: None, + lambda start, stop, step: None], + raw_input=[ + lambda: None, + lambda prompt: None], + reduce=[ + lambda function, sequence: None, + lambda function, sequence, initial: None], + reload=[ + lambda module: None], + repr=[ + lambda obj: None], + reversed=[ + lambda sequence: None], + round=[ + (0, lambda number, ndigits=0: None)], + set=[ + lambda: None, + lambda iterable: None], + setattr=[ + lambda obj, name, value: None], + slice=[ + lambda stop: None, + lambda start, stop: None, + lambda start, stop, step: None], + staticmethod=[ + lambda function: None], + sum=[ + lambda iterable: None, + lambda iterable, start: None], + super=[ + lambda type: None, + lambda type, obj: None], + tuple=[ + lambda: None, + lambda iterable: None], + type=[ + lambda object: None, + lambda name, bases, dict: None], + unichr=[ + lambda i: None], + unicode=[ + lambda object: None, + lambda string='', encoding='utf8', errors='strict': None], + vars=[ + lambda: None, + lambda object: None], + xrange=[ + lambda stop: None, + lambda start, stop: None, + lambda start, stop, step: None], + zip=[ + lambda *iterables: None], + __build_class__=[ + (2, lambda func, name, *bases, **kwds: None, ('metaclass',))], + __import__=[ + (0, lambda name, globals=None, locals=None, 
fromlist=None, + level=None: None)], +) +module_info[builtins]['exec'] = [ + lambda source: None, + lambda source, globals: None, + lambda source, globals, locals: None] + +module_info[builtins].update( + breakpoint=[ + lambda *args, **kws: None], + bytes=[ + lambda: None, + lambda int: None, + lambda string, encoding='utf8', errors='strict': None], + compile=[ + (0, lambda source, filename, mode, flags=0, + dont_inherit=False, optimize=-1: None)], + max=[ + (1, lambda iterable: None, ('default', 'key',)), + (1, lambda arg1, arg2, *args: None, ('key',))], + min=[ + (1, lambda iterable: None, ('default', 'key',)), + (1, lambda arg1, arg2, *args: None, ('key',))], + open=[ + (0, lambda file, mode='r', buffering=-1, encoding=None, + errors=None, newline=None, closefd=True, opener=None: None)], + sorted=[ + (1, lambda iterable: None, ('key', 'reverse'))], + str=[ + lambda object='', encoding='utf', errors='strict': None], +) +module_info[builtins]['print'] = [ + (0, lambda *args: None, ('sep', 'end', 'file', 'flush',))] + + +module_info[functools] = dict( + cmp_to_key=[ + (0, lambda mycmp: None)], + partial=[ + lambda func, *args, **kwargs: None], + partialmethod=[ + lambda func, *args, **kwargs: None], + reduce=[ + lambda function, sequence: None, + lambda function, sequence, initial: None], +) + +module_info[itertools] = dict( + accumulate=[ + (0, lambda iterable, func=None: None)], + chain=[ + lambda *iterables: None], + combinations=[ + (0, lambda iterable, r: None)], + combinations_with_replacement=[ + (0, lambda iterable, r: None)], + compress=[ + (0, lambda data, selectors: None)], + count=[ + lambda start=0, step=1: None], + cycle=[ + lambda iterable: None], + dropwhile=[ + lambda predicate, iterable: None], + filterfalse=[ + lambda function, sequence: None], + groupby=[ + (0, lambda iterable, key=None: None)], + ifilter=[ + lambda function, sequence: None], + ifilterfalse=[ + lambda function, sequence: None], + imap=[ + lambda func, sequence, *iterables: 
None], + islice=[ + lambda iterable, stop: None, + lambda iterable, start, stop: None, + lambda iterable, start, stop, step: None], + izip=[ + lambda *iterables: None], + izip_longest=[ + (0, lambda *iterables: None, ('fillvalue',))], + permutations=[ + (0, lambda iterable, r=0: None)], + repeat=[ + (0, lambda object, times=0: None)], + starmap=[ + lambda function, sequence: None], + takewhile=[ + lambda predicate, iterable: None], + tee=[ + lambda iterable: None, + lambda iterable, n: None], + zip_longest=[ + (0, lambda *iterables: None, ('fillvalue',))], +) + +module_info[itertools].update( + product=[ + (0, lambda *iterables: None, ('repeat',))], +) + + +module_info[operator] = dict( + __abs__=[ + lambda a: None], + __add__=[ + lambda a, b: None], + __and__=[ + lambda a, b: None], + __concat__=[ + lambda a, b: None], + __contains__=[ + lambda a, b: None], + __delitem__=[ + lambda a, b: None], + __delslice__=[ + lambda a, b, c: None], + __div__=[ + lambda a, b: None], + __eq__=[ + lambda a, b: None], + __floordiv__=[ + lambda a, b: None], + __ge__=[ + lambda a, b: None], + __getitem__=[ + lambda a, b: None], + __getslice__=[ + lambda a, b, c: None], + __gt__=[ + lambda a, b: None], + __iadd__=[ + lambda a, b: None], + __iand__=[ + lambda a, b: None], + __iconcat__=[ + lambda a, b: None], + __idiv__=[ + lambda a, b: None], + __ifloordiv__=[ + lambda a, b: None], + __ilshift__=[ + lambda a, b: None], + __imatmul__=[ + lambda a, b: None], + __imod__=[ + lambda a, b: None], + __imul__=[ + lambda a, b: None], + __index__=[ + lambda a: None], + __inv__=[ + lambda a: None], + __invert__=[ + lambda a: None], + __ior__=[ + lambda a, b: None], + __ipow__=[ + lambda a, b: None], + __irepeat__=[ + lambda a, b: None], + __irshift__=[ + lambda a, b: None], + __isub__=[ + lambda a, b: None], + __itruediv__=[ + lambda a, b: None], + __ixor__=[ + lambda a, b: None], + __le__=[ + lambda a, b: None], + __lshift__=[ + lambda a, b: None], + __lt__=[ + lambda a, b: None], + 
__matmul__=[ + lambda a, b: None], + __mod__=[ + lambda a, b: None], + __mul__=[ + lambda a, b: None], + __ne__=[ + lambda a, b: None], + __neg__=[ + lambda a: None], + __not__=[ + lambda a: None], + __or__=[ + lambda a, b: None], + __pos__=[ + lambda a: None], + __pow__=[ + lambda a, b: None], + __repeat__=[ + lambda a, b: None], + __rshift__=[ + lambda a, b: None], + __setitem__=[ + lambda a, b, c: None], + __setslice__=[ + lambda a, b, c, d: None], + __sub__=[ + lambda a, b: None], + __truediv__=[ + lambda a, b: None], + __xor__=[ + lambda a, b: None], + _abs=[ + lambda x: None], + _compare_digest=[ + lambda a, b: None], + abs=[ + lambda a: None], + add=[ + lambda a, b: None], + and_=[ + lambda a, b: None], + attrgetter=[ + lambda attr, *args: None], + concat=[ + lambda a, b: None], + contains=[ + lambda a, b: None], + countOf=[ + lambda a, b: None], + delitem=[ + lambda a, b: None], + delslice=[ + lambda a, b, c: None], + div=[ + lambda a, b: None], + eq=[ + lambda a, b: None], + floordiv=[ + lambda a, b: None], + ge=[ + lambda a, b: None], + getitem=[ + lambda a, b: None], + getslice=[ + lambda a, b, c: None], + gt=[ + lambda a, b: None], + iadd=[ + lambda a, b: None], + iand=[ + lambda a, b: None], + iconcat=[ + lambda a, b: None], + idiv=[ + lambda a, b: None], + ifloordiv=[ + lambda a, b: None], + ilshift=[ + lambda a, b: None], + imatmul=[ + lambda a, b: None], + imod=[ + lambda a, b: None], + imul=[ + lambda a, b: None], + index=[ + lambda a: None], + indexOf=[ + lambda a, b: None], + inv=[ + lambda a: None], + invert=[ + lambda a: None], + ior=[ + lambda a, b: None], + ipow=[ + lambda a, b: None], + irepeat=[ + lambda a, b: None], + irshift=[ + lambda a, b: None], + is_=[ + lambda a, b: None], + is_not=[ + lambda a, b: None], + isCallable=[ + lambda a: None], + isMappingType=[ + lambda a: None], + isNumberType=[ + lambda a: None], + isSequenceType=[ + lambda a: None], + isub=[ + lambda a, b: None], + itemgetter=[ + lambda item, *args: None], + itruediv=[ 
+ lambda a, b: None], + ixor=[ + lambda a, b: None], + le=[ + lambda a, b: None], + length_hint=[ + lambda obj: None, + lambda obj, default: None], + lshift=[ + lambda a, b: None], + lt=[ + lambda a, b: None], + matmul=[ + lambda a, b: None], + methodcaller=[ + lambda name, *args, **kwargs: None], + mod=[ + lambda a, b: None], + mul=[ + lambda a, b: None], + ne=[ + lambda a, b: None], + neg=[ + lambda a: None], + not_=[ + lambda a: None], + or_=[ + lambda a, b: None], + pos=[ + lambda a: None], + pow=[ + lambda a, b: None], + repeat=[ + lambda a, b: None], + rshift=[ + lambda a, b: None], + sequenceIncludes=[ + lambda a, b: None], + setitem=[ + lambda a, b, c: None], + setslice=[ + lambda a, b, c, d: None], + sub=[ + lambda a, b: None], + truediv=[ + lambda a, b: None], + truth=[ + lambda a: None], + xor=[ + lambda a, b: None], +) + +module_info['toolz'] = dict( + curry=[ + (0, lambda *args, **kwargs: None)], + excepts=[ + (0, lambda exc, func, handler=None: None)], + flip=[ + (0, lambda func=None, a=None, b=None: None)], + juxt=[ + (0, lambda *funcs: None)], + memoize=[ + (0, lambda func=None, cache=None, key=None: None)], +) + +module_info['toolz.functoolz'] = dict( + Compose=[ + (0, lambda funcs: None)], + InstanceProperty=[ + (0, lambda fget=None, fset=None, fdel=None, doc=None, + classval=None: None)], +) + + +def num_pos_args(sigspec): + """ Return the number of positional arguments. 
``f(x, y=1)`` has 1""" + return sum(1 for x in sigspec.parameters.values() + if x.kind == x.POSITIONAL_OR_KEYWORD + and x.default is x.empty) + + +def get_exclude_keywords(num_pos_only, sigspec): + """ Return the names of position-only arguments if func has **kwargs""" + if num_pos_only == 0: + return () + has_kwargs = any(x.kind == x.VAR_KEYWORD + for x in sigspec.parameters.values()) + if not has_kwargs: + return () + pos_args = list(sigspec.parameters.values())[:num_pos_only] + return tuple(x.name for x in pos_args) + + +def signature_or_spec(func): + try: + return inspect.signature(func) + except (ValueError, TypeError): + return None + + +def expand_sig(sig): + """ Convert the signature spec in ``module_info`` to add to ``signatures`` + + The input signature spec is one of: + - ``lambda_func`` + - ``(num_position_args, lambda_func)`` + - ``(num_position_args, lambda_func, keyword_only_args)`` + + The output signature spec is: + ``(num_position_args, lambda_func, keyword_exclude, sigspec)`` + + where ``keyword_exclude`` includes keyword only arguments and, if variadic + keywords is present, the names of position-only argument. The latter is + included to support builtins such as ``partial(func, *args, **kwargs)``, + which allows ``func=`` to be used as a keyword even though it's the name + of a positional argument. 
+ """ + if isinstance(sig, tuple): + if len(sig) == 3: + num_pos_only, func, keyword_only = sig + assert isinstance(sig[-1], tuple) + else: + num_pos_only, func = sig + keyword_only = () + sigspec = signature_or_spec(func) + else: + func = sig + sigspec = signature_or_spec(func) + num_pos_only = num_pos_args(sigspec) + keyword_only = () + keyword_exclude = get_exclude_keywords(num_pos_only, sigspec) + return num_pos_only, func, keyword_only + keyword_exclude, sigspec + + +signatures = {} + + +def create_signature_registry(module_info=module_info, signatures=signatures): + for module, info in module_info.items(): + if isinstance(module, str): + module = import_module(module) + for name, sigs in info.items(): + if hasattr(module, name): + new_sigs = tuple(expand_sig(sig) for sig in sigs) + signatures[getattr(module, name)] = new_sigs + + +def check_valid(sig, args, kwargs): + """ Like ``is_valid_args`` for the given signature spec""" + num_pos_only, func, keyword_exclude, sigspec = sig + if len(args) < num_pos_only: + return False + if keyword_exclude: + kwargs = dict(kwargs) + for item in keyword_exclude: + kwargs.pop(item, None) + try: + func(*args, **kwargs) + return True + except TypeError: + return False + + +def _is_valid_args(func, args, kwargs): + """ Like ``is_valid_args`` for builtins in our ``signatures`` registry""" + if func not in signatures: + return None + sigs = signatures[func] + return any(check_valid(sig, args, kwargs) for sig in sigs) + + +def check_partial(sig, args, kwargs): + """ Like ``is_partial_args`` for the given signature spec""" + num_pos_only, func, keyword_exclude, sigspec = sig + if len(args) < num_pos_only: + pad = (None,) * (num_pos_only - len(args)) + args = args + pad + if keyword_exclude: + kwargs = dict(kwargs) + for item in keyword_exclude: + kwargs.pop(item, None) + return is_partial_args(func, args, kwargs, sigspec) + + +def _is_partial_args(func, args, kwargs): + """ Like ``is_partial_args`` for builtins in our 
``signatures`` registry""" + if func not in signatures: + return None + sigs = signatures[func] + return any(check_partial(sig, args, kwargs) for sig in sigs) + + +def check_arity(n, sig): + num_pos_only, func, keyword_exclude, sigspec = sig + if keyword_exclude or num_pos_only > n: + return False + return is_arity(n, func, sigspec) + + +def _is_arity(n, func): + if func not in signatures: + return None + sigs = signatures[func] + checks = [check_arity(n, sig) for sig in sigs] + if all(checks): + return True + elif any(checks): + return None + return False + + +def check_varargs(sig): + num_pos_only, func, keyword_exclude, sigspec = sig + return has_varargs(func, sigspec) + + +def _has_varargs(func): + if func not in signatures: + return None + sigs = signatures[func] + checks = [check_varargs(sig) for sig in sigs] + if all(checks): + return True + elif any(checks): + return None + return False + + +def check_keywords(sig): + num_pos_only, func, keyword_exclude, sigspec = sig + if keyword_exclude: + return True + return has_keywords(func, sigspec) + + +def _has_keywords(func): + if func not in signatures: + return None + sigs = signatures[func] + checks = [check_keywords(sig) for sig in sigs] + if all(checks): + return True + elif any(checks): + return None + return False + + +def check_required_args(sig): + num_pos_only, func, keyword_exclude, sigspec = sig + return num_required_args(func, sigspec) + + +def _num_required_args(func): + if func not in signatures: + return None + sigs = signatures[func] + vals = [check_required_args(sig) for sig in sigs] + val = vals[0] + if all(x == val for x in vals): + return val + return None diff --git a/contrib/python/toolz/py3/toolz/_version.py b/contrib/python/toolz/py3/toolz/_version.py new file mode 100644 index 0000000000..6e979d1048 --- /dev/null +++ b/contrib/python/toolz/py3/toolz/_version.py @@ -0,0 +1,21 @@ + +# This file was generated by 'versioneer.py' (0.18) from +# revision-control system data, or from the parent 
directory name of an +# unpacked source archive. Distribution tarballs contain a pre-generated copy +# of this file. + +import json + +version_json = ''' +{ + "date": "2022-07-09T23:15:45-0500", + "dirty": false, + "error": null, + "full-revisionid": "245b78e6320c41a4a9cdd15c6123681fbfb62843", + "version": "0.12.0" +} +''' # END VERSION_JSON + + +def get_versions(): + return json.loads(version_json) diff --git a/contrib/python/toolz/py3/toolz/compatibility.py b/contrib/python/toolz/py3/toolz/compatibility.py new file mode 100644 index 0000000000..28bef91dc8 --- /dev/null +++ b/contrib/python/toolz/py3/toolz/compatibility.py @@ -0,0 +1,30 @@ +import warnings +warnings.warn("The toolz.compatibility module is no longer " + "needed in Python 3 and has been deprecated. Please " + "import these utilities directly from the standard library. " + "This module will be removed in a future release.", + category=DeprecationWarning, stacklevel=2) + +import operator +import sys + +PY3 = sys.version_info[0] > 2 +PY34 = sys.version_info[0] == 3 and sys.version_info[1] == 4 +PYPY = hasattr(sys, 'pypy_version_info') and PY3 + +__all__ = ('map', 'filter', 'range', 'zip', 'reduce', 'zip_longest', + 'iteritems', 'iterkeys', 'itervalues', 'filterfalse', + 'PY3', 'PY34', 'PYPY') + + +map = map +filter = filter +range = range +zip = zip +from functools import reduce +from itertools import zip_longest +from itertools import filterfalse +iteritems = operator.methodcaller('items') +iterkeys = operator.methodcaller('keys') +itervalues = operator.methodcaller('values') +from collections.abc import Sequence diff --git a/contrib/python/toolz/py3/toolz/curried/__init__.py b/contrib/python/toolz/py3/toolz/curried/__init__.py new file mode 100644 index 0000000000..356eddbd3b --- /dev/null +++ b/contrib/python/toolz/py3/toolz/curried/__init__.py @@ -0,0 +1,103 @@ +""" +Alternate namespace for toolz such that all functions are curried + +Currying provides implicit partial evaluation of all functions + 
+Example: + + Get usually requires two arguments, an index and a collection + >>> from toolz.curried import get + >>> get(0, ('a', 'b')) + 'a' + + When we use it in higher order functions we often want to pass a partially + evaluated form + >>> data = [(1, 2), (11, 22), (111, 222)] + >>> list(map(lambda seq: get(0, seq), data)) + [1, 11, 111] + + The curried version allows simple expression of partial evaluation + >>> list(map(get(0), data)) + [1, 11, 111] + +See Also: + toolz.functoolz.curry +""" +import toolz +from . import operator +from toolz import ( + apply, + comp, + complement, + compose, + compose_left, + concat, + concatv, + count, + curry, + diff, + first, + flip, + frequencies, + identity, + interleave, + isdistinct, + isiterable, + juxt, + last, + memoize, + merge_sorted, + peek, + pipe, + second, + thread_first, + thread_last, +) +from .exceptions import merge, merge_with + +accumulate = toolz.curry(toolz.accumulate) +assoc = toolz.curry(toolz.assoc) +assoc_in = toolz.curry(toolz.assoc_in) +cons = toolz.curry(toolz.cons) +countby = toolz.curry(toolz.countby) +dissoc = toolz.curry(toolz.dissoc) +do = toolz.curry(toolz.do) +drop = toolz.curry(toolz.drop) +excepts = toolz.curry(toolz.excepts) +filter = toolz.curry(toolz.filter) +get = toolz.curry(toolz.get) +get_in = toolz.curry(toolz.get_in) +groupby = toolz.curry(toolz.groupby) +interpose = toolz.curry(toolz.interpose) +itemfilter = toolz.curry(toolz.itemfilter) +itemmap = toolz.curry(toolz.itemmap) +iterate = toolz.curry(toolz.iterate) +join = toolz.curry(toolz.join) +keyfilter = toolz.curry(toolz.keyfilter) +keymap = toolz.curry(toolz.keymap) +map = toolz.curry(toolz.map) +mapcat = toolz.curry(toolz.mapcat) +nth = toolz.curry(toolz.nth) +partial = toolz.curry(toolz.partial) +partition = toolz.curry(toolz.partition) +partition_all = toolz.curry(toolz.partition_all) +partitionby = toolz.curry(toolz.partitionby) +peekn = toolz.curry(toolz.peekn) +pluck = toolz.curry(toolz.pluck) +random_sample = 
toolz.curry(toolz.random_sample) +reduce = toolz.curry(toolz.reduce) +reduceby = toolz.curry(toolz.reduceby) +remove = toolz.curry(toolz.remove) +sliding_window = toolz.curry(toolz.sliding_window) +sorted = toolz.curry(toolz.sorted) +tail = toolz.curry(toolz.tail) +take = toolz.curry(toolz.take) +take_nth = toolz.curry(toolz.take_nth) +topk = toolz.curry(toolz.topk) +unique = toolz.curry(toolz.unique) +update_in = toolz.curry(toolz.update_in) +valfilter = toolz.curry(toolz.valfilter) +valmap = toolz.curry(toolz.valmap) + +del exceptions +del toolz diff --git a/contrib/python/toolz/py3/toolz/curried/exceptions.py b/contrib/python/toolz/py3/toolz/curried/exceptions.py new file mode 100644 index 0000000000..75a52bbbf2 --- /dev/null +++ b/contrib/python/toolz/py3/toolz/curried/exceptions.py @@ -0,0 +1,18 @@ +import toolz + + +__all__ = ['merge_with', 'merge'] + + +@toolz.curry +def merge_with(func, d, *dicts, **kwargs): + return toolz.merge_with(func, d, *dicts, **kwargs) + + +@toolz.curry +def merge(d, *dicts, **kwargs): + return toolz.merge(d, *dicts, **kwargs) + + +merge_with.__doc__ = toolz.merge_with.__doc__ +merge.__doc__ = toolz.merge.__doc__ diff --git a/contrib/python/toolz/py3/toolz/curried/operator.py b/contrib/python/toolz/py3/toolz/curried/operator.py new file mode 100644 index 0000000000..35979a6851 --- /dev/null +++ b/contrib/python/toolz/py3/toolz/curried/operator.py @@ -0,0 +1,22 @@ +from __future__ import absolute_import + +import operator + +from toolz.functoolz import curry + + +# Tests will catch if/when this needs updated +IGNORE = { + "__abs__", "__index__", "__inv__", "__invert__", "__neg__", "__not__", + "__pos__", "_abs", "abs", "attrgetter", "index", "inv", "invert", + "itemgetter", "neg", "not_", "pos", "truth" +} +locals().update( + {name: f if name in IGNORE else curry(f) + for name, f in vars(operator).items() if callable(f)} +) + +# Clean up the namespace. 
+del IGNORE +del curry +del operator diff --git a/contrib/python/toolz/py3/toolz/dicttoolz.py b/contrib/python/toolz/py3/toolz/dicttoolz.py new file mode 100644 index 0000000000..457bc26928 --- /dev/null +++ b/contrib/python/toolz/py3/toolz/dicttoolz.py @@ -0,0 +1,339 @@ +import operator +import collections +from functools import reduce +from collections.abc import Mapping + +__all__ = ('merge', 'merge_with', 'valmap', 'keymap', 'itemmap', + 'valfilter', 'keyfilter', 'itemfilter', + 'assoc', 'dissoc', 'assoc_in', 'update_in', 'get_in') + + +def _get_factory(f, kwargs): + factory = kwargs.pop('factory', dict) + if kwargs: + raise TypeError("{}() got an unexpected keyword argument " + "'{}'".format(f.__name__, kwargs.popitem()[0])) + return factory + + +def merge(*dicts, **kwargs): + """ Merge a collection of dictionaries + + >>> merge({1: 'one'}, {2: 'two'}) + {1: 'one', 2: 'two'} + + Later dictionaries have precedence + + >>> merge({1: 2, 3: 4}, {3: 3, 4: 4}) + {1: 2, 3: 3, 4: 4} + + See Also: + merge_with + """ + if len(dicts) == 1 and not isinstance(dicts[0], Mapping): + dicts = dicts[0] + factory = _get_factory(merge, kwargs) + + rv = factory() + for d in dicts: + rv.update(d) + return rv + + +def merge_with(func, *dicts, **kwargs): + """ Merge dictionaries and apply function to combined values + + A key may occur in more than one dict, and all values mapped from the key + will be passed to the function as a list, such as func([val1, val2, ...]). 
+ + >>> merge_with(sum, {1: 1, 2: 2}, {1: 10, 2: 20}) + {1: 11, 2: 22} + + >>> merge_with(first, {1: 1, 2: 2}, {2: 20, 3: 30}) # doctest: +SKIP + {1: 1, 2: 2, 3: 30} + + See Also: + merge + """ + if len(dicts) == 1 and not isinstance(dicts[0], Mapping): + dicts = dicts[0] + factory = _get_factory(merge_with, kwargs) + + values = collections.defaultdict(lambda: [].append) + for d in dicts: + for k, v in d.items(): + values[k](v) + + result = factory() + for k, v in values.items(): + result[k] = func(v.__self__) + return result + + +def valmap(func, d, factory=dict): + """ Apply function to values of dictionary + + >>> bills = {"Alice": [20, 15, 30], "Bob": [10, 35]} + >>> valmap(sum, bills) # doctest: +SKIP + {'Alice': 65, 'Bob': 45} + + See Also: + keymap + itemmap + """ + rv = factory() + rv.update(zip(d.keys(), map(func, d.values()))) + return rv + + +def keymap(func, d, factory=dict): + """ Apply function to keys of dictionary + + >>> bills = {"Alice": [20, 15, 30], "Bob": [10, 35]} + >>> keymap(str.lower, bills) # doctest: +SKIP + {'alice': [20, 15, 30], 'bob': [10, 35]} + + See Also: + valmap + itemmap + """ + rv = factory() + rv.update(zip(map(func, d.keys()), d.values())) + return rv + + +def itemmap(func, d, factory=dict): + """ Apply function to items of dictionary + + >>> accountids = {"Alice": 10, "Bob": 20} + >>> itemmap(reversed, accountids) # doctest: +SKIP + {10: "Alice", 20: "Bob"} + + See Also: + keymap + valmap + """ + rv = factory() + rv.update(map(func, d.items())) + return rv + + +def valfilter(predicate, d, factory=dict): + """ Filter items in dictionary by value + + >>> iseven = lambda x: x % 2 == 0 + >>> d = {1: 2, 2: 3, 3: 4, 4: 5} + >>> valfilter(iseven, d) + {1: 2, 3: 4} + + See Also: + keyfilter + itemfilter + valmap + """ + rv = factory() + for k, v in d.items(): + if predicate(v): + rv[k] = v + return rv + + +def keyfilter(predicate, d, factory=dict): + """ Filter items in dictionary by key + + >>> iseven = lambda x: x % 2 == 0 + >>> d 
= {1: 2, 2: 3, 3: 4, 4: 5} + >>> keyfilter(iseven, d) + {2: 3, 4: 5} + + See Also: + valfilter + itemfilter + keymap + """ + rv = factory() + for k, v in d.items(): + if predicate(k): + rv[k] = v + return rv + + +def itemfilter(predicate, d, factory=dict): + """ Filter items in dictionary by item + + >>> def isvalid(item): + ... k, v = item + ... return k % 2 == 0 and v < 4 + + >>> d = {1: 2, 2: 3, 3: 4, 4: 5} + >>> itemfilter(isvalid, d) + {2: 3} + + See Also: + keyfilter + valfilter + itemmap + """ + rv = factory() + for item in d.items(): + if predicate(item): + k, v = item + rv[k] = v + return rv + + +def assoc(d, key, value, factory=dict): + """ Return a new dict with new key value pair + + New dict has d[key] set to value. Does not modify the initial dictionary. + + >>> assoc({'x': 1}, 'x', 2) + {'x': 2} + >>> assoc({'x': 1}, 'y', 3) # doctest: +SKIP + {'x': 1, 'y': 3} + """ + d2 = factory() + d2.update(d) + d2[key] = value + return d2 + + +def dissoc(d, *keys, **kwargs): + """ Return a new dict with the given key(s) removed. + + New dict has d[key] deleted for each supplied key. + Does not modify the initial dictionary. + + >>> dissoc({'x': 1, 'y': 2}, 'y') + {'x': 1} + >>> dissoc({'x': 1, 'y': 2}, 'y', 'x') + {} + >>> dissoc({'x': 1}, 'y') # Ignores missing keys + {'x': 1} + """ + factory = _get_factory(dissoc, kwargs) + d2 = factory() + + if len(keys) < len(d) * .6: + d2.update(d) + for key in keys: + if key in d2: + del d2[key] + else: + remaining = set(d) + remaining.difference_update(keys) + for k in remaining: + d2[k] = d[k] + return d2 + + +def assoc_in(d, keys, value, factory=dict): + """ Return a new dict with new, potentially nested, key value pair + + >>> purchase = {'name': 'Alice', + ... 'order': {'items': ['Apple', 'Orange'], + ... 'costs': [0.50, 1.25]}, + ... 
'credit card': '5555-1234-1234-1234'} + >>> assoc_in(purchase, ['order', 'costs'], [0.25, 1.00]) # doctest: +SKIP + {'credit card': '5555-1234-1234-1234', + 'name': 'Alice', + 'order': {'costs': [0.25, 1.00], 'items': ['Apple', 'Orange']}} + """ + return update_in(d, keys, lambda x: value, value, factory) + + +def update_in(d, keys, func, default=None, factory=dict): + """ Update value in a (potentially) nested dictionary + + inputs: + d - dictionary on which to operate + keys - list or tuple giving the location of the value to be changed in d + func - function to operate on that value + + If keys == [k0,..,kX] and d[k0]..[kX] == v, update_in returns a copy of the + original dictionary with v replaced by func(v), but does not mutate the + original dictionary. + + If k0 is not a key in d, update_in creates nested dictionaries to the depth + specified by the keys, with the innermost value set to func(default). + + >>> inc = lambda x: x + 1 + >>> update_in({'a': 0}, ['a'], inc) + {'a': 1} + + >>> transaction = {'name': 'Alice', + ... 'purchase': {'items': ['Apple', 'Orange'], + ... 'costs': [0.50, 1.25]}, + ... 
'credit card': '5555-1234-1234-1234'} + >>> update_in(transaction, ['purchase', 'costs'], sum) # doctest: +SKIP + {'credit card': '5555-1234-1234-1234', + 'name': 'Alice', + 'purchase': {'costs': 1.75, 'items': ['Apple', 'Orange']}} + + >>> # updating a value when k0 is not in d + >>> update_in({}, [1, 2, 3], str, default="bar") + {1: {2: {3: 'bar'}}} + >>> update_in({1: 'foo'}, [2, 3, 4], inc, 0) + {1: 'foo', 2: {3: {4: 1}}} + """ + ks = iter(keys) + k = next(ks) + + rv = inner = factory() + rv.update(d) + + for key in ks: + if k in d: + d = d[k] + dtemp = factory() + dtemp.update(d) + else: + d = dtemp = factory() + + inner[k] = inner = dtemp + k = key + + if k in d: + inner[k] = func(d[k]) + else: + inner[k] = func(default) + return rv + + +def get_in(keys, coll, default=None, no_default=False): + """ Returns coll[i0][i1]...[iX] where [i0, i1, ..., iX]==keys. + + If coll[i0][i1]...[iX] cannot be found, returns ``default``, unless + ``no_default`` is specified, then it raises KeyError or IndexError. + + ``get_in`` is a generalization of ``operator.getitem`` for nested data + structures such as dictionaries and lists. + + >>> transaction = {'name': 'Alice', + ... 'purchase': {'items': ['Apple', 'Orange'], + ... 'costs': [0.50, 1.25]}, + ... 'credit card': '5555-1234-1234-1234'} + >>> get_in(['purchase', 'items', 0], transaction) + 'Apple' + >>> get_in(['name'], transaction) + 'Alice' + >>> get_in(['purchase', 'total'], transaction) + >>> get_in(['purchase', 'items', 'apple'], transaction) + >>> get_in(['purchase', 'items', 10], transaction) + >>> get_in(['purchase', 'total'], transaction, 0) + 0 + >>> get_in(['y'], {}, no_default=True) + Traceback (most recent call last): + ... 
+ KeyError: 'y' + + See Also: + itertoolz.get + operator.getitem + """ + try: + return reduce(operator.getitem, keys, coll) + except (KeyError, IndexError, TypeError): + if no_default: + raise + return default diff --git a/contrib/python/toolz/py3/toolz/functoolz.py b/contrib/python/toolz/py3/toolz/functoolz.py new file mode 100644 index 0000000000..2c75d3a42a --- /dev/null +++ b/contrib/python/toolz/py3/toolz/functoolz.py @@ -0,0 +1,1048 @@ +from functools import reduce, partial +import inspect +import sys +from operator import attrgetter, not_ +from importlib import import_module +from types import MethodType + +from .utils import no_default + +PYPY = hasattr(sys, 'pypy_version_info') and sys.version_info[0] > 2 + + +__all__ = ('identity', 'apply', 'thread_first', 'thread_last', 'memoize', + 'compose', 'compose_left', 'pipe', 'complement', 'juxt', 'do', + 'curry', 'flip', 'excepts') + +PYPY = hasattr(sys, 'pypy_version_info') + + +def identity(x): + """ Identity function. Return x + + >>> identity(3) + 3 + """ + return x + + +def apply(*func_and_args, **kwargs): + """ Applies a function and returns the results + + >>> def double(x): return 2*x + >>> def inc(x): return x + 1 + >>> apply(double, 5) + 10 + + >>> tuple(map(apply, [double, inc, double], [10, 500, 8000])) + (20, 501, 16000) + """ + if not func_and_args: + raise TypeError('func argument is required') + func, args = func_and_args[0], func_and_args[1:] + return func(*args, **kwargs) + + +def thread_first(val, *forms): + """ Thread value through a sequence of functions/forms + + >>> def double(x): return 2*x + >>> def inc(x): return x + 1 + >>> thread_first(1, inc, double) + 4 + + If the function expects more than one input you can specify those inputs + in a tuple. The value is used as the first input. 
+ + >>> def add(x, y): return x + y + >>> def pow(x, y): return x**y + >>> thread_first(1, (add, 4), (pow, 2)) # pow(add(1, 4), 2) + 25 + + So in general + thread_first(x, f, (g, y, z)) + expands to + g(f(x), y, z) + + See Also: + thread_last + """ + def evalform_front(val, form): + if callable(form): + return form(val) + if isinstance(form, tuple): + func, args = form[0], form[1:] + args = (val,) + args + return func(*args) + return reduce(evalform_front, forms, val) + + +def thread_last(val, *forms): + """ Thread value through a sequence of functions/forms + + >>> def double(x): return 2*x + >>> def inc(x): return x + 1 + >>> thread_last(1, inc, double) + 4 + + If the function expects more than one input you can specify those inputs + in a tuple. The value is used as the last input. + + >>> def add(x, y): return x + y + >>> def pow(x, y): return x**y + >>> thread_last(1, (add, 4), (pow, 2)) # pow(2, add(4, 1)) + 32 + + So in general + thread_last(x, f, (g, y, z)) + expands to + g(y, z, f(x)) + + >>> def iseven(x): + ... return x % 2 == 0 + >>> list(thread_last([1, 2, 3], (map, inc), (filter, iseven))) + [2, 4] + + See Also: + thread_first + """ + def evalform_back(val, form): + if callable(form): + return form(val) + if isinstance(form, tuple): + func, args = form[0], form[1:] + args = args + (val,) + return func(*args) + return reduce(evalform_back, forms, val) + + +def instanceproperty(fget=None, fset=None, fdel=None, doc=None, classval=None): + """ Like @property, but returns ``classval`` when used as a class attribute + + >>> class MyClass(object): + ... '''The class docstring''' + ... @instanceproperty(classval=__doc__) + ... def __doc__(self): + ... return 'An object docstring' + ... @instanceproperty + ... def val(self): + ... return 42 + ... 
+ >>> MyClass.__doc__ + 'The class docstring' + >>> MyClass.val is None + True + >>> obj = MyClass() + >>> obj.__doc__ + 'An object docstring' + >>> obj.val + 42 + """ + if fget is None: + return partial(instanceproperty, fset=fset, fdel=fdel, doc=doc, + classval=classval) + return InstanceProperty(fget=fget, fset=fset, fdel=fdel, doc=doc, + classval=classval) + + +class InstanceProperty(property): + """ Like @property, but returns ``classval`` when used as a class attribute + + Should not be used directly. Use ``instanceproperty`` instead. + """ + def __init__(self, fget=None, fset=None, fdel=None, doc=None, + classval=None): + self.classval = classval + property.__init__(self, fget=fget, fset=fset, fdel=fdel, doc=doc) + + def __get__(self, obj, type=None): + if obj is None: + return self.classval + return property.__get__(self, obj, type) + + def __reduce__(self): + state = (self.fget, self.fset, self.fdel, self.__doc__, self.classval) + return InstanceProperty, state + + +class curry(object): + """ Curry a callable function + + Enables partial application of arguments through calling a function with an + incomplete set of arguments. + + >>> def mul(x, y): + ... return x * y + >>> mul = curry(mul) + + >>> double = mul(2) + >>> double(10) + 20 + + Also supports keyword arguments + + >>> @curry # Can use curry as a decorator + ... def f(x, y, a=10): + ... return a * (x + y) + + >>> add = f(a=1) + >>> add(2, 3) + 5 + + See Also: + toolz.curried - namespace of curried functions + https://toolz.readthedocs.io/en/latest/curry.html + """ + def __init__(self, *args, **kwargs): + if not args: + raise TypeError('__init__() takes at least 2 arguments (1 given)') + func, args = args[0], args[1:] + if not callable(func): + raise TypeError("Input must be callable") + + # curry- or functools.partial-like object? 
Unpack and merge arguments + if ( + hasattr(func, 'func') + and hasattr(func, 'args') + and hasattr(func, 'keywords') + and isinstance(func.args, tuple) + ): + _kwargs = {} + if func.keywords: + _kwargs.update(func.keywords) + _kwargs.update(kwargs) + kwargs = _kwargs + args = func.args + args + func = func.func + + if kwargs: + self._partial = partial(func, *args, **kwargs) + else: + self._partial = partial(func, *args) + + self.__doc__ = getattr(func, '__doc__', None) + self.__name__ = getattr(func, '__name__', '<curry>') + self.__module__ = getattr(func, '__module__', None) + self.__qualname__ = getattr(func, '__qualname__', None) + self._sigspec = None + self._has_unknown_args = None + + @instanceproperty + def func(self): + return self._partial.func + + @instanceproperty + def __signature__(self): + sig = inspect.signature(self.func) + args = self.args or () + keywords = self.keywords or {} + if is_partial_args(self.func, args, keywords, sig) is False: + raise TypeError('curry object has incorrect arguments') + + params = list(sig.parameters.values()) + skip = 0 + for param in params[:len(args)]: + if param.kind == param.VAR_POSITIONAL: + break + skip += 1 + + kwonly = False + newparams = [] + for param in params[skip:]: + kind = param.kind + default = param.default + if kind == param.VAR_KEYWORD: + pass + elif kind == param.VAR_POSITIONAL: + if kwonly: + continue + elif param.name in keywords: + default = keywords[param.name] + kind = param.KEYWORD_ONLY + kwonly = True + else: + if kwonly: + kind = param.KEYWORD_ONLY + if default is param.empty: + default = no_default + newparams.append(param.replace(default=default, kind=kind)) + + return sig.replace(parameters=newparams) + + @instanceproperty + def args(self): + return self._partial.args + + @instanceproperty + def keywords(self): + return self._partial.keywords + + @instanceproperty + def func_name(self): + return self.__name__ + + def __str__(self): + return str(self.func) + + def __repr__(self): + return 
repr(self.func) + + def __hash__(self): + return hash((self.func, self.args, + frozenset(self.keywords.items()) if self.keywords + else None)) + + def __eq__(self, other): + return (isinstance(other, curry) and self.func == other.func and + self.args == other.args and self.keywords == other.keywords) + + def __ne__(self, other): + return not self.__eq__(other) + + def __call__(self, *args, **kwargs): + try: + return self._partial(*args, **kwargs) + except TypeError as exc: + if self._should_curry(args, kwargs, exc): + return self.bind(*args, **kwargs) + raise + + def _should_curry(self, args, kwargs, exc=None): + func = self.func + args = self.args + args + if self.keywords: + kwargs = dict(self.keywords, **kwargs) + if self._sigspec is None: + sigspec = self._sigspec = _sigs.signature_or_spec(func) + self._has_unknown_args = has_varargs(func, sigspec) is not False + else: + sigspec = self._sigspec + + if is_partial_args(func, args, kwargs, sigspec) is False: + # Nothing can make the call valid + return False + elif self._has_unknown_args: + # The call may be valid and raised a TypeError, but we curry + # anyway because the function may have `*args`. This is useful + # for decorators with signature `func(*args, **kwargs)`. 
+ return True + elif not is_valid_args(func, args, kwargs, sigspec): + # Adding more arguments may make the call valid + return True + else: + # There was a genuine TypeError + return False + + def bind(self, *args, **kwargs): + return type(self)(self, *args, **kwargs) + + def call(self, *args, **kwargs): + return self._partial(*args, **kwargs) + + def __get__(self, instance, owner): + if instance is None: + return self + return curry(self, instance) + + def __reduce__(self): + func = self.func + modname = getattr(func, '__module__', None) + qualname = getattr(func, '__qualname__', None) + if qualname is None: # pragma: no cover + qualname = getattr(func, '__name__', None) + is_decorated = None + if modname and qualname: + attrs = [] + obj = import_module(modname) + for attr in qualname.split('.'): + if isinstance(obj, curry): + attrs.append('func') + obj = obj.func + obj = getattr(obj, attr, None) + if obj is None: + break + attrs.append(attr) + if isinstance(obj, curry) and obj.func is func: + is_decorated = obj is self + qualname = '.'.join(attrs) + func = '%s:%s' % (modname, qualname) + + # functools.partial objects can't be pickled + userdict = tuple((k, v) for k, v in self.__dict__.items() + if k not in ('_partial', '_sigspec')) + state = (type(self), func, self.args, self.keywords, userdict, + is_decorated) + return _restore_curry, state + + +def _restore_curry(cls, func, args, kwargs, userdict, is_decorated): + if isinstance(func, str): + modname, qualname = func.rsplit(':', 1) + obj = import_module(modname) + for attr in qualname.split('.'): + obj = getattr(obj, attr) + if is_decorated: + return obj + func = obj.func + obj = cls(func, *args, **(kwargs or {})) + obj.__dict__.update(userdict) + return obj + + +@curry +def memoize(func, cache=None, key=None): + """ Cache a function's result for speedy future evaluation + + Considerations: + Trades memory for speed. + Only use on pure functions. 
+ + >>> def add(x, y): return x + y + >>> add = memoize(add) + + Or use as a decorator + + >>> @memoize + ... def add(x, y): + ... return x + y + + Use the ``cache`` keyword to provide a dict-like object as an initial cache + + >>> @memoize(cache={(1, 2): 3}) + ... def add(x, y): + ... return x + y + + Note that the above works as a decorator because ``memoize`` is curried. + + It is also possible to provide a ``key(args, kwargs)`` function that + calculates keys used for the cache, which receives an ``args`` tuple and + ``kwargs`` dict as input, and must return a hashable value. However, + the default key function should be sufficient most of the time. + + >>> # Use key function that ignores extraneous keyword arguments + >>> @memoize(key=lambda args, kwargs: args) + ... def add(x, y, verbose=False): + ... if verbose: + ... print('Calculating %s + %s' % (x, y)) + ... return x + y + """ + if cache is None: + cache = {} + + try: + may_have_kwargs = has_keywords(func) is not False + # Is unary function (single arg, no variadic argument or keywords)? 
+ is_unary = is_arity(1, func) + except TypeError: # pragma: no cover + may_have_kwargs = True + is_unary = False + + if key is None: + if is_unary: + def key(args, kwargs): + return args[0] + elif may_have_kwargs: + def key(args, kwargs): + return ( + args or None, + frozenset(kwargs.items()) if kwargs else None, + ) + else: + def key(args, kwargs): + return args + + def memof(*args, **kwargs): + k = key(args, kwargs) + try: + return cache[k] + except TypeError: + raise TypeError("Arguments to memoized function must be hashable") + except KeyError: + cache[k] = result = func(*args, **kwargs) + return result + + try: + memof.__name__ = func.__name__ + except AttributeError: + pass + memof.__doc__ = func.__doc__ + memof.__wrapped__ = func + return memof + + +class Compose(object): + """ A composition of functions + + See Also: + compose + """ + __slots__ = 'first', 'funcs' + + def __init__(self, funcs): + funcs = tuple(reversed(funcs)) + self.first = funcs[0] + self.funcs = funcs[1:] + + def __call__(self, *args, **kwargs): + ret = self.first(*args, **kwargs) + for f in self.funcs: + ret = f(ret) + return ret + + def __getstate__(self): + return self.first, self.funcs + + def __setstate__(self, state): + self.first, self.funcs = state + + @instanceproperty(classval=__doc__) + def __doc__(self): + def composed_doc(*fs): + """Generate a docstring for the composition of fs. + """ + if not fs: + # Argument name for the docstring. + return '*args, **kwargs' + + return '{f}({g})'.format(f=fs[0].__name__, g=composed_doc(*fs[1:])) + + try: + return ( + 'lambda *args, **kwargs: ' + + composed_doc(*reversed((self.first,) + self.funcs)) + ) + except AttributeError: + # One of our callables does not have a `__name__`, whatever. 
+ return 'A composition of functions' + + @property + def __name__(self): + try: + return '_of_'.join( + (f.__name__ for f in reversed((self.first,) + self.funcs)) + ) + except AttributeError: + return type(self).__name__ + + def __repr__(self): + return '{.__class__.__name__}{!r}'.format( + self, tuple(reversed((self.first, ) + self.funcs))) + + def __eq__(self, other): + if isinstance(other, Compose): + return other.first == self.first and other.funcs == self.funcs + return NotImplemented + + def __ne__(self, other): + equality = self.__eq__(other) + return NotImplemented if equality is NotImplemented else not equality + + def __hash__(self): + return hash(self.first) ^ hash(self.funcs) + + # Mimic the descriptor behavior of python functions. + # i.e. let Compose be called as a method when bound to a class. + # adapted from + # docs.python.org/3/howto/descriptor.html#functions-and-methods + def __get__(self, obj, objtype=None): + return self if obj is None else MethodType(self, obj) + + # introspection with Signature is only possible from py3.3+ + @instanceproperty + def __signature__(self): + base = inspect.signature(self.first) + last = inspect.signature(self.funcs[-1]) + return base.replace(return_annotation=last.return_annotation) + + __wrapped__ = instanceproperty(attrgetter('first')) + + +def compose(*funcs): + """ Compose functions to operate in series. + + Returns a function that applies other functions in sequence. + + Functions are applied from right to left so that + ``compose(f, g, h)(x, y)`` is the same as ``f(g(h(x, y)))``. + + If no arguments are provided, the identity function (f(x) = x) is returned. + + >>> inc = lambda i: i + 1 + >>> compose(str, inc)(3) + '4' + + See Also: + compose_left + pipe + """ + if not funcs: + return identity + if len(funcs) == 1: + return funcs[0] + else: + return Compose(funcs) + + +def compose_left(*funcs): + """ Compose functions to operate in series. + + Returns a function that applies other functions in sequence. 
+ + Functions are applied from left to right so that + ``compose_left(f, g, h)(x, y)`` is the same as ``h(g(f(x, y)))``. + + If no arguments are provided, the identity function (f(x) = x) is returned. + + >>> inc = lambda i: i + 1 + >>> compose_left(inc, str)(3) + '4' + + See Also: + compose + pipe + """ + return compose(*reversed(funcs)) + + +def pipe(data, *funcs): + """ Pipe a value through a sequence of functions + + I.e. ``pipe(data, f, g, h)`` is equivalent to ``h(g(f(data)))`` + + We think of the value as progressing through a pipe of several + transformations, much like pipes in UNIX + + ``$ cat data | f | g | h`` + + >>> double = lambda i: 2 * i + >>> pipe(3, double, str) + '6' + + See Also: + compose + compose_left + thread_first + thread_last + """ + for func in funcs: + data = func(data) + return data + + +def complement(func): + """ Convert a predicate function to its logical complement. + + In other words, return a function that, for inputs that normally + yield True, yields False, and vice-versa. + + >>> def iseven(n): return n % 2 == 0 + >>> isodd = complement(iseven) + >>> iseven(2) + True + >>> isodd(2) + False + """ + return compose(not_, func) + + +class juxt(object): + """ Creates a function that calls several functions with the same arguments + + Takes several functions and returns a function that applies its arguments + to each of those functions then returns a tuple of the results. + + Name comes from juxtaposition: the fact of two things being seen or placed + close together with contrasting effect. 
+ + >>> inc = lambda x: x + 1 + >>> double = lambda x: x * 2 + >>> juxt(inc, double)(10) + (11, 20) + >>> juxt([inc, double])(10) + (11, 20) + """ + __slots__ = ['funcs'] + + def __init__(self, *funcs): + if len(funcs) == 1 and not callable(funcs[0]): + funcs = funcs[0] + self.funcs = tuple(funcs) + + def __call__(self, *args, **kwargs): + return tuple(func(*args, **kwargs) for func in self.funcs) + + def __getstate__(self): + return self.funcs + + def __setstate__(self, state): + self.funcs = state + + +def do(func, x): + """ Runs ``func`` on ``x``, returns ``x`` + + Because the results of ``func`` are not returned, only the side + effects of ``func`` are relevant. + + Logging functions can be made by composing ``do`` with a storage function + like ``list.append`` or ``file.write`` + + >>> from toolz import compose + >>> from toolz.curried import do + + >>> log = [] + >>> inc = lambda x: x + 1 + >>> inc = compose(inc, do(log.append)) + >>> inc(1) + 2 + >>> inc(11) + 12 + >>> log + [1, 11] + """ + func(x) + return x + + +@curry +def flip(func, a, b): + """ Call the function call with the arguments flipped + + This function is curried. + + >>> def div(a, b): + ... return a // b + ... + >>> flip(div, 2, 6) + 3 + >>> div_by_two = flip(div, 2) + >>> div_by_two(4) + 2 + + This is particularly useful for built in functions and functions defined + in C extensions that accept positional only arguments. For example: + isinstance, issubclass. + + >>> data = [1, 'a', 'b', 2, 1.5, object(), 3] + >>> only_ints = list(filter(flip(isinstance, int), data)) + >>> only_ints + [1, 2, 3] + """ + return func(b, a) + + +def return_none(exc): + """ Returns None. + """ + return None + + +class excepts(object): + """A wrapper around a function to catch exceptions and + dispatch to a handler. + + This is like a functional try/except block, in the same way that + ifexprs are functional if/else blocks. + + Examples + -------- + >>> excepting = excepts( + ... ValueError, + ... 
lambda a: [1, 2].index(a), + ... lambda _: -1, + ... ) + >>> excepting(1) + 0 + >>> excepting(3) + -1 + + Multiple exceptions and default except clause. + >>> excepting = excepts((IndexError, KeyError), lambda a: a[0]) + >>> excepting([]) + >>> excepting([1]) + 1 + >>> excepting({}) + >>> excepting({0: 1}) + 1 + """ + def __init__(self, exc, func, handler=return_none): + self.exc = exc + self.func = func + self.handler = handler + + def __call__(self, *args, **kwargs): + try: + return self.func(*args, **kwargs) + except self.exc as e: + return self.handler(e) + + @instanceproperty(classval=__doc__) + def __doc__(self): + from textwrap import dedent + + exc = self.exc + try: + if isinstance(exc, tuple): + exc_name = '(%s)' % ', '.join( + map(attrgetter('__name__'), exc), + ) + else: + exc_name = exc.__name__ + + return dedent( + """\ + A wrapper around {inst.func.__name__!r} that will except: + {exc} + and handle any exceptions with {inst.handler.__name__!r}. + + Docs for {inst.func.__name__!r}: + {inst.func.__doc__} + + Docs for {inst.handler.__name__!r}: + {inst.handler.__doc__} + """ + ).format( + inst=self, + exc=exc_name, + ) + except AttributeError: + return type(self).__doc__ + + @property + def __name__(self): + exc = self.exc + try: + if isinstance(exc, tuple): + exc_name = '_or_'.join(map(attrgetter('__name__'), exc)) + else: + exc_name = exc.__name__ + return '%s_excepting_%s' % (self.func.__name__, exc_name) + except AttributeError: + return 'excepting' + + +def _check_sigspec(sigspec, func, builtin_func, *builtin_args): + if sigspec is None: + try: + sigspec = inspect.signature(func) + except (ValueError, TypeError) as e: + sigspec = e + if isinstance(sigspec, ValueError): + return None, builtin_func(*builtin_args) + elif not isinstance(sigspec, inspect.Signature): + if ( + func in _sigs.signatures + and (( + hasattr(func, '__signature__') + and hasattr(func.__signature__, '__get__') + )) + ): + val = builtin_func(*builtin_args) + return None, val + 
return None, False + return sigspec, None + + +if PYPY: # pragma: no cover + _check_sigspec_orig = _check_sigspec + + def _check_sigspec(sigspec, func, builtin_func, *builtin_args): + # PyPy may lie, so use our registry for builtins instead + if func in _sigs.signatures: + val = builtin_func(*builtin_args) + return None, val + return _check_sigspec_orig(sigspec, func, builtin_func, *builtin_args) + + +_check_sigspec.__doc__ = """ \ +Private function to aid in introspection compatibly across Python versions. + +If a callable doesn't have a signature (Python 3) or an argspec (Python 2), +the signature registry in toolz._signatures is used. +""" + + +def num_required_args(func, sigspec=None): + sigspec, rv = _check_sigspec(sigspec, func, _sigs._num_required_args, + func) + if sigspec is None: + return rv + return sum(1 for p in sigspec.parameters.values() + if p.default is p.empty + and p.kind in (p.POSITIONAL_OR_KEYWORD, p.POSITIONAL_ONLY)) + + +def has_varargs(func, sigspec=None): + sigspec, rv = _check_sigspec(sigspec, func, _sigs._has_varargs, func) + if sigspec is None: + return rv + return any(p.kind == p.VAR_POSITIONAL + for p in sigspec.parameters.values()) + + +def has_keywords(func, sigspec=None): + sigspec, rv = _check_sigspec(sigspec, func, _sigs._has_keywords, func) + if sigspec is None: + return rv + return any(p.default is not p.empty + or p.kind in (p.KEYWORD_ONLY, p.VAR_KEYWORD) + for p in sigspec.parameters.values()) + + +def is_valid_args(func, args, kwargs, sigspec=None): + sigspec, rv = _check_sigspec(sigspec, func, _sigs._is_valid_args, + func, args, kwargs) + if sigspec is None: + return rv + try: + sigspec.bind(*args, **kwargs) + except TypeError: + return False + return True + + +def is_partial_args(func, args, kwargs, sigspec=None): + sigspec, rv = _check_sigspec(sigspec, func, _sigs._is_partial_args, + func, args, kwargs) + if sigspec is None: + return rv + try: + sigspec.bind_partial(*args, **kwargs) + except TypeError: + return False + 
return True + + +def is_arity(n, func, sigspec=None): + """ Does a function have only n positional arguments? + + This function relies on introspection and does not call the function. + Returns None if validity can't be determined. + + >>> def f(x): + ... return x + >>> is_arity(1, f) + True + >>> def g(x, y=1): + ... return x + y + >>> is_arity(1, g) + False + """ + sigspec, rv = _check_sigspec(sigspec, func, _sigs._is_arity, n, func) + if sigspec is None: + return rv + num = num_required_args(func, sigspec) + if num is not None: + num = num == n + if not num: + return False + varargs = has_varargs(func, sigspec) + if varargs: + return False + keywords = has_keywords(func, sigspec) + if keywords: + return False + if num is None or varargs is None or keywords is None: # pragma: no cover + return None + return True + + +num_required_args.__doc__ = """ \ +Number of required positional arguments + + This function relies on introspection and does not call the function. + Returns None if validity can't be determined. + + >>> def f(x, y, z=3): + ... return x + y + z + >>> num_required_args(f) + 2 + >>> def g(*args, **kwargs): + ... pass + >>> num_required_args(g) + 0 + """ + +has_varargs.__doc__ = """ \ +Does a function have variadic positional arguments? + + This function relies on introspection and does not call the function. + Returns None if validity can't be determined. + + >>> def f(*args): + ... return args + >>> has_varargs(f) + True + >>> def g(**kwargs): + ... return kwargs + >>> has_varargs(g) + False + """ + +has_keywords.__doc__ = """ \ +Does a function have keyword arguments? + + This function relies on introspection and does not call the function. + Returns None if validity can't be determined. + + >>> def f(x, y=0): + ... return x + y + + >>> has_keywords(f) + True + """ + +is_valid_args.__doc__ = """ \ +Is ``func(*args, **kwargs)`` a valid function call? + + This function relies on introspection and does not call the function. 
+ Returns None if validity can't be determined. + + >>> def add(x, y): + ... return x + y + + >>> is_valid_args(add, (1,), {}) + False + >>> is_valid_args(add, (1, 2), {}) + True + >>> is_valid_args(map, (), {}) + False + + **Implementation notes** + Python 2 relies on ``inspect.getargspec``, which only works for + user-defined functions. Python 3 uses ``inspect.signature``, which + works for many more types of callables. + + Many builtins in the standard library are also supported. + """ + +is_partial_args.__doc__ = """ \ +Can partial(func, *args, **kwargs)(*args2, **kwargs2) be a valid call? + + Returns True *only* if the call is valid or if it is possible for the + call to become valid by adding more positional or keyword arguments. + + This function relies on introspection and does not call the function. + Returns None if validity can't be determined. + + >>> def add(x, y): + ... return x + y + + >>> is_partial_args(add, (1,), {}) + True + >>> is_partial_args(add, (1, 2), {}) + True + >>> is_partial_args(add, (1, 2, 3), {}) + False + >>> is_partial_args(map, (), {}) + True + + **Implementation notes** + Python 2 relies on ``inspect.getargspec``, which only works for + user-defined functions. Python 3 uses ``inspect.signature``, which + works for many more types of callables. + + Many builtins in the standard library are also supported. + """ + +from . 
import _signatures as _sigs diff --git a/contrib/python/toolz/py3/toolz/itertoolz.py b/contrib/python/toolz/py3/toolz/itertoolz.py new file mode 100644 index 0000000000..5049e5eb4b --- /dev/null +++ b/contrib/python/toolz/py3/toolz/itertoolz.py @@ -0,0 +1,1057 @@ +import itertools +import heapq +import collections +import operator +from functools import partial +from itertools import filterfalse, zip_longest +from collections.abc import Sequence +from toolz.utils import no_default + + +__all__ = ('remove', 'accumulate', 'groupby', 'merge_sorted', 'interleave', + 'unique', 'isiterable', 'isdistinct', 'take', 'drop', 'take_nth', + 'first', 'second', 'nth', 'last', 'get', 'concat', 'concatv', + 'mapcat', 'cons', 'interpose', 'frequencies', 'reduceby', 'iterate', + 'sliding_window', 'partition', 'partition_all', 'count', 'pluck', + 'join', 'tail', 'diff', 'topk', 'peek', 'peekn', 'random_sample') + + +def remove(predicate, seq): + """ Return those items of sequence for which predicate(item) is False + + >>> def iseven(x): + ... return x % 2 == 0 + >>> list(remove(iseven, [1, 2, 3, 4])) + [1, 3] + """ + return filterfalse(predicate, seq) + + +def accumulate(binop, seq, initial=no_default): + """ Repeatedly apply binary function to a sequence, accumulating results + + >>> from operator import add, mul + >>> list(accumulate(add, [1, 2, 3, 4, 5])) + [1, 3, 6, 10, 15] + >>> list(accumulate(mul, [1, 2, 3, 4, 5])) + [1, 2, 6, 24, 120] + + Accumulate is similar to ``reduce`` and is good for making functions like + cumulative sum: + + >>> from functools import partial, reduce + >>> sum = partial(reduce, add) + >>> cumsum = partial(accumulate, add) + + Accumulate also takes an optional argument that will be used as the first + value. This is similar to reduce. 
+ + >>> list(accumulate(add, [1, 2, 3], -1)) + [-1, 0, 2, 5] + >>> list(accumulate(add, [], 1)) + [1] + + See Also: + itertools.accumulate : In standard itertools for Python 3.2+ + """ + seq = iter(seq) + if initial == no_default: + try: + result = next(seq) + except StopIteration: + return + else: + result = initial + yield result + for elem in seq: + result = binop(result, elem) + yield result + + +def groupby(key, seq): + """ Group a collection by a key function + + >>> names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank'] + >>> groupby(len, names) # doctest: +SKIP + {3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']} + + >>> iseven = lambda x: x % 2 == 0 + >>> groupby(iseven, [1, 2, 3, 4, 5, 6, 7, 8]) # doctest: +SKIP + {False: [1, 3, 5, 7], True: [2, 4, 6, 8]} + + Non-callable keys imply grouping on a member. + + >>> groupby('gender', [{'name': 'Alice', 'gender': 'F'}, + ... {'name': 'Bob', 'gender': 'M'}, + ... {'name': 'Charlie', 'gender': 'M'}]) # doctest:+SKIP + {'F': [{'gender': 'F', 'name': 'Alice'}], + 'M': [{'gender': 'M', 'name': 'Bob'}, + {'gender': 'M', 'name': 'Charlie'}]} + + Not to be confused with ``itertools.groupby`` + + See Also: + countby + """ + if not callable(key): + key = getter(key) + d = collections.defaultdict(lambda: [].append) + for item in seq: + d[key(item)](item) + rv = {} + for k, v in d.items(): + rv[k] = v.__self__ + return rv + + +def merge_sorted(*seqs, **kwargs): + """ Merge and sort a collection of sorted collections + + This works lazily and only keeps one value from each iterable in memory. + + >>> list(merge_sorted([1, 3, 5], [2, 4, 6])) + [1, 2, 3, 4, 5, 6] + + >>> ''.join(merge_sorted('abc', 'abc', 'abc')) + 'aaabbbccc' + + The "key" function used to sort the input may be passed as a keyword. 
+ + >>> list(merge_sorted([2, 3], [1, 3], key=lambda x: x // 3)) + [2, 1, 3, 3] + """ + if len(seqs) == 0: + return iter([]) + elif len(seqs) == 1: + return iter(seqs[0]) + + key = kwargs.get('key', None) + if key is None: + return _merge_sorted_binary(seqs) + else: + return _merge_sorted_binary_key(seqs, key) + + +def _merge_sorted_binary(seqs): + mid = len(seqs) // 2 + L1 = seqs[:mid] + if len(L1) == 1: + seq1 = iter(L1[0]) + else: + seq1 = _merge_sorted_binary(L1) + L2 = seqs[mid:] + if len(L2) == 1: + seq2 = iter(L2[0]) + else: + seq2 = _merge_sorted_binary(L2) + + try: + val2 = next(seq2) + except StopIteration: + for val1 in seq1: + yield val1 + return + + for val1 in seq1: + if val2 < val1: + yield val2 + for val2 in seq2: + if val2 < val1: + yield val2 + else: + yield val1 + break + else: + break + else: + yield val1 + else: + yield val2 + for val2 in seq2: + yield val2 + return + yield val1 + for val1 in seq1: + yield val1 + + +def _merge_sorted_binary_key(seqs, key): + mid = len(seqs) // 2 + L1 = seqs[:mid] + if len(L1) == 1: + seq1 = iter(L1[0]) + else: + seq1 = _merge_sorted_binary_key(L1, key) + L2 = seqs[mid:] + if len(L2) == 1: + seq2 = iter(L2[0]) + else: + seq2 = _merge_sorted_binary_key(L2, key) + + try: + val2 = next(seq2) + except StopIteration: + for val1 in seq1: + yield val1 + return + key2 = key(val2) + + for val1 in seq1: + key1 = key(val1) + if key2 < key1: + yield val2 + for val2 in seq2: + key2 = key(val2) + if key2 < key1: + yield val2 + else: + yield val1 + break + else: + break + else: + yield val1 + else: + yield val2 + for val2 in seq2: + yield val2 + return + yield val1 + for val1 in seq1: + yield val1 + + +def interleave(seqs): + """ Interleave a sequence of sequences + + >>> list(interleave([[1, 2], [3, 4]])) + [1, 3, 2, 4] + + >>> ''.join(interleave(('ABC', 'XY'))) + 'AXBYC' + + Both the individual sequences and the sequence of sequences may be infinite + + Returns a lazy iterator + """ + iters = itertools.cycle(map(iter, seqs)) 
+ while True: + try: + for itr in iters: + yield next(itr) + return + except StopIteration: + predicate = partial(operator.is_not, itr) + iters = itertools.cycle(itertools.takewhile(predicate, iters)) + + +def unique(seq, key=None): + """ Return only unique elements of a sequence + + >>> tuple(unique((1, 2, 3))) + (1, 2, 3) + >>> tuple(unique((1, 2, 1, 3))) + (1, 2, 3) + + Uniqueness can be defined by key keyword + + >>> tuple(unique(['cat', 'mouse', 'dog', 'hen'], key=len)) + ('cat', 'mouse') + """ + seen = set() + seen_add = seen.add + if key is None: + for item in seq: + if item not in seen: + seen_add(item) + yield item + else: # calculate key + for item in seq: + val = key(item) + if val not in seen: + seen_add(val) + yield item + + +def isiterable(x): + """ Is x iterable? + + >>> isiterable([1, 2, 3]) + True + >>> isiterable('abc') + True + >>> isiterable(5) + False + """ + try: + iter(x) + return True + except TypeError: + return False + + +def isdistinct(seq): + """ All values in sequence are distinct + + >>> isdistinct([1, 2, 3]) + True + >>> isdistinct([1, 2, 1]) + False + + >>> isdistinct("Hello") + False + >>> isdistinct("World") + True + """ + if iter(seq) is seq: + seen = set() + seen_add = seen.add + for item in seq: + if item in seen: + return False + seen_add(item) + return True + else: + return len(seq) == len(set(seq)) + + +def take(n, seq): + """ The first n elements of a sequence + + >>> list(take(2, [10, 20, 30, 40, 50])) + [10, 20] + + See Also: + drop + tail + """ + return itertools.islice(seq, n) + + +def tail(n, seq): + """ The last n elements of a sequence + + >>> tail(2, [10, 20, 30, 40, 50]) + [40, 50] + + See Also: + drop + take + """ + try: + return seq[-n:] + except (TypeError, KeyError): + return tuple(collections.deque(seq, n)) + + +def drop(n, seq): + """ The sequence following the first n elements + + >>> list(drop(2, [10, 20, 30, 40, 50])) + [30, 40, 50] + + See Also: + take + tail + """ + return itertools.islice(seq, n, None) + 
+ +def take_nth(n, seq): + """ Every nth item in seq + + >>> list(take_nth(2, [10, 20, 30, 40, 50])) + [10, 30, 50] + """ + return itertools.islice(seq, 0, None, n) + + +def first(seq): + """ The first element in a sequence + + >>> first('ABC') + 'A' + """ + return next(iter(seq)) + + +def second(seq): + """ The second element in a sequence + + >>> second('ABC') + 'B' + """ + seq = iter(seq) + next(seq) + return next(seq) + + +def nth(n, seq): + """ The nth element in a sequence + + >>> nth(1, 'ABC') + 'B' + """ + if isinstance(seq, (tuple, list, Sequence)): + return seq[n] + else: + return next(itertools.islice(seq, n, None)) + + +def last(seq): + """ The last element in a sequence + + >>> last('ABC') + 'C' + """ + return tail(1, seq)[0] + + +rest = partial(drop, 1) + + +def _get(ind, seq, default): + try: + return seq[ind] + except (KeyError, IndexError): + return default + + +def get(ind, seq, default=no_default): + """ Get element in a sequence or dict + + Provides standard indexing + + >>> get(1, 'ABC') # Same as 'ABC'[1] + 'B' + + Pass a list to get multiple values + + >>> get([1, 2], 'ABC') # ('ABC'[1], 'ABC'[2]) + ('B', 'C') + + Works on any value that supports indexing/getitem + For example here we see that it works with dictionaries + + >>> phonebook = {'Alice': '555-1234', + ... 'Bob': '555-5678', + ... 
'Charlie':'555-9999'} + >>> get('Alice', phonebook) + '555-1234' + + >>> get(['Alice', 'Bob'], phonebook) + ('555-1234', '555-5678') + + Provide a default for missing values + + >>> get(['Alice', 'Dennis'], phonebook, None) + ('555-1234', None) + + See Also: + pluck + """ + try: + return seq[ind] + except TypeError: # `ind` may be a list + if isinstance(ind, list): + if default == no_default: + if len(ind) > 1: + return operator.itemgetter(*ind)(seq) + elif ind: + return seq[ind[0]], + else: + return () + else: + return tuple(_get(i, seq, default) for i in ind) + elif default != no_default: + return default + else: + raise + except (KeyError, IndexError): # we know `ind` is not a list + if default == no_default: + raise + else: + return default + + +def concat(seqs): + """ Concatenate zero or more iterables, any of which may be infinite. + + An infinite sequence will prevent the rest of the arguments from + being included. + + We use chain.from_iterable rather than ``chain(*seqs)`` so that seqs + can be a generator. + + >>> list(concat([[], [1], [2, 3]])) + [1, 2, 3] + + See also: + itertools.chain.from_iterable equivalent + """ + return itertools.chain.from_iterable(seqs) + + +def concatv(*seqs): + """ Variadic version of concat + + >>> list(concatv([], ["a"], ["b", "c"])) + ['a', 'b', 'c'] + + See also: + itertools.chain + """ + return concat(seqs) + + +def mapcat(func, seqs): + """ Apply func to each sequence in seqs, concatenating results. + + >>> list(mapcat(lambda s: [c.upper() for c in s], + ... [["a", "b"], ["c", "d", "e"]])) + ['A', 'B', 'C', 'D', 'E'] + """ + return concat(map(func, seqs)) + + +def cons(el, seq): + """ Add el to beginning of (possibly infinite) sequence seq. 
+ + >>> list(cons(1, [2, 3])) + [1, 2, 3] + """ + return itertools.chain([el], seq) + + +def interpose(el, seq): + """ Introduce element between each pair of elements in seq + + >>> list(interpose("a", [1, 2, 3])) + [1, 'a', 2, 'a', 3] + """ + inposed = concat(zip(itertools.repeat(el), seq)) + next(inposed) + return inposed + + +def frequencies(seq): + """ Find number of occurrences of each value in seq + + >>> frequencies(['cat', 'cat', 'ox', 'pig', 'pig', 'cat']) #doctest: +SKIP + {'cat': 3, 'ox': 1, 'pig': 2} + + See Also: + countby + groupby + """ + d = collections.defaultdict(int) + for item in seq: + d[item] += 1 + return dict(d) + + +def reduceby(key, binop, seq, init=no_default): + """ Perform a simultaneous groupby and reduction + + The computation: + + >>> result = reduceby(key, binop, seq, init) # doctest: +SKIP + + is equivalent to the following: + + >>> def reduction(group): # doctest: +SKIP + ... return reduce(binop, group, init) # doctest: +SKIP + + >>> groups = groupby(key, seq) # doctest: +SKIP + >>> result = valmap(reduction, groups) # doctest: +SKIP + + But the former does not build the intermediate groups, allowing it to + operate in much less space. This makes it suitable for larger datasets + that do not fit comfortably in memory + + The ``init`` keyword argument is the default initialization of the + reduction. This can be either a constant value like ``0`` or a callable + like ``lambda : 0`` as might be used in ``defaultdict``. + + Simple Examples + --------------- + + >>> from operator import add, mul + >>> iseven = lambda x: x % 2 == 0 + + >>> data = [1, 2, 3, 4, 5] + + >>> reduceby(iseven, add, data) # doctest: +SKIP + {False: 9, True: 6} + + >>> reduceby(iseven, mul, data) # doctest: +SKIP + {False: 15, True: 8} + + Complex Example + --------------- + + >>> projects = [{'name': 'build roads', 'state': 'CA', 'cost': 1000000}, + ... {'name': 'fight crime', 'state': 'IL', 'cost': 100000}, + ... 
{'name': 'help farmers', 'state': 'IL', 'cost': 2000000}, + ... {'name': 'help farmers', 'state': 'CA', 'cost': 200000}] + + >>> reduceby('state', # doctest: +SKIP + ... lambda acc, x: acc + x['cost'], + ... projects, 0) + {'CA': 1200000, 'IL': 2100000} + + Example Using ``init`` + ---------------------- + + >>> def set_add(s, i): + ... s.add(i) + ... return s + + >>> reduceby(iseven, set_add, [1, 2, 3, 4, 1, 2, 3], set) # doctest: +SKIP + {True: set([2, 4]), + False: set([1, 3])} + """ + is_no_default = init == no_default + if not is_no_default and not callable(init): + _init = init + init = lambda: _init + if not callable(key): + key = getter(key) + d = {} + for item in seq: + k = key(item) + if k not in d: + if is_no_default: + d[k] = item + continue + else: + d[k] = init() + d[k] = binop(d[k], item) + return d + + +def iterate(func, x): + """ Repeatedly apply a function func onto an original input + + Yields x, then func(x), then func(func(x)), then func(func(func(x))), etc.. + + >>> def inc(x): return x + 1 + >>> counter = iterate(inc, 0) + >>> next(counter) + 0 + >>> next(counter) + 1 + >>> next(counter) + 2 + + >>> double = lambda x: x * 2 + >>> powers_of_two = iterate(double, 1) + >>> next(powers_of_two) + 1 + >>> next(powers_of_two) + 2 + >>> next(powers_of_two) + 4 + >>> next(powers_of_two) + 8 + """ + while True: + yield x + x = func(x) + + +def sliding_window(n, seq): + """ A sequence of overlapping subsequences + + >>> list(sliding_window(2, [1, 2, 3, 4])) + [(1, 2), (2, 3), (3, 4)] + + This function creates a sliding window suitable for transformations like + sliding means / smoothing + + >>> mean = lambda seq: float(sum(seq)) / len(seq) + >>> list(map(mean, sliding_window(2, [1, 2, 3, 4]))) + [1.5, 2.5, 3.5] + """ + return zip(*(collections.deque(itertools.islice(it, i), 0) or it + for i, it in enumerate(itertools.tee(seq, n)))) + + +no_pad = '__no__pad__' + + +def partition(n, seq, pad=no_pad): + """ Partition sequence into tuples of length n + + 
>>> list(partition(2, [1, 2, 3, 4])) + [(1, 2), (3, 4)] + + If the length of ``seq`` is not evenly divisible by ``n``, the final tuple + is dropped if ``pad`` is not specified, or filled to length ``n`` by pad: + + >>> list(partition(2, [1, 2, 3, 4, 5])) + [(1, 2), (3, 4)] + + >>> list(partition(2, [1, 2, 3, 4, 5], pad=None)) + [(1, 2), (3, 4), (5, None)] + + See Also: + partition_all + """ + args = [iter(seq)] * n + if pad is no_pad: + return zip(*args) + else: + return zip_longest(*args, fillvalue=pad) + + +def partition_all(n, seq): + """ Partition all elements of sequence into tuples of length at most n + + The final tuple may be shorter to accommodate extra elements. + + >>> list(partition_all(2, [1, 2, 3, 4])) + [(1, 2), (3, 4)] + + >>> list(partition_all(2, [1, 2, 3, 4, 5])) + [(1, 2), (3, 4), (5,)] + + See Also: + partition + """ + args = [iter(seq)] * n + it = zip_longest(*args, fillvalue=no_pad) + try: + prev = next(it) + except StopIteration: + return + for item in it: + yield prev + prev = item + if prev[-1] is no_pad: + try: + # If seq defines __len__, then + # we can quickly calculate where no_pad starts + yield prev[:len(seq) % n] + except TypeError: + # Get first index of no_pad without using .index() + # https://github.com/pytoolz/toolz/issues/387 + # Binary search from CPython's bisect module, + # modified for identity testing. + lo, hi = 0, n + while lo < hi: + mid = (lo + hi) // 2 + if prev[mid] is no_pad: + hi = mid + else: + lo = mid + 1 + yield prev[:lo] + else: + yield prev + + +def count(seq): + """ Count the number of items in seq + + Like the builtin ``len`` but works on lazy sequences. + + Not to be confused with ``itertools.count`` + + See also: + len + """ + if hasattr(seq, '__len__'): + return len(seq) + return sum(1 for i in seq) + + +def pluck(ind, seqs, default=no_default): + """ plucks an element or several elements from each item in a sequence. 
+ + ``pluck`` maps ``itertoolz.get`` over a sequence and returns one or more + elements of each item in the sequence. + + This is equivalent to running `map(curried.get(ind), seqs)` + + ``ind`` can be either a single string/index or a list of strings/indices. + ``seqs`` should be a sequence containing sequences or dicts. + + e.g. + + >>> data = [{'id': 1, 'name': 'Cheese'}, {'id': 2, 'name': 'Pies'}] + >>> list(pluck('name', data)) + ['Cheese', 'Pies'] + >>> list(pluck([0, 1], [[1, 2, 3], [4, 5, 7]])) + [(1, 2), (4, 5)] + + See Also: + get + map + """ + if default == no_default: + get = getter(ind) + return map(get, seqs) + elif isinstance(ind, list): + return (tuple(_get(item, seq, default) for item in ind) + for seq in seqs) + return (_get(ind, seq, default) for seq in seqs) + + +def getter(index): + if isinstance(index, list): + if len(index) == 1: + index = index[0] + return lambda x: (x[index],) + elif index: + return operator.itemgetter(*index) + else: + return lambda x: () + else: + return operator.itemgetter(index) + + +def join(leftkey, leftseq, rightkey, rightseq, + left_default=no_default, right_default=no_default): + """ Join two sequences on common attributes + + This is a semi-streaming operation. The LEFT sequence is fully evaluated + and placed into memory. The RIGHT sequence is evaluated lazily and so can + be arbitrarily large. + (Note: If right_default is defined, then unique keys of rightseq + will also be stored in memory.) + + >>> friends = [('Alice', 'Edith'), + ... ('Alice', 'Zhao'), + ... ('Edith', 'Alice'), + ... ('Zhao', 'Alice'), + ... ('Zhao', 'Edith')] + + >>> cities = [('Alice', 'NYC'), + ... ('Alice', 'Chicago'), + ... ('Dan', 'Sydney'), + ... ('Edith', 'Paris'), + ... ('Edith', 'Berlin'), + ... ('Zhao', 'Shanghai')] + + >>> # Vacation opportunities + >>> # In what cities do people have friends? + >>> result = join(second, friends, + ... first, cities) + >>> for ((a, b), (c, d)) in sorted(unique(result)): + ... 
print((a, d)) + ('Alice', 'Berlin') + ('Alice', 'Paris') + ('Alice', 'Shanghai') + ('Edith', 'Chicago') + ('Edith', 'NYC') + ('Zhao', 'Chicago') + ('Zhao', 'NYC') + ('Zhao', 'Berlin') + ('Zhao', 'Paris') + + Specify outer joins with keyword arguments ``left_default`` and/or + ``right_default``. Here is a full outer join in which unmatched elements + are paired with None. + + >>> identity = lambda x: x + >>> list(join(identity, [1, 2, 3], + ... identity, [2, 3, 4], + ... left_default=None, right_default=None)) + [(2, 2), (3, 3), (None, 4), (1, None)] + + Usually the key arguments are callables to be applied to the sequences. If + the keys are not obviously callable then it is assumed that indexing was + intended, e.g. the following is a legal change. + The join is implemented as a hash join and the keys of leftseq must be + hashable. Additionally, if right_default is defined, then keys of rightseq + must also be hashable. + + >>> # result = join(second, friends, first, cities) + >>> result = join(1, friends, 0, cities) # doctest: +SKIP + """ + if not callable(leftkey): + leftkey = getter(leftkey) + if not callable(rightkey): + rightkey = getter(rightkey) + + d = groupby(leftkey, leftseq) + + if left_default == no_default and right_default == no_default: + # Inner Join + for item in rightseq: + key = rightkey(item) + if key in d: + for left_match in d[key]: + yield (left_match, item) + elif left_default != no_default and right_default == no_default: + # Right Join + for item in rightseq: + key = rightkey(item) + if key in d: + for left_match in d[key]: + yield (left_match, item) + else: + yield (left_default, item) + elif right_default != no_default: + seen_keys = set() + seen = seen_keys.add + + if left_default == no_default: + # Left Join + for item in rightseq: + key = rightkey(item) + seen(key) + if key in d: + for left_match in d[key]: + yield (left_match, item) + else: + # Full Join + for item in rightseq: + key = rightkey(item) + seen(key) + if key in d: + for 
left_match in d[key]: + yield (left_match, item) + else: + yield (left_default, item) + + for key, matches in d.items(): + if key not in seen_keys: + for match in matches: + yield (match, right_default) + + +def diff(*seqs, **kwargs): + """ Return those items that differ between sequences + + >>> list(diff([1, 2, 3], [1, 2, 10, 100])) + [(3, 10)] + + Shorter sequences may be padded with a ``default`` value: + + >>> list(diff([1, 2, 3], [1, 2, 10, 100], default=None)) + [(3, 10), (None, 100)] + + A ``key`` function may also be applied to each item to use during + comparisons: + + >>> list(diff(['apples', 'bananas'], ['Apples', 'Oranges'], key=str.lower)) + [('bananas', 'Oranges')] + """ + N = len(seqs) + if N == 1 and isinstance(seqs[0], list): + seqs = seqs[0] + N = len(seqs) + if N < 2: + raise TypeError('Too few sequences given (min 2 required)') + default = kwargs.get('default', no_default) + if default == no_default: + iters = zip(*seqs) + else: + iters = zip_longest(*seqs, fillvalue=default) + key = kwargs.get('key', None) + if key is None: + for items in iters: + if items.count(items[0]) != N: + yield items + else: + for items in iters: + vals = tuple(map(key, items)) + if vals.count(vals[0]) != N: + yield items + + +def topk(k, seq, key=None): + """ Find the k largest elements of a sequence + + Operates lazily in ``n*log(k)`` time + + >>> topk(2, [1, 100, 10, 1000]) + (1000, 100) + + Use a key function to change sorted order + + >>> topk(2, ['Alice', 'Bob', 'Charlie', 'Dan'], key=len) + ('Charlie', 'Alice') + + See also: + heapq.nlargest + """ + if key is not None and not callable(key): + key = getter(key) + return tuple(heapq.nlargest(k, seq, key=key)) + + +def peek(seq): + """ Retrieve the next element of a sequence + + Returns the first element and an iterable equivalent to the original + sequence, still having the element retrieved. 
+ + >>> seq = [0, 1, 2, 3, 4] + >>> first, seq = peek(seq) + >>> first + 0 + >>> list(seq) + [0, 1, 2, 3, 4] + """ + iterator = iter(seq) + item = next(iterator) + return item, itertools.chain((item,), iterator) + + +def peekn(n, seq): + """ Retrieve the next n elements of a sequence + + Returns a tuple of the first n elements and an iterable equivalent + to the original, still having the elements retrieved. + + >>> seq = [0, 1, 2, 3, 4] + >>> first_two, seq = peekn(2, seq) + >>> first_two + (0, 1) + >>> list(seq) + [0, 1, 2, 3, 4] + """ + iterator = iter(seq) + peeked = tuple(take(n, iterator)) + return peeked, itertools.chain(iter(peeked), iterator) + + +def random_sample(prob, seq, random_state=None): + """ Return elements from a sequence with probability of prob + + Returns a lazy iterator of random items from seq. + + ``random_sample`` considers each item independently and without + replacement. See below how the first time it returned 13 items and the + next time it returned 6 items. + + >>> seq = list(range(100)) + >>> list(random_sample(0.1, seq)) # doctest: +SKIP + [6, 9, 19, 35, 45, 50, 58, 62, 68, 72, 78, 86, 95] + >>> list(random_sample(0.1, seq)) # doctest: +SKIP + [6, 44, 54, 61, 69, 94] + + Providing an integer seed for ``random_state`` will result in + deterministic sampling. Given the same seed it will return the same sample + every time. + + >>> list(random_sample(0.1, seq, random_state=2016)) + [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98] + >>> list(random_sample(0.1, seq, random_state=2016)) + [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98] + + ``random_state`` can also be any object with a method ``random`` that + returns floats between 0.0 and 1.0 (exclusive). 
+ + >>> from random import Random + >>> randobj = Random(2016) + >>> list(random_sample(0.1, seq, random_state=randobj)) + [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98] + """ + if not hasattr(random_state, 'random'): + from random import Random + + random_state = Random(random_state) + return filter(lambda _: random_state.random() < prob, seq) diff --git a/contrib/python/toolz/py3/toolz/recipes.py b/contrib/python/toolz/py3/toolz/recipes.py new file mode 100644 index 0000000000..89de88db2b --- /dev/null +++ b/contrib/python/toolz/py3/toolz/recipes.py @@ -0,0 +1,46 @@ +import itertools +from .itertoolz import frequencies, pluck, getter + + +__all__ = ('countby', 'partitionby') + + +def countby(key, seq): + """ Count elements of a collection by a key function + + >>> countby(len, ['cat', 'mouse', 'dog']) + {3: 2, 5: 1} + + >>> def iseven(x): return x % 2 == 0 + >>> countby(iseven, [1, 2, 3]) # doctest:+SKIP + {True: 1, False: 2} + + See Also: + groupby + """ + if not callable(key): + key = getter(key) + return frequencies(map(key, seq)) + + +def partitionby(func, seq): + """ Partition a sequence according to a function + + Partition `s` into a sequence of lists such that, when traversing + `s`, every time the output of `func` changes a new list is started + and that and subsequent items are collected into that list. 
+ + >>> is_space = lambda c: c == " " + >>> list(partitionby(is_space, "I have space")) + [('I',), (' ',), ('h', 'a', 'v', 'e'), (' ',), ('s', 'p', 'a', 'c', 'e')] + + >>> is_large = lambda x: x > 10 + >>> list(partitionby(is_large, [1, 2, 1, 99, 88, 33, 99, -1, 5])) + [(1, 2, 1), (99, 88, 33, 99), (-1, 5)] + + See also: + partition + groupby + itertools.groupby + """ + return map(tuple, pluck(1, itertools.groupby(seq, key=func))) diff --git a/contrib/python/toolz/py3/toolz/sandbox/__init__.py b/contrib/python/toolz/py3/toolz/sandbox/__init__.py new file mode 100644 index 0000000000..0abda1cb42 --- /dev/null +++ b/contrib/python/toolz/py3/toolz/sandbox/__init__.py @@ -0,0 +1,2 @@ +from .core import EqualityHashKey, unzip +from .parallel import fold diff --git a/contrib/python/toolz/py3/toolz/sandbox/core.py b/contrib/python/toolz/py3/toolz/sandbox/core.py new file mode 100644 index 0000000000..55e09d74e9 --- /dev/null +++ b/contrib/python/toolz/py3/toolz/sandbox/core.py @@ -0,0 +1,133 @@ +from toolz.itertoolz import getter, cons, pluck +from itertools import tee, starmap + + +# See #166: https://github.com/pytoolz/toolz/issues/166 +# See #173: https://github.com/pytoolz/toolz/pull/173 +class EqualityHashKey(object): + """ Create a hash key that uses equality comparisons between items. + + This may be used to create hash keys for otherwise unhashable types: + + >>> from toolz import curry + >>> EqualityHashDefault = curry(EqualityHashKey, None) + >>> set(map(EqualityHashDefault, [[], (), [1], [1]])) # doctest: +SKIP + {=[]=, =()=, =[1]=} + + **Caution:** adding N ``EqualityHashKey`` items to a hash container + may require O(N**2) operations, not O(N) as for typical hashable types. + Therefore, a suitable key function such as ``tuple`` or ``frozenset`` + is usually preferred over using ``EqualityHashKey`` if possible. 
+ + The ``key`` argument to ``EqualityHashKey`` should be a function or + index that returns a hashable object that effectively distinguishes + unequal items. This helps avoid the poor scaling that occurs when + using the default key. For example, the above example can be improved + by using a key function that distinguishes items by length or type: + + >>> EqualityHashLen = curry(EqualityHashKey, len) + >>> EqualityHashType = curry(EqualityHashKey, type) # this works too + >>> set(map(EqualityHashLen, [[], (), [1], [1]])) # doctest: +SKIP + {=[]=, =()=, =[1]=} + + ``EqualityHashKey`` is convenient to use when a suitable key function + is complicated or unavailable. For example, the following returns all + unique values based on equality: + + >>> from toolz import unique + >>> vals = [[], [], (), [1], [1], [2], {}, {}, {}] + >>> list(unique(vals, key=EqualityHashDefault)) + [[], (), [1], [2], {}] + + **Warning:** don't change the equality value of an item already in a hash + container. Unhashable types are unhashable for a reason. For example: + + >>> L1 = [1] ; L2 = [2] + >>> s = set(map(EqualityHashDefault, [L1, L2])) + >>> s # doctest: +SKIP + {=[1]=, =[2]=} + + >>> L1[0] = 2 # Don't do this! ``s`` now has duplicate items! + >>> s # doctest: +SKIP + {=[2]=, =[2]=} + + Although this may appear problematic, immutable data types are a common + idiom in functional programming, and ``EqualityHashKey`` easily allows + the same idiom to be used by convention rather than strict requirement. 
+ + See Also: + identity + """ + __slots__ = ['item', 'key'] + _default_hashkey = '__default__hashkey__' + + def __init__(self, key, item): + if key is None: + self.key = self._default_hashkey + elif not callable(key): + self.key = getter(key) + else: + self.key = key + self.item = item + + def __hash__(self): + if self.key == self._default_hashkey: + val = self.key + else: + val = self.key(self.item) + return hash(val) + + def __eq__(self, other): + try: + return (self._default_hashkey == other._default_hashkey and + self.item == other.item) + except AttributeError: + return False + + def __ne__(self, other): + return not self.__eq__(other) + + def __str__(self): + return '=%s=' % str(self.item) + + def __repr__(self): + return '=%s=' % repr(self.item) + + +# See issue #293: https://github.com/pytoolz/toolz/issues/239 +def unzip(seq): + """Inverse of ``zip`` + + >>> a, b = unzip([('a', 1), ('b', 2)]) + >>> list(a) + ['a', 'b'] + >>> list(b) + [1, 2] + + Unlike the naive implementation ``def unzip(seq): zip(*seq)`` this + implementation can handle an infinite sequence ``seq``. + + Caveats: + + * The implementation uses ``tee``, and so can use a significant amount + of auxiliary storage if the resulting iterators are consumed at + different times. + + * The inner sequence cannot be infinite. In Python 3 ``zip(*seq)`` can be + used if ``seq`` is a finite sequence of infinite sequences. 
+ + """ + + seq = iter(seq) + + # Check how many iterators we need + try: + first = tuple(next(seq)) + except StopIteration: + return tuple() + + # and create them + niters = len(first) + seqs = tee(cons(first, seq), niters) + + return tuple(starmap(pluck, enumerate(seqs))) diff --git a/contrib/python/toolz/py3/toolz/sandbox/parallel.py b/contrib/python/toolz/py3/toolz/sandbox/parallel.py new file mode 100644 index 0000000000..114077d2ba --- /dev/null +++ b/contrib/python/toolz/py3/toolz/sandbox/parallel.py @@ -0,0 +1,75 @@ +import functools +from toolz.itertoolz import partition_all +from toolz.utils import no_default + + +def _reduce(func, seq, initial=None): + if initial is None: + return functools.reduce(func, seq) + else: + return functools.reduce(func, seq, initial) + + +def fold(binop, seq, default=no_default, map=map, chunksize=128, combine=None): + """ + Reduce without guarantee of ordered reduction. + + inputs: + + ``binop`` - associative operator. The associative property allows us to + leverage a parallel map to perform reductions in parallel. + ``seq`` - a sequence to be aggregated + ``default`` - an identity element like 0 for ``add`` or 1 for mul + + ``map`` - an implementation of ``map``. This may be parallel and + determines how work is distributed. + ``chunksize`` - Number of elements of ``seq`` that should be handled + within a single function call + ``combine`` - Binary operator to combine two intermediate results. + If ``binop`` is of type (total, item) -> total + then ``combine`` is of type (total, total) -> total + Defaults to ``binop`` for common case of operators like add + + Fold chunks up the collection into blocks of size ``chunksize`` and then + feeds each of these to calls to ``reduce``. This work is distributed + with a call to ``map``, gathered back and then refolded to finish the + computation. 
In this way ``fold`` specifies only how to chunk up data but + leaves the distribution of this work to an externally provided ``map`` + function. This function can be sequential or rely on multithreading, + multiprocessing, or even distributed solutions. + + If ``map`` intends to serialize functions it should be prepared to accept + and serialize lambdas. Note that the standard ``pickle`` module fails + here. + + Example + ------- + + >>> # Provide a parallel map to accomplish a parallel sum + >>> from operator import add + >>> fold(add, [1, 2, 3, 4], chunksize=2, map=map) + 10 + """ + assert chunksize > 1 + + if combine is None: + combine = binop + + chunks = partition_all(chunksize, seq) + + # Evaluate sequence in chunks via map + if default == no_default: + results = map( + functools.partial(_reduce, binop), + chunks) + else: + results = map( + functools.partial(_reduce, binop, initial=default), + chunks) + + results = list(results) # TODO: Support complete laziness + + if len(results) == 1: # Return completed result + return results[0] + else: # Recurse to reaggregate intermediate results + return fold(combine, results, map=map, chunksize=chunksize) diff --git a/contrib/python/toolz/py3/toolz/utils.py b/contrib/python/toolz/py3/toolz/utils.py new file mode 100644 index 0000000000..1002c4649f --- /dev/null +++ b/contrib/python/toolz/py3/toolz/utils.py @@ -0,0 +1,9 @@ +def raises(err, lamda): + try: + lamda() + return False + except err: + return True + + +no_default = '__no__default__' diff --git a/contrib/python/toolz/py3/ya.make b/contrib/python/toolz/py3/ya.make new file mode 100644 index 0000000000..bc99c48e44 --- /dev/null +++ b/contrib/python/toolz/py3/ya.make @@ -0,0 +1,42 @@ +# Generated by devtools/yamaker (pypi). 
+ +PY3_LIBRARY() + +VERSION(0.12.0) + +LICENSE(BSD-3-Clause) + +NO_LINT() + +PY_SRCS( + TOP_LEVEL + tlz/__init__.py + tlz/_build_tlz.py + toolz/__init__.py + toolz/_signatures.py + toolz/_version.py + toolz/compatibility.py + toolz/curried/__init__.py + toolz/curried/exceptions.py + toolz/curried/operator.py + toolz/dicttoolz.py + toolz/functoolz.py + toolz/itertoolz.py + toolz/recipes.py + toolz/sandbox/__init__.py + toolz/sandbox/core.py + toolz/sandbox/parallel.py + toolz/utils.py +) + +RESOURCE_FILES( + PREFIX contrib/python/toolz/py3/ + .dist-info/METADATA + .dist-info/top_level.txt +) + +END() + +RECURSE_FOR_TESTS( + tests +) diff --git a/contrib/python/toolz/ya.make b/contrib/python/toolz/ya.make new file mode 100644 index 0000000000..0c05eedeac --- /dev/null +++ b/contrib/python/toolz/ya.make @@ -0,0 +1,18 @@ +PY23_LIBRARY() + +LICENSE(Service-Py23-Proxy) + +IF (PYTHON2) + PEERDIR(contrib/python/toolz/py2) +ELSE() + PEERDIR(contrib/python/toolz/py3) +ENDIF() + +NO_LINT() + +END() + +RECURSE( + py2 + py3 +) |