diff options
| author | arcadia-devtools <[email protected]> | 2022-03-30 23:08:06 +0300 |
|---|---|---|
| committer | arcadia-devtools <[email protected]> | 2022-03-30 23:08:06 +0300 |
| commit | 859572f7b489b0198236b3c1509da1cc0c308db3 (patch) | |
| tree | 24585241e51d871aa0865adf62186f2c3144e4c7 /contrib/python/jmespath/py2 | |
| parent | 50abcd45608871a5c7d4e5c9f35cf33090c0d0fe (diff) | |
intermediate changes
ref:ec898a324c765070b3680b073e330bd4d9fa25c1
Diffstat (limited to 'contrib/python/jmespath/py2')
| -rw-r--r-- | contrib/python/jmespath/py2/.dist-info/METADATA | 251 | ||||
| -rw-r--r-- | contrib/python/jmespath/py2/.dist-info/top_level.txt | 1 | ||||
| -rw-r--r-- | contrib/python/jmespath/py2/.yandex_meta/yamaker.yaml | 2 | ||||
| -rw-r--r-- | contrib/python/jmespath/py2/LICENSE.txt | 20 | ||||
| -rw-r--r-- | contrib/python/jmespath/py2/README.rst | 222 | ||||
| -rw-r--r-- | contrib/python/jmespath/py2/jmespath/__init__.py | 23 | ||||
| -rw-r--r-- | contrib/python/jmespath/py2/jmespath/ast.py | 90 | ||||
| -rw-r--r-- | contrib/python/jmespath/py2/jmespath/compat.py | 65 | ||||
| -rw-r--r-- | contrib/python/jmespath/py2/jmespath/exceptions.py | 122 | ||||
| -rw-r--r-- | contrib/python/jmespath/py2/jmespath/functions.py | 362 | ||||
| -rw-r--r-- | contrib/python/jmespath/py2/jmespath/lexer.py | 208 | ||||
| -rw-r--r-- | contrib/python/jmespath/py2/jmespath/parser.py | 527 | ||||
| -rw-r--r-- | contrib/python/jmespath/py2/jmespath/visitor.py | 328 | ||||
| -rw-r--r-- | contrib/python/jmespath/py2/tests/__init__.py | 40 | ||||
| -rw-r--r-- | contrib/python/jmespath/py2/tests/test_compliance.py | 114 | ||||
| -rw-r--r-- | contrib/python/jmespath/py2/tests/test_parser.py | 368 |
16 files changed, 2743 insertions, 0 deletions
diff --git a/contrib/python/jmespath/py2/.dist-info/METADATA b/contrib/python/jmespath/py2/.dist-info/METADATA new file mode 100644 index 00000000000..78a973544bc --- /dev/null +++ b/contrib/python/jmespath/py2/.dist-info/METADATA @@ -0,0 +1,251 @@ +Metadata-Version: 2.0 +Name: jmespath +Version: 0.10.0 +Summary: JSON Matching Expressions +Home-page: https://github.com/jmespath/jmespath.py +Author: James Saryerwinnie +Author-email: [email protected] +License: MIT +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: Natural Language :: English +Classifier: License :: OSI Approved :: MIT License +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 2.6 +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.3 +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Requires-Python: >=2.6, !=3.0.*, !=3.1.*, !=3.2.* + +JMESPath +======== + + +.. image:: https://badges.gitter.im/Join Chat.svg + :target: https://gitter.im/jmespath/chat + + +.. image:: https://travis-ci.org/jmespath/jmespath.py.svg?branch=develop + :target: https://travis-ci.org/jmespath/jmespath.py + + +.. image:: https://codecov.io/github/jmespath/jmespath.py/coverage.svg?branch=develop + :target: https://codecov.io/github/jmespath/jmespath.py?branch=develop + + +JMESPath (pronounced "james path") allows you to declaratively specify how to +extract elements from a JSON document. 
+ +For example, given this document:: + + {"foo": {"bar": "baz"}} + +The jmespath expression ``foo.bar`` will return "baz". + +JMESPath also supports: + +Referencing elements in a list. Given the data:: + + {"foo": {"bar": ["one", "two"]}} + +The expression: ``foo.bar[0]`` will return "one". +You can also reference all the items in a list using the ``*`` +syntax:: + + {"foo": {"bar": [{"name": "one"}, {"name": "two"}]}} + +The expression: ``foo.bar[*].name`` will return ["one", "two"]. +Negative indexing is also supported (-1 refers to the last element +in the list). Given the data above, the expression +``foo.bar[-1].name`` will return "two". + +The ``*`` can also be used for hash types:: + + {"foo": {"bar": {"name": "one"}, "baz": {"name": "two"}}} + +The expression: ``foo.*.name`` will return ["one", "two"]. + + +Installation +============ + +You can install JMESPath from pypi with: + +.. code:: bash + + pip install jmespath + + +API +=== + +The ``jmespath.py`` library has two functions +that operate on python data structures. You can use ``search`` +and give it the jmespath expression and the data: + +.. code:: python + + >>> import jmespath + >>> path = jmespath.search('foo.bar', {'foo': {'bar': 'baz'}}) + 'baz' + +Similar to the ``re`` module, you can use the ``compile`` function +to compile the JMESPath expression and use this parsed expression +to perform repeated searches: + +.. code:: python + + >>> import jmespath + >>> expression = jmespath.compile('foo.bar') + >>> expression.search({'foo': {'bar': 'baz'}}) + 'baz' + >>> expression.search({'foo': {'bar': 'other'}}) + 'other' + +This is useful if you're going to use the same jmespath expression to +search multiple documents. This avoids having to reparse the +JMESPath expression each time you search a new document. + +Options +------- + +You can provide an instance of ``jmespath.Options`` to control how +a JMESPath expression is evaluated. 
The most common scenario for +using an ``Options`` instance is if you want to have ordered output +of your dict keys. To do this you can use either of these options: + +.. code:: python + + >>> import jmespath + >>> jmespath.search('{a: a, b: b}', + ... mydata, + ... jmespath.Options(dict_cls=collections.OrderedDict)) + + + >>> import jmespath + >>> parsed = jmespath.compile('{a: a, b: b}') + >>> parsed.search(mydata, + ... jmespath.Options(dict_cls=collections.OrderedDict)) + + +Custom Functions +~~~~~~~~~~~~~~~~ + +The JMESPath language has numerous +`built-in functions +<http://jmespath.org/specification.html#built-in-functions>`__, but it is +also possible to add your own custom functions. Keep in mind that +custom function support in jmespath.py is experimental and the API may +change based on feedback. + +**If you have a custom function that you've found useful, consider submitting +it to jmespath.site and propose that it be added to the JMESPath language.** +You can submit proposals +`here <https://github.com/jmespath/jmespath.site/issues>`__. + +To create custom functions: + +* Create a subclass of ``jmespath.functions.Functions``. +* Create a method with the name ``_func_<your function name>``. +* Apply the ``jmespath.functions.signature`` decorator that indicates + the expected types of the function arguments. +* Provide an instance of your subclass in a ``jmespath.Options`` object. + +Below are a few examples: + +.. code:: python + + import jmespath + from jmespath import functions + + # 1. Create a subclass of functions.Functions. + # The functions.Functions base class has logic + # that introspects all of its methods and automatically + # registers your custom functions in its function table. + class CustomFunctions(functions.Functions): + + # 2 and 3. Create a function that starts with _func_ + # and decorate it with @signature which indicates its + # expected types. 
+ # In this example, we're creating a jmespath function + # called "unique_letters" that accepts a single argument + # with an expected type "string". + @functions.signature({'types': ['string']}) + def _func_unique_letters(self, s): + # Given a string s, return a sorted + # string of unique letters: 'ccbbadd' -> 'abcd' + return ''.join(sorted(set(s))) + + # Here's another example. This is creating + # a jmespath function called "my_add" that expects + # two arguments, both of which should be of type number. + @functions.signature({'types': ['number']}, {'types': ['number']}) + def _func_my_add(self, x, y): + return x + y + + # 4. Provide an instance of your subclass in an Options object. + options = jmespath.Options(custom_functions=CustomFunctions()) + + # Provide this value to jmespath.search: + # This will print 3 + print( + jmespath.search( + 'my_add(`1`, `2`)', {}, options=options) + ) + + # This will print "abcd" + print( + jmespath.search( + 'foo.bar | unique_letters(@)', + {'foo': {'bar': 'ccbbadd'}}, + options=options) + ) + +Again, if you come up with useful functions that you think make +sense in the JMESPath language (and make sense to implement in all +JMESPath libraries, not just python), please let us know at +`jmespath.site <https://github.com/jmespath/jmespath.site/issues>`__. + + +Specification +============= + +If you'd like to learn more about the JMESPath language, you can check out +the `JMESPath tutorial <http://jmespath.org/tutorial.html>`__. Also check +out the `JMESPath examples page <http://jmespath.org/examples.html>`__ for +examples of more complex jmespath queries. + +The grammar is specified using ABNF, as described in +`RFC4234 <http://www.ietf.org/rfc/rfc4234.txt>`_. +You can find the most up to date +`grammar for JMESPath here <http://jmespath.org/specification.html#grammar>`__. + +You can read the full +`JMESPath specification here <http://jmespath.org/specification.html>`__. 
+ + +Testing +======= + +In addition to the unit tests for the jmespath modules, +there is a ``tests/compliance`` directory that contains +.json files with test cases. This allows other implementations +to verify they are producing the correct output. Each json +file is grouped by feature. + + +Discuss +======= + +Join us on our `Gitter channel <https://gitter.im/jmespath/chat>`__ +if you want to chat or if you have any questions. + + diff --git a/contrib/python/jmespath/py2/.dist-info/top_level.txt b/contrib/python/jmespath/py2/.dist-info/top_level.txt new file mode 100644 index 00000000000..45c1e038e5f --- /dev/null +++ b/contrib/python/jmespath/py2/.dist-info/top_level.txt @@ -0,0 +1 @@ +jmespath diff --git a/contrib/python/jmespath/py2/.yandex_meta/yamaker.yaml b/contrib/python/jmespath/py2/.yandex_meta/yamaker.yaml new file mode 100644 index 00000000000..24449849208 --- /dev/null +++ b/contrib/python/jmespath/py2/.yandex_meta/yamaker.yaml @@ -0,0 +1,2 @@ +keep: +- tests/.+ diff --git a/contrib/python/jmespath/py2/LICENSE.txt b/contrib/python/jmespath/py2/LICENSE.txt new file mode 100644 index 00000000000..aa689285366 --- /dev/null +++ b/contrib/python/jmespath/py2/LICENSE.txt @@ -0,0 +1,20 @@ +Copyright (c) 2013 Amazon.com, Inc. or its affiliates. All Rights Reserved + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, dis- +tribute, sublicense, and/or sell copies of the Software, and to permit +persons to whom the Software is furnished to do so, subject to the fol- +lowing conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- +ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +IN THE SOFTWARE. diff --git a/contrib/python/jmespath/py2/README.rst b/contrib/python/jmespath/py2/README.rst new file mode 100644 index 00000000000..530709edeec --- /dev/null +++ b/contrib/python/jmespath/py2/README.rst @@ -0,0 +1,222 @@ +JMESPath +======== + + +.. image:: https://badges.gitter.im/Join Chat.svg + :target: https://gitter.im/jmespath/chat + + +.. image:: https://travis-ci.org/jmespath/jmespath.py.svg?branch=develop + :target: https://travis-ci.org/jmespath/jmespath.py + + +.. image:: https://codecov.io/github/jmespath/jmespath.py/coverage.svg?branch=develop + :target: https://codecov.io/github/jmespath/jmespath.py?branch=develop + + +JMESPath (pronounced "james path") allows you to declaratively specify how to +extract elements from a JSON document. + +For example, given this document:: + + {"foo": {"bar": "baz"}} + +The jmespath expression ``foo.bar`` will return "baz". + +JMESPath also supports: + +Referencing elements in a list. Given the data:: + + {"foo": {"bar": ["one", "two"]}} + +The expression: ``foo.bar[0]`` will return "one". +You can also reference all the items in a list using the ``*`` +syntax:: + + {"foo": {"bar": [{"name": "one"}, {"name": "two"}]}} + +The expression: ``foo.bar[*].name`` will return ["one", "two"]. +Negative indexing is also supported (-1 refers to the last element +in the list). Given the data above, the expression +``foo.bar[-1].name`` will return "two". 
+ +The ``*`` can also be used for hash types:: + + {"foo": {"bar": {"name": "one"}, "baz": {"name": "two"}}} + +The expression: ``foo.*.name`` will return ["one", "two"]. + + +Installation +============ + +You can install JMESPath from pypi with: + +.. code:: bash + + pip install jmespath + + +API +=== + +The ``jmespath.py`` library has two functions +that operate on python data structures. You can use ``search`` +and give it the jmespath expression and the data: + +.. code:: python + + >>> import jmespath + >>> path = jmespath.search('foo.bar', {'foo': {'bar': 'baz'}}) + 'baz' + +Similar to the ``re`` module, you can use the ``compile`` function +to compile the JMESPath expression and use this parsed expression +to perform repeated searches: + +.. code:: python + + >>> import jmespath + >>> expression = jmespath.compile('foo.bar') + >>> expression.search({'foo': {'bar': 'baz'}}) + 'baz' + >>> expression.search({'foo': {'bar': 'other'}}) + 'other' + +This is useful if you're going to use the same jmespath expression to +search multiple documents. This avoids having to reparse the +JMESPath expression each time you search a new document. + +Options +------- + +You can provide an instance of ``jmespath.Options`` to control how +a JMESPath expression is evaluated. The most common scenario for +using an ``Options`` instance is if you want to have ordered output +of your dict keys. To do this you can use either of these options: + +.. code:: python + + >>> import jmespath + >>> jmespath.search('{a: a, b: b}', + ... mydata, + ... jmespath.Options(dict_cls=collections.OrderedDict)) + + + >>> import jmespath + >>> parsed = jmespath.compile('{a: a, b: b}') + >>> parsed.search(mydata, + ... jmespath.Options(dict_cls=collections.OrderedDict)) + + +Custom Functions +~~~~~~~~~~~~~~~~ + +The JMESPath language has numerous +`built-in functions +<http://jmespath.org/specification.html#built-in-functions>`__, but it is +also possible to add your own custom functions. 
Keep in mind that +custom function support in jmespath.py is experimental and the API may +change based on feedback. + +**If you have a custom function that you've found useful, consider submitting +it to jmespath.site and propose that it be added to the JMESPath language.** +You can submit proposals +`here <https://github.com/jmespath/jmespath.site/issues>`__. + +To create custom functions: + +* Create a subclass of ``jmespath.functions.Functions``. +* Create a method with the name ``_func_<your function name>``. +* Apply the ``jmespath.functions.signature`` decorator that indicates + the expected types of the function arguments. +* Provide an instance of your subclass in a ``jmespath.Options`` object. + +Below are a few examples: + +.. code:: python + + import jmespath + from jmespath import functions + + # 1. Create a subclass of functions.Functions. + # The functions.Functions base class has logic + # that introspects all of its methods and automatically + # registers your custom functions in its function table. + class CustomFunctions(functions.Functions): + + # 2 and 3. Create a function that starts with _func_ + # and decorate it with @signature which indicates its + # expected types. + # In this example, we're creating a jmespath function + # called "unique_letters" that accepts a single argument + # with an expected type "string". + @functions.signature({'types': ['string']}) + def _func_unique_letters(self, s): + # Given a string s, return a sorted + # string of unique letters: 'ccbbadd' -> 'abcd' + return ''.join(sorted(set(s))) + + # Here's another example. This is creating + # a jmespath function called "my_add" that expects + # two arguments, both of which should be of type number. + @functions.signature({'types': ['number']}, {'types': ['number']}) + def _func_my_add(self, x, y): + return x + y + + # 4. Provide an instance of your subclass in an Options object. 
+ options = jmespath.Options(custom_functions=CustomFunctions()) + + # Provide this value to jmespath.search: + # This will print 3 + print( + jmespath.search( + 'my_add(`1`, `2`)', {}, options=options) + ) + + # This will print "abcd" + print( + jmespath.search( + 'foo.bar | unique_letters(@)', + {'foo': {'bar': 'ccbbadd'}}, + options=options) + ) + +Again, if you come up with useful functions that you think make +sense in the JMESPath language (and make sense to implement in all +JMESPath libraries, not just python), please let us know at +`jmespath.site <https://github.com/jmespath/jmespath.site/issues>`__. + + +Specification +============= + +If you'd like to learn more about the JMESPath language, you can check out +the `JMESPath tutorial <http://jmespath.org/tutorial.html>`__. Also check +out the `JMESPath examples page <http://jmespath.org/examples.html>`__ for +examples of more complex jmespath queries. + +The grammar is specified using ABNF, as described in +`RFC4234 <http://www.ietf.org/rfc/rfc4234.txt>`_. +You can find the most up to date +`grammar for JMESPath here <http://jmespath.org/specification.html#grammar>`__. + +You can read the full +`JMESPath specification here <http://jmespath.org/specification.html>`__. + + +Testing +======= + +In addition to the unit tests for the jmespath modules, +there is a ``tests/compliance`` directory that contains +.json files with test cases. This allows other implementations +to verify they are producing the correct output. Each json +file is grouped by feature. + + +Discuss +======= + +Join us on our `Gitter channel <https://gitter.im/jmespath/chat>`__ +if you want to chat or if you have any questions. 
diff --git a/contrib/python/jmespath/py2/jmespath/__init__.py b/contrib/python/jmespath/py2/jmespath/__init__.py new file mode 100644 index 00000000000..99482dba8ef --- /dev/null +++ b/contrib/python/jmespath/py2/jmespath/__init__.py @@ -0,0 +1,23 @@ +import warnings +import sys +from jmespath import parser +from jmespath.visitor import Options + +__version__ = '0.10.0' + + +if sys.version_info[:2] <= (2, 6) or ((3, 0) <= sys.version_info[:2] <= (3, 3)): + python_ver = '.'.join(str(x) for x in sys.version_info[:3]) + + warnings.warn( + 'You are using Python {0}, which will no longer be supported in ' + 'version 0.11.0'.format(python_ver), + DeprecationWarning) + + +def compile(expression): + return parser.Parser().parse(expression) + + +def search(expression, data, options=None): + return parser.Parser().parse(expression).search(data, options=options) diff --git a/contrib/python/jmespath/py2/jmespath/ast.py b/contrib/python/jmespath/py2/jmespath/ast.py new file mode 100644 index 00000000000..dd56c6ed6bf --- /dev/null +++ b/contrib/python/jmespath/py2/jmespath/ast.py @@ -0,0 +1,90 @@ +# AST nodes have this structure: +# {"type": <node type>", children: [], "value": ""} + + +def comparator(name, first, second): + return {'type': 'comparator', 'children': [first, second], 'value': name} + + +def current_node(): + return {'type': 'current', 'children': []} + + +def expref(expression): + return {'type': 'expref', 'children': [expression]} + + +def function_expression(name, args): + return {'type': 'function_expression', 'children': args, 'value': name} + + +def field(name): + return {"type": "field", "children": [], "value": name} + + +def filter_projection(left, right, comparator): + return {'type': 'filter_projection', 'children': [left, right, comparator]} + + +def flatten(node): + return {'type': 'flatten', 'children': [node]} + + +def identity(): + return {"type": "identity", 'children': []} + + +def index(index): + return {"type": "index", "value": index, 
"children": []} + + +def index_expression(children): + return {"type": "index_expression", 'children': children} + + +def key_val_pair(key_name, node): + return {"type": "key_val_pair", 'children': [node], "value": key_name} + + +def literal(literal_value): + return {'type': 'literal', 'value': literal_value, 'children': []} + + +def multi_select_dict(nodes): + return {"type": "multi_select_dict", "children": nodes} + + +def multi_select_list(nodes): + return {"type": "multi_select_list", "children": nodes} + + +def or_expression(left, right): + return {"type": "or_expression", "children": [left, right]} + + +def and_expression(left, right): + return {"type": "and_expression", "children": [left, right]} + + +def not_expression(expr): + return {"type": "not_expression", "children": [expr]} + + +def pipe(left, right): + return {'type': 'pipe', 'children': [left, right]} + + +def projection(left, right): + return {'type': 'projection', 'children': [left, right]} + + +def subexpression(children): + return {"type": "subexpression", 'children': children} + + +def slice(start, end, step): + return {"type": "slice", "children": [start, end, step]} + + +def value_projection(left, right): + return {'type': 'value_projection', 'children': [left, right]} diff --git a/contrib/python/jmespath/py2/jmespath/compat.py b/contrib/python/jmespath/py2/jmespath/compat.py new file mode 100644 index 00000000000..2ed0fe78792 --- /dev/null +++ b/contrib/python/jmespath/py2/jmespath/compat.py @@ -0,0 +1,65 @@ +import sys +import inspect + +PY2 = sys.version_info[0] == 2 + + +def with_metaclass(meta, *bases): + # Taken from flask/six. 
+ class metaclass(meta): + def __new__(cls, name, this_bases, d): + return meta(name, bases, d) + return type.__new__(metaclass, 'temporary_class', (), {}) + + +if PY2: + text_type = unicode + string_type = basestring + from itertools import izip_longest as zip_longest + + def with_str_method(cls): + """Class decorator that handles __str__ compat between py2 and py3.""" + # In python2, the __str__ should be __unicode__ + # and __str__ should return bytes. + cls.__unicode__ = cls.__str__ + def __str__(self): + return self.__unicode__().encode('utf-8') + cls.__str__ = __str__ + return cls + + def with_repr_method(cls): + """Class decorator that handle __repr__ with py2 and py3.""" + # This is almost the same thing as with_str_method *except* + # it uses the unicode_escape encoding. This also means we need to be + # careful encoding the input multiple times, so we only encode + # if we get a unicode type. + original_repr_method = cls.__repr__ + def __repr__(self): + original_repr = original_repr_method(self) + if isinstance(original_repr, text_type): + original_repr = original_repr.encode('unicode_escape') + return original_repr + cls.__repr__ = __repr__ + return cls + + def get_methods(cls): + for name, method in inspect.getmembers(cls, + predicate=inspect.ismethod): + yield name, method + +else: + text_type = str + string_type = str + from itertools import zip_longest + + def with_str_method(cls): + # In python3, we don't need to do anything, we return a str type. 
+ return cls + + def with_repr_method(cls): + return cls + + def get_methods(cls): + for name, method in inspect.getmembers(cls, + predicate=inspect.isfunction): + yield name, method diff --git a/contrib/python/jmespath/py2/jmespath/exceptions.py b/contrib/python/jmespath/py2/jmespath/exceptions.py new file mode 100644 index 00000000000..0156015918b --- /dev/null +++ b/contrib/python/jmespath/py2/jmespath/exceptions.py @@ -0,0 +1,122 @@ +from jmespath.compat import with_str_method + + +class JMESPathError(ValueError): + pass + + +@with_str_method +class ParseError(JMESPathError): + _ERROR_MESSAGE = 'Invalid jmespath expression' + def __init__(self, lex_position, token_value, token_type, + msg=_ERROR_MESSAGE): + super(ParseError, self).__init__(lex_position, token_value, token_type) + self.lex_position = lex_position + self.token_value = token_value + self.token_type = token_type.upper() + self.msg = msg + # Whatever catches the ParseError can fill in the full expression + self.expression = None + + def __str__(self): + # self.lex_position +1 to account for the starting double quote char. + underline = ' ' * (self.lex_position + 1) + '^' + return ( + '%s: Parse error at column %s, ' + 'token "%s" (%s), for expression:\n"%s"\n%s' % ( + self.msg, self.lex_position, self.token_value, self.token_type, + self.expression, underline)) + + +@with_str_method +class IncompleteExpressionError(ParseError): + def set_expression(self, expression): + self.expression = expression + self.lex_position = len(expression) + self.token_type = None + self.token_value = None + + def __str__(self): + # self.lex_position +1 to account for the starting double quote char. 
+ underline = ' ' * (self.lex_position + 1) + '^' + return ( + 'Invalid jmespath expression: Incomplete expression:\n' + '"%s"\n%s' % (self.expression, underline)) + + +@with_str_method +class LexerError(ParseError): + def __init__(self, lexer_position, lexer_value, message, expression=None): + self.lexer_position = lexer_position + self.lexer_value = lexer_value + self.message = message + super(LexerError, self).__init__(lexer_position, + lexer_value, + message) + # Whatever catches LexerError can set this. + self.expression = expression + + def __str__(self): + underline = ' ' * self.lexer_position + '^' + return 'Bad jmespath expression: %s:\n%s\n%s' % ( + self.message, self.expression, underline) + + +@with_str_method +class ArityError(ParseError): + def __init__(self, expected, actual, name): + self.expected_arity = expected + self.actual_arity = actual + self.function_name = name + self.expression = None + + def __str__(self): + return ("Expected %s %s for function %s(), " + "received %s" % ( + self.expected_arity, + self._pluralize('argument', self.expected_arity), + self.function_name, + self.actual_arity)) + + def _pluralize(self, word, count): + if count == 1: + return word + else: + return word + 's' + + +@with_str_method +class VariadictArityError(ArityError): + def __str__(self): + return ("Expected at least %s %s for function %s(), " + "received %s" % ( + self.expected_arity, + self._pluralize('argument', self.expected_arity), + self.function_name, + self.actual_arity)) + + +@with_str_method +class JMESPathTypeError(JMESPathError): + def __init__(self, function_name, current_value, actual_type, + expected_types): + self.function_name = function_name + self.current_value = current_value + self.actual_type = actual_type + self.expected_types = expected_types + + def __str__(self): + return ('In function %s(), invalid type for value: %s, ' + 'expected one of: %s, received: "%s"' % ( + self.function_name, self.current_value, + self.expected_types, 
self.actual_type)) + + +class EmptyExpressionError(JMESPathError): + def __init__(self): + super(EmptyExpressionError, self).__init__( + "Invalid JMESPath expression: cannot be empty.") + + +class UnknownFunctionError(JMESPathError): + pass diff --git a/contrib/python/jmespath/py2/jmespath/functions.py b/contrib/python/jmespath/py2/jmespath/functions.py new file mode 100644 index 00000000000..31dab051694 --- /dev/null +++ b/contrib/python/jmespath/py2/jmespath/functions.py @@ -0,0 +1,362 @@ +import math +import json + +from jmespath import exceptions +from jmespath.compat import string_type as STRING_TYPE +from jmespath.compat import get_methods, with_metaclass + + +# python types -> jmespath types +TYPES_MAP = { + 'bool': 'boolean', + 'list': 'array', + 'dict': 'object', + 'NoneType': 'null', + 'unicode': 'string', + 'str': 'string', + 'float': 'number', + 'int': 'number', + 'long': 'number', + 'OrderedDict': 'object', + '_Projection': 'array', + '_Expression': 'expref', +} + + +# jmespath types -> python types +REVERSE_TYPES_MAP = { + 'boolean': ('bool',), + 'array': ('list', '_Projection'), + 'object': ('dict', 'OrderedDict',), + 'null': ('NoneType',), + 'string': ('unicode', 'str'), + 'number': ('float', 'int', 'long'), + 'expref': ('_Expression',), +} + + +def signature(*arguments): + def _record_signature(func): + func.signature = arguments + return func + return _record_signature + + +class FunctionRegistry(type): + def __init__(cls, name, bases, attrs): + cls._populate_function_table() + super(FunctionRegistry, cls).__init__(name, bases, attrs) + + def _populate_function_table(cls): + function_table = {} + # Any method with a @signature decorator that also + # starts with "_func_" is registered as a function. + # _func_max_by -> max_by function. 
+ for name, method in get_methods(cls): + if not name.startswith('_func_'): + continue + signature = getattr(method, 'signature', None) + if signature is not None: + function_table[name[6:]] = { + 'function': method, + 'signature': signature, + } + cls.FUNCTION_TABLE = function_table + + +class Functions(with_metaclass(FunctionRegistry, object)): + + FUNCTION_TABLE = { + } + + def call_function(self, function_name, resolved_args): + try: + spec = self.FUNCTION_TABLE[function_name] + except KeyError: + raise exceptions.UnknownFunctionError( + "Unknown function: %s()" % function_name) + function = spec['function'] + signature = spec['signature'] + self._validate_arguments(resolved_args, signature, function_name) + return function(self, *resolved_args) + + def _validate_arguments(self, args, signature, function_name): + if signature and signature[-1].get('variadic'): + if len(args) < len(signature): + raise exceptions.VariadictArityError( + len(signature), len(args), function_name) + elif len(args) != len(signature): + raise exceptions.ArityError( + len(signature), len(args), function_name) + return self._type_check(args, signature, function_name) + + def _type_check(self, actual, signature, function_name): + for i in range(len(signature)): + allowed_types = signature[i]['types'] + if allowed_types: + self._type_check_single(actual[i], allowed_types, + function_name) + + def _type_check_single(self, current, types, function_name): + # Type checking involves checking the top level type, + # and in the case of arrays, potentially checking the types + # of each element. + allowed_types, allowed_subtypes = self._get_allowed_pytypes(types) + # We're not using isinstance() on purpose. + # The type model for jmespath does not map + # 1-1 with python types (booleans are considered + # integers in python for example). 
+ actual_typename = type(current).__name__ + if actual_typename not in allowed_types: + raise exceptions.JMESPathTypeError( + function_name, current, + self._convert_to_jmespath_type(actual_typename), types) + # If we're dealing with a list type, we can have + # additional restrictions on the type of the list + # elements (for example a function can require a + # list of numbers or a list of strings). + # Arrays are the only types that can have subtypes. + if allowed_subtypes: + self._subtype_check(current, allowed_subtypes, + types, function_name) + + def _get_allowed_pytypes(self, types): + allowed_types = [] + allowed_subtypes = [] + for t in types: + type_ = t.split('-', 1) + if len(type_) == 2: + type_, subtype = type_ + allowed_subtypes.append(REVERSE_TYPES_MAP[subtype]) + else: + type_ = type_[0] + allowed_types.extend(REVERSE_TYPES_MAP[type_]) + return allowed_types, allowed_subtypes + + def _subtype_check(self, current, allowed_subtypes, types, function_name): + if len(allowed_subtypes) == 1: + # The easy case, we know up front what type + # we need to validate. + allowed_subtypes = allowed_subtypes[0] + for element in current: + actual_typename = type(element).__name__ + if actual_typename not in allowed_subtypes: + raise exceptions.JMESPathTypeError( + function_name, element, actual_typename, types) + elif len(allowed_subtypes) > 1 and current: + # Dynamic type validation. Based on the first + # type we see, we validate that the remaining types + # match. 
+ first = type(current[0]).__name__ + for subtypes in allowed_subtypes: + if first in subtypes: + allowed = subtypes + break + else: + raise exceptions.JMESPathTypeError( + function_name, current[0], first, types) + for element in current: + actual_typename = type(element).__name__ + if actual_typename not in allowed: + raise exceptions.JMESPathTypeError( + function_name, element, actual_typename, types) + + @signature({'types': ['number']}) + def _func_abs(self, arg): + return abs(arg) + + @signature({'types': ['array-number']}) + def _func_avg(self, arg): + if arg: + return sum(arg) / float(len(arg)) + else: + return None + + @signature({'types': [], 'variadic': True}) + def _func_not_null(self, *arguments): + for argument in arguments: + if argument is not None: + return argument + + @signature({'types': []}) + def _func_to_array(self, arg): + if isinstance(arg, list): + return arg + else: + return [arg] + + @signature({'types': []}) + def _func_to_string(self, arg): + if isinstance(arg, STRING_TYPE): + return arg + else: + return json.dumps(arg, separators=(',', ':'), + default=str) + + @signature({'types': []}) + def _func_to_number(self, arg): + if isinstance(arg, (list, dict, bool)): + return None + elif arg is None: + return None + elif isinstance(arg, (int, float)): + return arg + else: + try: + return int(arg) + except ValueError: + try: + return float(arg) + except ValueError: + return None + + @signature({'types': ['array', 'string']}, {'types': []}) + def _func_contains(self, subject, search): + return search in subject + + @signature({'types': ['string', 'array', 'object']}) + def _func_length(self, arg): + return len(arg) + + @signature({'types': ['string']}, {'types': ['string']}) + def _func_ends_with(self, search, suffix): + return search.endswith(suffix) + + @signature({'types': ['string']}, {'types': ['string']}) + def _func_starts_with(self, search, suffix): + return search.startswith(suffix) + + @signature({'types': ['array', 'string']}) + def 
_func_reverse(self, arg): + if isinstance(arg, STRING_TYPE): + return arg[::-1] + else: + return list(reversed(arg)) + + @signature({"types": ['number']}) + def _func_ceil(self, arg): + return math.ceil(arg) + + @signature({"types": ['number']}) + def _func_floor(self, arg): + return math.floor(arg) + + @signature({"types": ['string']}, {"types": ['array-string']}) + def _func_join(self, separator, array): + return separator.join(array) + + @signature({'types': ['expref']}, {'types': ['array']}) + def _func_map(self, expref, arg): + result = [] + for element in arg: + result.append(expref.visit(expref.expression, element)) + return result + + @signature({"types": ['array-number', 'array-string']}) + def _func_max(self, arg): + if arg: + return max(arg) + else: + return None + + @signature({"types": ["object"], "variadic": True}) + def _func_merge(self, *arguments): + merged = {} + for arg in arguments: + merged.update(arg) + return merged + + @signature({"types": ['array-number', 'array-string']}) + def _func_min(self, arg): + if arg: + return min(arg) + else: + return None + + @signature({"types": ['array-string', 'array-number']}) + def _func_sort(self, arg): + return list(sorted(arg)) + + @signature({"types": ['array-number']}) + def _func_sum(self, arg): + return sum(arg) + + @signature({"types": ['object']}) + def _func_keys(self, arg): + # To be consistent with .values() + # should we also return the indices of a list? 
+ return list(arg.keys()) + + @signature({"types": ['object']}) + def _func_values(self, arg): + return list(arg.values()) + + @signature({'types': []}) + def _func_type(self, arg): + if isinstance(arg, STRING_TYPE): + return "string" + elif isinstance(arg, bool): + return "boolean" + elif isinstance(arg, list): + return "array" + elif isinstance(arg, dict): + return "object" + elif isinstance(arg, (float, int)): + return "number" + elif arg is None: + return "null" + + @signature({'types': ['array']}, {'types': ['expref']}) + def _func_sort_by(self, array, expref): + if not array: + return array + # sort_by allows for the expref to be either a number of + # a string, so we have some special logic to handle this. + # We evaluate the first array element and verify that it's + # either a string of a number. We then create a key function + # that validates that type, which requires that remaining array + # elements resolve to the same type as the first element. + required_type = self._convert_to_jmespath_type( + type(expref.visit(expref.expression, array[0])).__name__) + if required_type not in ['number', 'string']: + raise exceptions.JMESPathTypeError( + 'sort_by', array[0], required_type, ['string', 'number']) + keyfunc = self._create_key_func(expref, + [required_type], + 'sort_by') + return list(sorted(array, key=keyfunc)) + + @signature({'types': ['array']}, {'types': ['expref']}) + def _func_min_by(self, array, expref): + keyfunc = self._create_key_func(expref, + ['number', 'string'], + 'min_by') + if array: + return min(array, key=keyfunc) + else: + return None + + @signature({'types': ['array']}, {'types': ['expref']}) + def _func_max_by(self, array, expref): + keyfunc = self._create_key_func(expref, + ['number', 'string'], + 'max_by') + if array: + return max(array, key=keyfunc) + else: + return None + + def _create_key_func(self, expref, allowed_types, function_name): + def keyfunc(x): + result = expref.visit(expref.expression, x) + actual_typename = 
type(result).__name__ + jmespath_type = self._convert_to_jmespath_type(actual_typename) + # allowed_types is in term of jmespath types, not python types. + if jmespath_type not in allowed_types: + raise exceptions.JMESPathTypeError( + function_name, result, jmespath_type, allowed_types) + return result + return keyfunc + + def _convert_to_jmespath_type(self, pyobject): + return TYPES_MAP.get(pyobject, 'unknown') diff --git a/contrib/python/jmespath/py2/jmespath/lexer.py b/contrib/python/jmespath/py2/jmespath/lexer.py new file mode 100644 index 00000000000..8db05e37608 --- /dev/null +++ b/contrib/python/jmespath/py2/jmespath/lexer.py @@ -0,0 +1,208 @@ +import string +import warnings +from json import loads + +from jmespath.exceptions import LexerError, EmptyExpressionError + + +class Lexer(object): + START_IDENTIFIER = set(string.ascii_letters + '_') + VALID_IDENTIFIER = set(string.ascii_letters + string.digits + '_') + VALID_NUMBER = set(string.digits) + WHITESPACE = set(" \t\n\r") + SIMPLE_TOKENS = { + '.': 'dot', + '*': 'star', + ']': 'rbracket', + ',': 'comma', + ':': 'colon', + '@': 'current', + '(': 'lparen', + ')': 'rparen', + '{': 'lbrace', + '}': 'rbrace', + } + + def tokenize(self, expression): + self._initialize_for_expression(expression) + while self._current is not None: + if self._current in self.SIMPLE_TOKENS: + yield {'type': self.SIMPLE_TOKENS[self._current], + 'value': self._current, + 'start': self._position, 'end': self._position + 1} + self._next() + elif self._current in self.START_IDENTIFIER: + start = self._position + buff = self._current + while self._next() in self.VALID_IDENTIFIER: + buff += self._current + yield {'type': 'unquoted_identifier', 'value': buff, + 'start': start, 'end': start + len(buff)} + elif self._current in self.WHITESPACE: + self._next() + elif self._current == '[': + start = self._position + next_char = self._next() + if next_char == ']': + self._next() + yield {'type': 'flatten', 'value': '[]', + 'start': start, 
'end': start + 2} + elif next_char == '?': + self._next() + yield {'type': 'filter', 'value': '[?', + 'start': start, 'end': start + 2} + else: + yield {'type': 'lbracket', 'value': '[', + 'start': start, 'end': start + 1} + elif self._current == "'": + yield self._consume_raw_string_literal() + elif self._current == '|': + yield self._match_or_else('|', 'or', 'pipe') + elif self._current == '&': + yield self._match_or_else('&', 'and', 'expref') + elif self._current == '`': + yield self._consume_literal() + elif self._current in self.VALID_NUMBER: + start = self._position + buff = self._consume_number() + yield {'type': 'number', 'value': int(buff), + 'start': start, 'end': start + len(buff)} + elif self._current == '-': + # Negative number. + start = self._position + buff = self._consume_number() + if len(buff) > 1: + yield {'type': 'number', 'value': int(buff), + 'start': start, 'end': start + len(buff)} + else: + raise LexerError(lexer_position=start, + lexer_value=buff, + message="Unknown token '%s'" % buff) + elif self._current == '"': + yield self._consume_quoted_identifier() + elif self._current == '<': + yield self._match_or_else('=', 'lte', 'lt') + elif self._current == '>': + yield self._match_or_else('=', 'gte', 'gt') + elif self._current == '!': + yield self._match_or_else('=', 'ne', 'not') + elif self._current == '=': + if self._next() == '=': + yield {'type': 'eq', 'value': '==', + 'start': self._position - 1, 'end': self._position} + self._next() + else: + if self._current is None: + # If we're at the EOF, we never advanced + # the position so we don't need to rewind + # it back one location. 
+ position = self._position + else: + position = self._position - 1 + raise LexerError( + lexer_position=position, + lexer_value='=', + message="Unknown token '='") + else: + raise LexerError(lexer_position=self._position, + lexer_value=self._current, + message="Unknown token %s" % self._current) + yield {'type': 'eof', 'value': '', + 'start': self._length, 'end': self._length} + + def _consume_number(self): + start = self._position + buff = self._current + while self._next() in self.VALID_NUMBER: + buff += self._current + return buff + + def _initialize_for_expression(self, expression): + if not expression: + raise EmptyExpressionError() + self._position = 0 + self._expression = expression + self._chars = list(self._expression) + self._current = self._chars[self._position] + self._length = len(self._expression) + + def _next(self): + if self._position == self._length - 1: + self._current = None + else: + self._position += 1 + self._current = self._chars[self._position] + return self._current + + def _consume_until(self, delimiter): + # Consume until the delimiter is reached, + # allowing for the delimiter to be escaped with "\". + start = self._position + buff = '' + self._next() + while self._current != delimiter: + if self._current == '\\': + buff += '\\' + self._next() + if self._current is None: + # We're at the EOF. + raise LexerError(lexer_position=start, + lexer_value=self._expression[start:], + message="Unclosed %s delimiter" % delimiter) + buff += self._current + self._next() + # Skip the closing delimiter. + self._next() + return buff + + def _consume_literal(self): + start = self._position + lexeme = self._consume_until('`').replace('\\`', '`') + try: + # Assume it is valid JSON and attempt to parse. + parsed_json = loads(lexeme) + except ValueError: + try: + # Invalid JSON values should be converted to quoted + # JSON strings during the JEP-12 deprecation period. 
+ parsed_json = loads('"%s"' % lexeme.lstrip()) + warnings.warn("deprecated string literal syntax", + PendingDeprecationWarning) + except ValueError: + raise LexerError(lexer_position=start, + lexer_value=self._expression[start:], + message="Bad token %s" % lexeme) + token_len = self._position - start + return {'type': 'literal', 'value': parsed_json, + 'start': start, 'end': token_len} + + def _consume_quoted_identifier(self): + start = self._position + lexeme = '"' + self._consume_until('"') + '"' + try: + token_len = self._position - start + return {'type': 'quoted_identifier', 'value': loads(lexeme), + 'start': start, 'end': token_len} + except ValueError as e: + error_message = str(e).split(':')[0] + raise LexerError(lexer_position=start, + lexer_value=lexeme, + message=error_message) + + def _consume_raw_string_literal(self): + start = self._position + lexeme = self._consume_until("'").replace("\\'", "'") + token_len = self._position - start + return {'type': 'literal', 'value': lexeme, + 'start': start, 'end': token_len} + + def _match_or_else(self, expected, match_type, else_type): + start = self._position + current = self._current + next_char = self._next() + if next_char == expected: + self._next() + return {'type': match_type, 'value': current + next_char, + 'start': start, 'end': start + 1} + return {'type': else_type, 'value': current, + 'start': start, 'end': start} diff --git a/contrib/python/jmespath/py2/jmespath/parser.py b/contrib/python/jmespath/py2/jmespath/parser.py new file mode 100644 index 00000000000..eeac38fa897 --- /dev/null +++ b/contrib/python/jmespath/py2/jmespath/parser.py @@ -0,0 +1,527 @@ +"""Top down operator precedence parser. + +This is an implementation of Vaughan R. Pratt's +"Top Down Operator Precedence" parser. +(http://dl.acm.org/citation.cfm?doid=512927.512931). 
+ +These are some additional resources that help explain the +general idea behind a Pratt parser: + +* http://effbot.org/zone/simple-top-down-parsing.htm +* http://javascript.crockford.com/tdop/tdop.html + +A few notes on the implementation. + +* All the nud/led tokens are on the Parser class itself, and are dispatched + using getattr(). This keeps all the parsing logic contained to a single + class. +* We use two passes through the data. One to create a list of token, + then one pass through the tokens to create the AST. While the lexer actually + yields tokens, we convert it to a list so we can easily implement two tokens + of lookahead. A previous implementation used a fixed circular buffer, but it + was significantly slower. Also, the average jmespath expression typically + does not have a large amount of token so this is not an issue. And + interestingly enough, creating a token list first is actually faster than + consuming from the token iterator one token at a time. + +""" +import random + +from jmespath import lexer +from jmespath.compat import with_repr_method +from jmespath import ast +from jmespath import exceptions +from jmespath import visitor + + +class Parser(object): + BINDING_POWER = { + 'eof': 0, + 'unquoted_identifier': 0, + 'quoted_identifier': 0, + 'literal': 0, + 'rbracket': 0, + 'rparen': 0, + 'comma': 0, + 'rbrace': 0, + 'number': 0, + 'current': 0, + 'expref': 0, + 'colon': 0, + 'pipe': 1, + 'or': 2, + 'and': 3, + 'eq': 5, + 'gt': 5, + 'lt': 5, + 'gte': 5, + 'lte': 5, + 'ne': 5, + 'flatten': 9, + # Everything above stops a projection. + 'star': 20, + 'filter': 21, + 'dot': 40, + 'not': 45, + 'lbrace': 50, + 'lbracket': 55, + 'lparen': 60, + } + # The maximum binding power for a token that can stop + # a projection. + _PROJECTION_STOP = 10 + # The _MAX_SIZE most recent expressions are cached in + # _CACHE dict. 
+ _CACHE = {} + _MAX_SIZE = 128 + + def __init__(self, lookahead=2): + self.tokenizer = None + self._tokens = [None] * lookahead + self._buffer_size = lookahead + self._index = 0 + + def parse(self, expression): + cached = self._CACHE.get(expression) + if cached is not None: + return cached + parsed_result = self._do_parse(expression) + self._CACHE[expression] = parsed_result + if len(self._CACHE) > self._MAX_SIZE: + self._free_cache_entries() + return parsed_result + + def _do_parse(self, expression): + try: + return self._parse(expression) + except exceptions.LexerError as e: + e.expression = expression + raise + except exceptions.IncompleteExpressionError as e: + e.set_expression(expression) + raise + except exceptions.ParseError as e: + e.expression = expression + raise + + def _parse(self, expression): + self.tokenizer = lexer.Lexer().tokenize(expression) + self._tokens = list(self.tokenizer) + self._index = 0 + parsed = self._expression(binding_power=0) + if not self._current_token() == 'eof': + t = self._lookahead_token(0) + raise exceptions.ParseError(t['start'], t['value'], t['type'], + "Unexpected token: %s" % t['value']) + return ParsedResult(expression, parsed) + + def _expression(self, binding_power=0): + left_token = self._lookahead_token(0) + self._advance() + nud_function = getattr( + self, '_token_nud_%s' % left_token['type'], + self._error_nud_token) + left = nud_function(left_token) + current_token = self._current_token() + while binding_power < self.BINDING_POWER[current_token]: + led = getattr(self, '_token_led_%s' % current_token, None) + if led is None: + error_token = self._lookahead_token(0) + self._error_led_token(error_token) + else: + self._advance() + left = led(left) + current_token = self._current_token() + return left + + def _token_nud_literal(self, token): + return ast.literal(token['value']) + + def _token_nud_unquoted_identifier(self, token): + return ast.field(token['value']) + + def _token_nud_quoted_identifier(self, token): + 
field = ast.field(token['value']) + # You can't have a quoted identifier as a function + # name. + if self._current_token() == 'lparen': + t = self._lookahead_token(0) + raise exceptions.ParseError( + 0, t['value'], t['type'], + 'Quoted identifier not allowed for function names.') + return field + + def _token_nud_star(self, token): + left = ast.identity() + if self._current_token() == 'rbracket': + right = ast.identity() + else: + right = self._parse_projection_rhs(self.BINDING_POWER['star']) + return ast.value_projection(left, right) + + def _token_nud_filter(self, token): + return self._token_led_filter(ast.identity()) + + def _token_nud_lbrace(self, token): + return self._parse_multi_select_hash() + + def _token_nud_lparen(self, token): + expression = self._expression() + self._match('rparen') + return expression + + def _token_nud_flatten(self, token): + left = ast.flatten(ast.identity()) + right = self._parse_projection_rhs( + self.BINDING_POWER['flatten']) + return ast.projection(left, right) + + def _token_nud_not(self, token): + expr = self._expression(self.BINDING_POWER['not']) + return ast.not_expression(expr) + + def _token_nud_lbracket(self, token): + if self._current_token() in ['number', 'colon']: + right = self._parse_index_expression() + # We could optimize this and remove the identity() node. + # We don't really need an index_expression node, we can + # just use emit an index node here if we're not dealing + # with a slice. 
+ return self._project_if_slice(ast.identity(), right) + elif self._current_token() == 'star' and \ + self._lookahead(1) == 'rbracket': + self._advance() + self._advance() + right = self._parse_projection_rhs(self.BINDING_POWER['star']) + return ast.projection(ast.identity(), right) + else: + return self._parse_multi_select_list() + + def _parse_index_expression(self): + # We're here: + # [<current> + # ^ + # | current token + if (self._lookahead(0) == 'colon' or + self._lookahead(1) == 'colon'): + return self._parse_slice_expression() + else: + # Parse the syntax [number] + node = ast.index(self._lookahead_token(0)['value']) + self._advance() + self._match('rbracket') + return node + + def _parse_slice_expression(self): + # [start:end:step] + # Where start, end, and step are optional. + # The last colon is optional as well. + parts = [None, None, None] + index = 0 + current_token = self._current_token() + while not current_token == 'rbracket' and index < 3: + if current_token == 'colon': + index += 1 + if index == 3: + self._raise_parse_error_for_token( + self._lookahead_token(0), 'syntax error') + self._advance() + elif current_token == 'number': + parts[index] = self._lookahead_token(0)['value'] + self._advance() + else: + self._raise_parse_error_for_token( + self._lookahead_token(0), 'syntax error') + current_token = self._current_token() + self._match('rbracket') + return ast.slice(*parts) + + def _token_nud_current(self, token): + return ast.current_node() + + def _token_nud_expref(self, token): + expression = self._expression(self.BINDING_POWER['expref']) + return ast.expref(expression) + + def _token_led_dot(self, left): + if not self._current_token() == 'star': + right = self._parse_dot_rhs(self.BINDING_POWER['dot']) + if left['type'] == 'subexpression': + left['children'].append(right) + return left + else: + return ast.subexpression([left, right]) + else: + # We're creating a projection. 
+ self._advance() + right = self._parse_projection_rhs( + self.BINDING_POWER['dot']) + return ast.value_projection(left, right) + + def _token_led_pipe(self, left): + right = self._expression(self.BINDING_POWER['pipe']) + return ast.pipe(left, right) + + def _token_led_or(self, left): + right = self._expression(self.BINDING_POWER['or']) + return ast.or_expression(left, right) + + def _token_led_and(self, left): + right = self._expression(self.BINDING_POWER['and']) + return ast.and_expression(left, right) + + def _token_led_lparen(self, left): + if left['type'] != 'field': + # 0 - first func arg or closing paren. + # -1 - '(' token + # -2 - invalid function "name". + prev_t = self._lookahead_token(-2) + raise exceptions.ParseError( + prev_t['start'], prev_t['value'], prev_t['type'], + "Invalid function name '%s'" % prev_t['value']) + name = left['value'] + args = [] + while not self._current_token() == 'rparen': + expression = self._expression() + if self._current_token() == 'comma': + self._match('comma') + args.append(expression) + self._match('rparen') + function_node = ast.function_expression(name, args) + return function_node + + def _token_led_filter(self, left): + # Filters are projections. 
+ condition = self._expression(0) + self._match('rbracket') + if self._current_token() == 'flatten': + right = ast.identity() + else: + right = self._parse_projection_rhs(self.BINDING_POWER['filter']) + return ast.filter_projection(left, right, condition) + + def _token_led_eq(self, left): + return self._parse_comparator(left, 'eq') + + def _token_led_ne(self, left): + return self._parse_comparator(left, 'ne') + + def _token_led_gt(self, left): + return self._parse_comparator(left, 'gt') + + def _token_led_gte(self, left): + return self._parse_comparator(left, 'gte') + + def _token_led_lt(self, left): + return self._parse_comparator(left, 'lt') + + def _token_led_lte(self, left): + return self._parse_comparator(left, 'lte') + + def _token_led_flatten(self, left): + left = ast.flatten(left) + right = self._parse_projection_rhs( + self.BINDING_POWER['flatten']) + return ast.projection(left, right) + + def _token_led_lbracket(self, left): + token = self._lookahead_token(0) + if token['type'] in ['number', 'colon']: + right = self._parse_index_expression() + if left['type'] == 'index_expression': + # Optimization: if the left node is an index expr, + # we can avoid creating another node and instead just add + # the right node as a child of the left. 
+ left['children'].append(right) + return left + else: + return self._project_if_slice(left, right) + else: + # We have a projection + self._match('star') + self._match('rbracket') + right = self._parse_projection_rhs(self.BINDING_POWER['star']) + return ast.projection(left, right) + + def _project_if_slice(self, left, right): + index_expr = ast.index_expression([left, right]) + if right['type'] == 'slice': + return ast.projection( + index_expr, + self._parse_projection_rhs(self.BINDING_POWER['star'])) + else: + return index_expr + + def _parse_comparator(self, left, comparator): + right = self._expression(self.BINDING_POWER[comparator]) + return ast.comparator(comparator, left, right) + + def _parse_multi_select_list(self): + expressions = [] + while True: + expression = self._expression() + expressions.append(expression) + if self._current_token() == 'rbracket': + break + else: + self._match('comma') + self._match('rbracket') + return ast.multi_select_list(expressions) + + def _parse_multi_select_hash(self): + pairs = [] + while True: + key_token = self._lookahead_token(0) + # Before getting the token value, verify it's + # an identifier. + self._match_multiple_tokens( + token_types=['quoted_identifier', 'unquoted_identifier']) + key_name = key_token['value'] + self._match('colon') + value = self._expression(0) + node = ast.key_val_pair(key_name=key_name, node=value) + pairs.append(node) + if self._current_token() == 'comma': + self._match('comma') + elif self._current_token() == 'rbrace': + self._match('rbrace') + break + return ast.multi_select_dict(nodes=pairs) + + def _parse_projection_rhs(self, binding_power): + # Parse the right hand side of the projection. + if self.BINDING_POWER[self._current_token()] < self._PROJECTION_STOP: + # BP of 10 are all the tokens that stop a projection. 
+ right = ast.identity() + elif self._current_token() == 'lbracket': + right = self._expression(binding_power) + elif self._current_token() == 'filter': + right = self._expression(binding_power) + elif self._current_token() == 'dot': + self._match('dot') + right = self._parse_dot_rhs(binding_power) + else: + self._raise_parse_error_for_token(self._lookahead_token(0), + 'syntax error') + return right + + def _parse_dot_rhs(self, binding_power): + # From the grammar: + # expression '.' ( identifier / + # multi-select-list / + # multi-select-hash / + # function-expression / + # * + # In terms of tokens that means that after a '.', + # you can have: + lookahead = self._current_token() + # Common case "foo.bar", so first check for an identifier. + if lookahead in ['quoted_identifier', 'unquoted_identifier', 'star']: + return self._expression(binding_power) + elif lookahead == 'lbracket': + self._match('lbracket') + return self._parse_multi_select_list() + elif lookahead == 'lbrace': + self._match('lbrace') + return self._parse_multi_select_hash() + else: + t = self._lookahead_token(0) + allowed = ['quoted_identifier', 'unquoted_identifier', + 'lbracket', 'lbrace'] + msg = ( + "Expecting: %s, got: %s" % (allowed, t['type']) + ) + self._raise_parse_error_for_token(t, msg) + + def _error_nud_token(self, token): + if token['type'] == 'eof': + raise exceptions.IncompleteExpressionError( + token['start'], token['value'], token['type']) + self._raise_parse_error_for_token(token, 'invalid token') + + def _error_led_token(self, token): + self._raise_parse_error_for_token(token, 'invalid token') + + def _match(self, token_type=None): + # inline'd self._current_token() + if self._current_token() == token_type: + # inline'd self._advance() + self._advance() + else: + self._raise_parse_error_maybe_eof( + token_type, self._lookahead_token(0)) + + def _match_multiple_tokens(self, token_types): + if self._current_token() not in token_types: + self._raise_parse_error_maybe_eof( + 
token_types, self._lookahead_token(0)) + self._advance() + + def _advance(self): + self._index += 1 + + def _current_token(self): + return self._tokens[self._index]['type'] + + def _lookahead(self, number): + return self._tokens[self._index + number]['type'] + + def _lookahead_token(self, number): + return self._tokens[self._index + number] + + def _raise_parse_error_for_token(self, token, reason): + lex_position = token['start'] + actual_value = token['value'] + actual_type = token['type'] + raise exceptions.ParseError(lex_position, actual_value, + actual_type, reason) + + def _raise_parse_error_maybe_eof(self, expected_type, token): + lex_position = token['start'] + actual_value = token['value'] + actual_type = token['type'] + if actual_type == 'eof': + raise exceptions.IncompleteExpressionError( + lex_position, actual_value, actual_type) + message = 'Expecting: %s, got: %s' % (expected_type, + actual_type) + raise exceptions.ParseError( + lex_position, actual_value, actual_type, message) + + def _free_cache_entries(self): + for key in random.sample(self._CACHE.keys(), int(self._MAX_SIZE / 2)): + self._CACHE.pop(key, None) + + @classmethod + def purge(cls): + """Clear the expression compilation cache.""" + cls._CACHE.clear() + + +@with_repr_method +class ParsedResult(object): + def __init__(self, expression, parsed): + self.expression = expression + self.parsed = parsed + + def search(self, value, options=None): + interpreter = visitor.TreeInterpreter(options) + result = interpreter.visit(self.parsed, value) + return result + + def _render_dot_file(self): + """Render the parsed AST as a dot file. + + Note that this is marked as an internal method because + the AST is an implementation detail and is subject + to change. This method can be used to help troubleshoot + or for development purposes, but is not considered part + of the public supported API. Use at your own risk. 
+ + """ + renderer = visitor.GraphvizVisitor() + contents = renderer.visit(self.parsed) + return contents + + def __repr__(self): + return repr(self.parsed) diff --git a/contrib/python/jmespath/py2/jmespath/visitor.py b/contrib/python/jmespath/py2/jmespath/visitor.py new file mode 100644 index 00000000000..b3e846b7614 --- /dev/null +++ b/contrib/python/jmespath/py2/jmespath/visitor.py @@ -0,0 +1,328 @@ +import operator + +from jmespath import functions +from jmespath.compat import string_type +from numbers import Number + + +def _equals(x, y): + if _is_special_integer_case(x, y): + return False + else: + return x == y + + +def _is_special_integer_case(x, y): + # We need to special case comparing 0 or 1 to + # True/False. While normally comparing any + # integer other than 0/1 to True/False will always + # return False. However 0/1 have this: + # >>> 0 == True + # False + # >>> 0 == False + # True + # >>> 1 == True + # True + # >>> 1 == False + # False + # + # Also need to consider that: + # >>> 0 in [True, False] + # True + if type(x) is int and (x == 0 or x == 1): + return y is True or y is False + elif type(y) is int and (y == 0 or y == 1): + return x is True or x is False + + +def _is_comparable(x): + # The spec doesn't officially support string types yet, + # but enough people are relying on this behavior that + # it's been added back. This should eventually become + # part of the official spec. + return _is_actual_number(x) or isinstance(x, string_type) + + +def _is_actual_number(x): + # We need to handle python's quirkiness with booleans, + # specifically: + # + # >>> isinstance(False, int) + # True + # >>> isinstance(True, int) + # True + if x is True or x is False: + return False + return isinstance(x, Number) + + +class Options(object): + """Options to control how a JMESPath function is evaluated.""" + def __init__(self, dict_cls=None, custom_functions=None): + #: The class to use when creating a dict. 
The interpreter + # may create dictionaries during the evaluation of a JMESPath + # expression. For example, a multi-select hash will + # create a dictionary. By default we use a dict() type. + # You can set this value to change what dict type is used. + # The most common reason you would change this is if you + # want to set a collections.OrderedDict so that you can + # have predictable key ordering. + self.dict_cls = dict_cls + self.custom_functions = custom_functions + + +class _Expression(object): + def __init__(self, expression, interpreter): + self.expression = expression + self.interpreter = interpreter + + def visit(self, node, *args, **kwargs): + return self.interpreter.visit(node, *args, **kwargs) + + +class Visitor(object): + def __init__(self): + self._method_cache = {} + + def visit(self, node, *args, **kwargs): + node_type = node['type'] + method = self._method_cache.get(node_type) + if method is None: + method = getattr( + self, 'visit_%s' % node['type'], self.default_visit) + self._method_cache[node_type] = method + return method(node, *args, **kwargs) + + def default_visit(self, node, *args, **kwargs): + raise NotImplementedError("default_visit") + + +class TreeInterpreter(Visitor): + COMPARATOR_FUNC = { + 'eq': _equals, + 'ne': lambda x, y: not _equals(x, y), + 'lt': operator.lt, + 'gt': operator.gt, + 'lte': operator.le, + 'gte': operator.ge + } + _EQUALITY_OPS = ['eq', 'ne'] + MAP_TYPE = dict + + def __init__(self, options=None): + super(TreeInterpreter, self).__init__() + self._dict_cls = self.MAP_TYPE + if options is None: + options = Options() + self._options = options + if options.dict_cls is not None: + self._dict_cls = self._options.dict_cls + if options.custom_functions is not None: + self._functions = self._options.custom_functions + else: + self._functions = functions.Functions() + + def default_visit(self, node, *args, **kwargs): + raise NotImplementedError(node['type']) + + def visit_subexpression(self, node, value): + result = value 
+ for node in node['children']: + result = self.visit(node, result) + return result + + def visit_field(self, node, value): + try: + return value.get(node['value']) + except AttributeError: + return None + + def visit_comparator(self, node, value): + # Common case: comparator is == or != + comparator_func = self.COMPARATOR_FUNC[node['value']] + if node['value'] in self._EQUALITY_OPS: + return comparator_func( + self.visit(node['children'][0], value), + self.visit(node['children'][1], value) + ) + else: + # Ordering operators are only valid for numbers. + # Evaluating any other type with a comparison operator + # will yield a None value. + left = self.visit(node['children'][0], value) + right = self.visit(node['children'][1], value) + num_types = (int, float) + if not (_is_comparable(left) and + _is_comparable(right)): + return None + return comparator_func(left, right) + + def visit_current(self, node, value): + return value + + def visit_expref(self, node, value): + return _Expression(node['children'][0], self) + + def visit_function_expression(self, node, value): + resolved_args = [] + for child in node['children']: + current = self.visit(child, value) + resolved_args.append(current) + return self._functions.call_function(node['value'], resolved_args) + + def visit_filter_projection(self, node, value): + base = self.visit(node['children'][0], value) + if not isinstance(base, list): + return None + comparator_node = node['children'][2] + collected = [] + for element in base: + if self._is_true(self.visit(comparator_node, element)): + current = self.visit(node['children'][1], element) + if current is not None: + collected.append(current) + return collected + + def visit_flatten(self, node, value): + base = self.visit(node['children'][0], value) + if not isinstance(base, list): + # Can't flatten the object if it's not a list. 
+ return None + merged_list = [] + for element in base: + if isinstance(element, list): + merged_list.extend(element) + else: + merged_list.append(element) + return merged_list + + def visit_identity(self, node, value): + return value + + def visit_index(self, node, value): + # Even though we can index strings, we don't + # want to support that. + if not isinstance(value, list): + return None + try: + return value[node['value']] + except IndexError: + return None + + def visit_index_expression(self, node, value): + result = value + for node in node['children']: + result = self.visit(node, result) + return result + + def visit_slice(self, node, value): + if not isinstance(value, list): + return None + s = slice(*node['children']) + return value[s] + + def visit_key_val_pair(self, node, value): + return self.visit(node['children'][0], value) + + def visit_literal(self, node, value): + return node['value'] + + def visit_multi_select_dict(self, node, value): + if value is None: + return None + collected = self._dict_cls() + for child in node['children']: + collected[child['value']] = self.visit(child, value) + return collected + + def visit_multi_select_list(self, node, value): + if value is None: + return None + collected = [] + for child in node['children']: + collected.append(self.visit(child, value)) + return collected + + def visit_or_expression(self, node, value): + matched = self.visit(node['children'][0], value) + if self._is_false(matched): + matched = self.visit(node['children'][1], value) + return matched + + def visit_and_expression(self, node, value): + matched = self.visit(node['children'][0], value) + if self._is_false(matched): + return matched + return self.visit(node['children'][1], value) + + def visit_not_expression(self, node, value): + original_result = self.visit(node['children'][0], value) + if type(original_result) is int and original_result == 0: + # Special case for 0, !0 should be false, not true. 
+ # 0 is not a special cased integer in jmespath. + return False + return not original_result + + def visit_pipe(self, node, value): + result = value + for node in node['children']: + result = self.visit(node, result) + return result + + def visit_projection(self, node, value): + base = self.visit(node['children'][0], value) + if not isinstance(base, list): + return None + collected = [] + for element in base: + current = self.visit(node['children'][1], element) + if current is not None: + collected.append(current) + return collected + + def visit_value_projection(self, node, value): + base = self.visit(node['children'][0], value) + try: + base = base.values() + except AttributeError: + return None + collected = [] + for element in base: + current = self.visit(node['children'][1], element) + if current is not None: + collected.append(current) + return collected + + def _is_false(self, value): + # This looks weird, but we're explicitly using equality checks + # because the truth/false values are different between + # python and jmespath. 
+ return (value == '' or value == [] or value == {} or value is None or + value is False) + + def _is_true(self, value): + return not self._is_false(value) + + +class GraphvizVisitor(Visitor): + def __init__(self): + super(GraphvizVisitor, self).__init__() + self._lines = [] + self._count = 1 + + def visit(self, node, *args, **kwargs): + self._lines.append('digraph AST {') + current = '%s%s' % (node['type'], self._count) + self._count += 1 + self._visit(node, current) + self._lines.append('}') + return '\n'.join(self._lines) + + def _visit(self, node, current): + self._lines.append('%s [label="%s(%s)"]' % ( + current, node['type'], node.get('value', ''))) + for child in node.get('children', []): + child_name = '%s%s' % (child['type'], self._count) + self._count += 1 + self._lines.append(' %s -> %s' % (current, child_name)) + self._visit(child, child_name) diff --git a/contrib/python/jmespath/py2/tests/__init__.py b/contrib/python/jmespath/py2/tests/__init__.py new file mode 100644 index 00000000000..d86946ccda5 --- /dev/null +++ b/contrib/python/jmespath/py2/tests/__init__.py @@ -0,0 +1,40 @@ +import sys +from jmespath import ast + + +# The unittest module got a significant overhaul +# in 2.7, so if we're in 2.6 we can use the backported +# version unittest2. +if sys.version_info[:2] == (2, 6): + import unittest2 as unittest + import simplejson as json + from ordereddict import OrderedDict +else: + import unittest + import json + from collections import OrderedDict + + +# Helper method used to create an s-expression +# of the AST to make unit test assertions easier. +# You get a nice string diff on assert failures. 
+def as_s_expression(node): + parts = [] + _as_s_expression(node, parts) + return ''.join(parts) + + +def _as_s_expression(node, parts): + parts.append("(%s" % (node.__class__.__name__.lower())) + if isinstance(node, ast.Field): + parts.append(" %s" % node.name) + elif isinstance(node, ast.FunctionExpression): + parts.append(" %s" % node.name) + elif isinstance(node, ast.KeyValPair): + parts.append(" %s" % node.key_name) + for child in node.children: + parts.append(" ") + _as_s_expression(child, parts) + parts.append(")") + + diff --git a/contrib/python/jmespath/py2/tests/test_compliance.py b/contrib/python/jmespath/py2/tests/test_compliance.py new file mode 100644 index 00000000000..86e82970273 --- /dev/null +++ b/contrib/python/jmespath/py2/tests/test_compliance.py @@ -0,0 +1,114 @@ +import os +import pytest +from pprint import pformat +from . import OrderedDict +from . import json + +from jmespath.visitor import Options + + +TEST_DIR = os.path.dirname(os.path.abspath(__file__)) +COMPLIANCE_DIR = os.path.join(TEST_DIR, 'compliance') +LEGACY_DIR = os.path.join(TEST_DIR, 'legacy') +NOT_SPECIFIED = object() +OPTIONS = Options(dict_cls=OrderedDict) + + +def _load_all_cases(): + for full_path in _walk_files(): + if full_path.endswith('.json'): + for given, test_type, test_data in load_cases(full_path): + t = test_data + # Benchmark tests aren't run as part of the normal + # test suite, so we only care about 'result' and + # 'error' test_types. + if test_type == 'result': + yield (given, t['expression'], t['result'], os.path.basename(full_path)) + elif test_type == 'error': + yield (given, t['expression'], t['error'], os.path.basename(full_path)) + + +def _walk_files(): + # Check for a shortcut when running the tests interactively. + # If a JMESPATH_TEST is defined, that file is used as the + # only test to run. Useful when doing feature development. 
+ single_file = os.environ.get('JMESPATH_TEST') + if single_file is not None: + yield os.path.abspath(single_file) + else: + for root, dirnames, filenames in os.walk(TEST_DIR): + for filename in filenames: + yield os.path.join(root, filename) + for root, dirnames, filenames in os.walk(LEGACY_DIR): + for filename in filenames: + yield os.path.join(root, filename) + + +def load_cases(full_path): + all_test_data = json.load(open(full_path), object_pairs_hook=OrderedDict) + for test_data in all_test_data: + given = test_data['given'] + for case in test_data['cases']: + if 'result' in case: + test_type = 'result' + elif 'error' in case: + test_type = 'error' + elif 'bench' in case: + test_type = 'bench' + else: + raise RuntimeError("Unknown test type: %s" % json.dumps(case)) + yield (given, test_type, case) + + +@pytest.mark.parametrize( + 'given,expression,expected,filename', + list(_load_all_cases()) +) +def test_compliance(given, expression, expected, filename): + _test_expression(given, expression, expected, filename) + + +def _test_expression(given, expression, expected, filename): + import jmespath.parser + try: + parsed = jmespath.compile(expression) + except ValueError as e: + raise AssertionError( + 'jmespath expression failed to compile: "%s", error: %s"' % + (expression, e)) + actual = parsed.search(given, options=OPTIONS) + expected_repr = json.dumps(expected, indent=4) + actual_repr = json.dumps(actual, indent=4) + error_msg = ("\n\n (%s) The expression '%s' was suppose to give:\n%s\n" + "Instead it matched:\n%s\nparsed as:\n%s\ngiven:\n%s" % ( + filename, expression, expected_repr, + actual_repr, pformat(parsed.parsed), + json.dumps(given, indent=4))) + error_msg = error_msg.replace(r'\n', '\n') + assert actual == expected, error_msg + + +def _test_error_expression(given, expression, error, filename): + import jmespath.parser + if error not in ('syntax', 'invalid-type', + 'unknown-function', 'invalid-arity', 'invalid-value'): + raise RuntimeError("Unknown error type '%s'" % error) + 
try: + parsed = jmespath.compile(expression) + parsed.search(given) + except ValueError: + # Test passes, it raised a parse error as expected. + pass + except Exception as e: + # Failure because an unexpected exception was raised. + error_msg = ("\n\n (%s) The expression '%s' was suppose to be a " + "syntax error, but it raised an unexpected error:\n\n%s" % ( + filename, expression, e)) + error_msg = error_msg.replace(r'\n', '\n') + raise AssertionError(error_msg) + else: + error_msg = ("\n\n (%s) The expression '%s' was suppose to be a " + "syntax error, but it successfully parsed as:\n\n%s" % ( + filename, expression, pformat(parsed.parsed))) + error_msg = error_msg.replace(r'\n', '\n') + raise AssertionError(error_msg) diff --git a/contrib/python/jmespath/py2/tests/test_parser.py b/contrib/python/jmespath/py2/tests/test_parser.py new file mode 100644 index 00000000000..121b4b79b2a --- /dev/null +++ b/contrib/python/jmespath/py2/tests/test_parser.py @@ -0,0 +1,368 @@ +#!/usr/bin/env python + +import re +from . 
import unittest, OrderedDict + +from jmespath import parser +from jmespath import visitor +from jmespath import ast +from jmespath import exceptions + + +class TestParser(unittest.TestCase): + def setUp(self): + self.parser = parser.Parser() + + def assert_parsed_ast(self, expression, expected_ast): + parsed = self.parser.parse(expression) + self.assertEqual(parsed.parsed, expected_ast) + + def test_parse_empty_string_raises_exception(self): + with self.assertRaises(exceptions.EmptyExpressionError): + self.parser.parse('') + + def test_field(self): + self.assert_parsed_ast('foo', ast.field('foo')) + + def test_dot_syntax(self): + self.assert_parsed_ast('foo.bar', + ast.subexpression([ast.field('foo'), + ast.field('bar')])) + + def test_multiple_dots(self): + parsed = self.parser.parse('foo.bar.baz') + self.assertEqual( + parsed.search({'foo': {'bar': {'baz': 'correct'}}}), 'correct') + + def test_index(self): + parsed = self.parser.parse('foo[1]') + self.assertEqual( + parsed.search({'foo': ['zero', 'one', 'two']}), + 'one') + + def test_quoted_subexpression(self): + self.assert_parsed_ast('"foo"."bar"', + ast.subexpression([ + ast.field('foo'), + ast.field('bar')])) + + def test_wildcard(self): + parsed = self.parser.parse('foo[*]') + self.assertEqual( + parsed.search({'foo': ['zero', 'one', 'two']}), + ['zero', 'one', 'two']) + + def test_wildcard_with_children(self): + parsed = self.parser.parse('foo[*].bar') + self.assertEqual( + parsed.search({'foo': [{'bar': 'one'}, {'bar': 'two'}]}), + ['one', 'two']) + + def test_or_expression(self): + parsed = self.parser.parse('foo || bar') + self.assertEqual(parsed.search({'foo': 'foo'}), 'foo') + self.assertEqual(parsed.search({'bar': 'bar'}), 'bar') + self.assertEqual(parsed.search({'foo': 'foo', 'bar': 'bar'}), 'foo') + self.assertEqual(parsed.search({'bad': 'bad'}), None) + + def test_complex_or_expression(self): + parsed = self.parser.parse('foo.foo || foo.bar') + self.assertEqual(parsed.search({'foo': {'foo': 
'foo'}}), 'foo') + self.assertEqual(parsed.search({'foo': {'bar': 'bar'}}), 'bar') + self.assertEqual(parsed.search({'foo': {'baz': 'baz'}}), None) + + def test_or_repr(self): + self.assert_parsed_ast('foo || bar', ast.or_expression(ast.field('foo'), + ast.field('bar'))) + + def test_unicode_literals_escaped(self): + self.assert_parsed_ast(r'`"\u2713"`', ast.literal(u'\u2713')) + + def test_multiselect(self): + parsed = self.parser.parse('foo.{bar: bar,baz: baz}') + self.assertEqual( + parsed.search({'foo': {'bar': 'bar', 'baz': 'baz', 'qux': 'qux'}}), + {'bar': 'bar', 'baz': 'baz'}) + + def test_multiselect_subexpressions(self): + parsed = self.parser.parse('foo.{"bar.baz": bar.baz, qux: qux}') + self.assertEqual( + parsed.search({'foo': {'bar': {'baz': 'CORRECT'}, 'qux': 'qux'}}), + {'bar.baz': 'CORRECT', 'qux': 'qux'}) + + def test_multiselect_with_all_quoted_keys(self): + parsed = self.parser.parse('foo.{"bar": bar.baz, "qux": qux}') + result = parsed.search({'foo': {'bar': {'baz': 'CORRECT'}, 'qux': 'qux'}}) + self.assertEqual(result, {"bar": "CORRECT", "qux": "qux"}) + + def test_function_call_with_and_statement(self): + self.assert_parsed_ast( + 'f(@ && @)', + {'children': [{'children': [{'children': [], 'type': 'current'}, + {'children': [], 'type': 'current'}], + 'type': 'and_expression'}], + 'type': 'function_expression', + 'value': 'f'}) + + +class TestErrorMessages(unittest.TestCase): + + def setUp(self): + self.parser = parser.Parser() + + def assert_error_message(self, expression, error_message, + exception=exceptions.ParseError): + try: + self.parser.parse(expression) + except exception as e: + self.assertEqual(error_message, str(e)) + return + except Exception as e: + self.fail( + "Unexpected error raised (%s: %s) for bad expression: %s" % + (e.__class__.__name__, e, expression)) + else: + self.fail( + "ParseError not raised for bad expression: %s" % expression) + + def test_bad_parse(self): + with self.assertRaises(exceptions.ParseError): + 
self.parser.parse('foo]baz') + + def test_bad_parse_error_message(self): + error_message = ( + 'Unexpected token: ]: Parse error at column 3, ' + 'token "]" (RBRACKET), for expression:\n' + '"foo]baz"\n' + ' ^') + self.assert_error_message('foo]baz', error_message) + + def test_bad_parse_error_message_with_multiselect(self): + error_message = ( + 'Invalid jmespath expression: Incomplete expression:\n' + '"foo.{bar: baz,bar: bar"\n' + ' ^') + self.assert_error_message('foo.{bar: baz,bar: bar', error_message) + + def test_incomplete_expression_with_missing_paren(self): + error_message = ( + 'Invalid jmespath expression: Incomplete expression:\n' + '"length(@,"\n' + ' ^') + self.assert_error_message('length(@,', error_message) + + def test_bad_lexer_values(self): + error_message = ( + 'Bad jmespath expression: ' + 'Unclosed " delimiter:\n' + 'foo."bar\n' + ' ^') + self.assert_error_message('foo."bar', error_message, + exception=exceptions.LexerError) + + def test_bad_unicode_string(self): + # This error message is straight from the JSON parser + # and pypy has a slightly different error message, + # so we're not using assert_error_message. 
+ error_message = re.compile( + r'Bad jmespath expression: ' + r'Invalid \\uXXXX escape.*\\uAZ12', re.DOTALL) + with self.assertRaisesRegexp(exceptions.LexerError, error_message): + self.parser.parse(r'"\uAZ12"') + + +class TestParserWildcards(unittest.TestCase): + def setUp(self): + self.parser = parser.Parser() + self.data = { + 'foo': [ + {'bar': [{'baz': 'one'}, {'baz': 'two'}]}, + {'bar': [{'baz': 'three'}, {'baz': 'four'}, {'baz': 'five'}]}, + ] + } + + def test_multiple_index_wildcards(self): + parsed = self.parser.parse('foo[*].bar[*].baz') + self.assertEqual(parsed.search(self.data), + [['one', 'two'], ['three', 'four', 'five']]) + + def test_wildcard_mix_with_indices(self): + parsed = self.parser.parse('foo[*].bar[0].baz') + self.assertEqual(parsed.search(self.data), + ['one', 'three']) + + def test_wildcard_mix_last(self): + parsed = self.parser.parse('foo[0].bar[*].baz') + self.assertEqual(parsed.search(self.data), + ['one', 'two']) + + def test_indices_out_of_bounds(self): + parsed = self.parser.parse('foo[*].bar[2].baz') + self.assertEqual(parsed.search(self.data), + ['five']) + + def test_root_indices(self): + parsed = self.parser.parse('[0]') + self.assertEqual(parsed.search(['one', 'two']), 'one') + + def test_root_wildcard(self): + parsed = self.parser.parse('*.foo') + data = {'top1': {'foo': 'bar'}, 'top2': {'foo': 'baz'}, + 'top3': {'notfoo': 'notfoo'}} + # Sorted is being used because the order of the keys are not + # required to be in any specific order. 
+ self.assertEqual(sorted(parsed.search(data)), sorted(['bar', 'baz'])) + self.assertEqual(sorted(self.parser.parse('*.notfoo').search(data)), + sorted(['notfoo'])) + + def test_only_wildcard(self): + parsed = self.parser.parse('*') + data = {'foo': 'a', 'bar': 'b', 'baz': 'c'} + self.assertEqual(sorted(parsed.search(data)), sorted(['a', 'b', 'c'])) + + def test_escape_sequences(self): + self.assertEqual(self.parser.parse(r'"foo\tbar"').search( + {'foo\tbar': 'baz'}), 'baz') + self.assertEqual(self.parser.parse(r'"foo\nbar"').search( + {'foo\nbar': 'baz'}), 'baz') + self.assertEqual(self.parser.parse(r'"foo\bbar"').search( + {'foo\bbar': 'baz'}), 'baz') + self.assertEqual(self.parser.parse(r'"foo\fbar"').search( + {'foo\fbar': 'baz'}), 'baz') + self.assertEqual(self.parser.parse(r'"foo\rbar"').search( + {'foo\rbar': 'baz'}), 'baz') + + def test_consecutive_escape_sequences(self): + parsed = self.parser.parse(r'"foo\\nbar"') + self.assertEqual(parsed.search({'foo\\nbar': 'baz'}), 'baz') + + parsed = self.parser.parse(r'"foo\n\t\rbar"') + self.assertEqual(parsed.search({'foo\n\t\rbar': 'baz'}), 'baz') + + def test_escape_sequence_at_end_of_string_not_allowed(self): + with self.assertRaises(ValueError): + self.parser.parse('foobar\\') + + def test_wildcard_with_multiselect(self): + parsed = self.parser.parse('foo.*.{a: a, b: b}') + data = { + 'foo': { + 'one': { + 'a': {'c': 'CORRECT', 'd': 'other'}, + 'b': {'c': 'ALSOCORRECT', 'd': 'other'}, + }, + 'two': { + 'a': {'c': 'CORRECT', 'd': 'other'}, + 'c': {'c': 'WRONG', 'd': 'other'}, + }, + } + } + match = parsed.search(data) + self.assertEqual(len(match), 2) + self.assertIn('a', match[0]) + self.assertIn('b', match[0]) + self.assertIn('a', match[1]) + self.assertIn('b', match[1]) + + +class TestMergedLists(unittest.TestCase): + def setUp(self): + self.parser = parser.Parser() + self.data = { + "foo": [ + [["one", "two"], ["three", "four"]], + [["five", "six"], ["seven", "eight"]], + [["nine"], ["ten"]] + ] + } + + def 
test_merge_with_indices(self): + parsed = self.parser.parse('foo[][0]') + match = parsed.search(self.data) + self.assertEqual(match, ["one", "three", "five", "seven", + "nine", "ten"]) + + def test_trailing_merged_operator(self): + parsed = self.parser.parse('foo[]') + match = parsed.search(self.data) + self.assertEqual( + match, + [["one", "two"], ["three", "four"], + ["five", "six"], ["seven", "eight"], + ["nine"], ["ten"]]) + + +class TestParserCaching(unittest.TestCase): + def test_compile_lots_of_expressions(self): + # We have to be careful here because this is an implementation detail + # that should be abstracted from the user, but we need to make sure we + # exercise the code and that it doesn't blow up. + p = parser.Parser() + compiled = [] + compiled2 = [] + for i in range(parser.Parser._MAX_SIZE + 1): + compiled.append(p.parse('foo%s' % i)) + # Rerun the test and half of these entries should be from the + # cache but they should still be equal to compiled. + for i in range(parser.Parser._MAX_SIZE + 1): + compiled2.append(p.parse('foo%s' % i)) + self.assertEqual(len(compiled), len(compiled2)) + self.assertEqual( + [expr.parsed for expr in compiled], + [expr.parsed for expr in compiled2]) + + def test_cache_purge(self): + p = parser.Parser() + first = p.parse('foo') + cached = p.parse('foo') + p.purge() + second = p.parse('foo') + self.assertEqual(first.parsed, + second.parsed) + self.assertEqual(first.parsed, + cached.parsed) + + +class TestParserAddsExpressionAttribute(unittest.TestCase): + def test_expression_available_from_parser(self): + p = parser.Parser() + parsed = p.parse('foo.bar') + self.assertEqual(parsed.expression, 'foo.bar') + + +class TestParsedResultAddsOptions(unittest.TestCase): + def test_can_have_ordered_dict(self): + p = parser.Parser() + parsed = p.parse('{a: a, b: b, c: c}') + options = visitor.Options(dict_cls=OrderedDict) + result = parsed.search( + {"c": "c", "b": "b", "a": "a"}, options=options) + # The order should be 'a', 'b', 'c' 
because we're using an + # OrderedDict + self.assertEqual(list(result), ['a', 'b', 'c']) + + +class TestRenderGraphvizFile(unittest.TestCase): + def test_dot_file_rendered(self): + p = parser.Parser() + result = p.parse('foo') + dot_contents = result._render_dot_file() + self.assertEqual(dot_contents, + 'digraph AST {\nfield1 [label="field(foo)"]\n}') + + def test_dot_file_subexpr(self): + p = parser.Parser() + result = p.parse('foo.bar') + dot_contents = result._render_dot_file() + self.assertEqual( + dot_contents, + 'digraph AST {\n' + 'subexpression1 [label="subexpression()"]\n' + ' subexpression1 -> field2\n' + 'field2 [label="field(foo)"]\n' + ' subexpression1 -> field3\n' + 'field3 [label="field(bar)"]\n}') + + +if __name__ == '__main__': + unittest.main() |
