diff options
| author | anoh <[email protected]> | 2022-02-10 16:48:27 +0300 |
|---|---|---|
| committer | Daniil Cherednik <[email protected]> | 2022-02-10 16:48:27 +0300 |
| commit | f49c77cb796fe12b4fb7dbdbdf472955017a87b1 (patch) | |
| tree | 5d5cb817648f650d76cf1076100726fd9b8448e8 /contrib/python/jmespath | |
| parent | 5ecbea34cc22f2d6d3c7f2b634131656807921d1 (diff) | |
Restoring authorship annotation for <[email protected]>. Commit 2 of 2.
Diffstat (limited to 'contrib/python/jmespath')
| -rw-r--r-- | contrib/python/jmespath/jmespath/__init__.py | 22 | ||||
| -rw-r--r-- | contrib/python/jmespath/jmespath/ast.py | 180 | ||||
| -rw-r--r-- | contrib/python/jmespath/jmespath/compat.py | 130 | ||||
| -rw-r--r-- | contrib/python/jmespath/jmespath/exceptions.py | 244 | ||||
| -rw-r--r-- | contrib/python/jmespath/jmespath/functions.py | 724 | ||||
| -rw-r--r-- | contrib/python/jmespath/jmespath/lexer.py | 416 | ||||
| -rw-r--r-- | contrib/python/jmespath/jmespath/parser.py | 1052 | ||||
| -rw-r--r-- | contrib/python/jmespath/jmespath/visitor.py | 650 | ||||
| -rw-r--r-- | contrib/python/jmespath/ya.make | 24 |
9 files changed, 1721 insertions, 1721 deletions
diff --git a/contrib/python/jmespath/jmespath/__init__.py b/contrib/python/jmespath/jmespath/__init__.py index 430c0318287..99482dba8ef 100644 --- a/contrib/python/jmespath/jmespath/__init__.py +++ b/contrib/python/jmespath/jmespath/__init__.py @@ -1,11 +1,11 @@ import warnings import sys -from jmespath import parser -from jmespath.visitor import Options - +from jmespath import parser +from jmespath.visitor import Options + __version__ = '0.10.0' - - + + if sys.version_info[:2] <= (2, 6) or ((3, 0) <= sys.version_info[:2] <= (3, 3)): python_ver = '.'.join(str(x) for x in sys.version_info[:3]) @@ -15,9 +15,9 @@ if sys.version_info[:2] <= (2, 6) or ((3, 0) <= sys.version_info[:2] <= (3, 3)): DeprecationWarning) -def compile(expression): - return parser.Parser().parse(expression) - - -def search(expression, data, options=None): - return parser.Parser().parse(expression).search(data, options=options) +def compile(expression): + return parser.Parser().parse(expression) + + +def search(expression, data, options=None): + return parser.Parser().parse(expression).search(data, options=options) diff --git a/contrib/python/jmespath/jmespath/ast.py b/contrib/python/jmespath/jmespath/ast.py index 3c25dc67d72..dd56c6ed6bf 100644 --- a/contrib/python/jmespath/jmespath/ast.py +++ b/contrib/python/jmespath/jmespath/ast.py @@ -1,90 +1,90 @@ -# AST nodes have this structure: -# {"type": <node type>", children: [], "value": ""} - - -def comparator(name, first, second): - return {'type': 'comparator', 'children': [first, second], 'value': name} - - -def current_node(): - return {'type': 'current', 'children': []} - - -def expref(expression): - return {'type': 'expref', 'children': [expression]} - - -def function_expression(name, args): - return {'type': 'function_expression', 'children': args, 'value': name} - - -def field(name): - return {"type": "field", "children": [], "value": name} - - -def filter_projection(left, right, comparator): - return {'type': 'filter_projection', 
'children': [left, right, comparator]} - - -def flatten(node): - return {'type': 'flatten', 'children': [node]} - - -def identity(): - return {"type": "identity", 'children': []} - - -def index(index): - return {"type": "index", "value": index, "children": []} - - -def index_expression(children): - return {"type": "index_expression", 'children': children} - - -def key_val_pair(key_name, node): - return {"type": "key_val_pair", 'children': [node], "value": key_name} - - -def literal(literal_value): - return {'type': 'literal', 'value': literal_value, 'children': []} - - -def multi_select_dict(nodes): - return {"type": "multi_select_dict", "children": nodes} - - -def multi_select_list(nodes): - return {"type": "multi_select_list", "children": nodes} - - -def or_expression(left, right): - return {"type": "or_expression", "children": [left, right]} - - -def and_expression(left, right): - return {"type": "and_expression", "children": [left, right]} - - -def not_expression(expr): - return {"type": "not_expression", "children": [expr]} - - -def pipe(left, right): - return {'type': 'pipe', 'children': [left, right]} - - -def projection(left, right): - return {'type': 'projection', 'children': [left, right]} - - -def subexpression(children): - return {"type": "subexpression", 'children': children} - - -def slice(start, end, step): - return {"type": "slice", "children": [start, end, step]} - - -def value_projection(left, right): - return {'type': 'value_projection', 'children': [left, right]} +# AST nodes have this structure: +# {"type": <node type>", children: [], "value": ""} + + +def comparator(name, first, second): + return {'type': 'comparator', 'children': [first, second], 'value': name} + + +def current_node(): + return {'type': 'current', 'children': []} + + +def expref(expression): + return {'type': 'expref', 'children': [expression]} + + +def function_expression(name, args): + return {'type': 'function_expression', 'children': args, 'value': name} + + +def 
field(name): + return {"type": "field", "children": [], "value": name} + + +def filter_projection(left, right, comparator): + return {'type': 'filter_projection', 'children': [left, right, comparator]} + + +def flatten(node): + return {'type': 'flatten', 'children': [node]} + + +def identity(): + return {"type": "identity", 'children': []} + + +def index(index): + return {"type": "index", "value": index, "children": []} + + +def index_expression(children): + return {"type": "index_expression", 'children': children} + + +def key_val_pair(key_name, node): + return {"type": "key_val_pair", 'children': [node], "value": key_name} + + +def literal(literal_value): + return {'type': 'literal', 'value': literal_value, 'children': []} + + +def multi_select_dict(nodes): + return {"type": "multi_select_dict", "children": nodes} + + +def multi_select_list(nodes): + return {"type": "multi_select_list", "children": nodes} + + +def or_expression(left, right): + return {"type": "or_expression", "children": [left, right]} + + +def and_expression(left, right): + return {"type": "and_expression", "children": [left, right]} + + +def not_expression(expr): + return {"type": "not_expression", "children": [expr]} + + +def pipe(left, right): + return {'type': 'pipe', 'children': [left, right]} + + +def projection(left, right): + return {'type': 'projection', 'children': [left, right]} + + +def subexpression(children): + return {"type": "subexpression", 'children': children} + + +def slice(start, end, step): + return {"type": "slice", "children": [start, end, step]} + + +def value_projection(left, right): + return {'type': 'value_projection', 'children': [left, right]} diff --git a/contrib/python/jmespath/jmespath/compat.py b/contrib/python/jmespath/jmespath/compat.py index df3a0e3a876..2ed0fe78792 100644 --- a/contrib/python/jmespath/jmespath/compat.py +++ b/contrib/python/jmespath/jmespath/compat.py @@ -1,65 +1,65 @@ -import sys -import inspect - -PY2 = sys.version_info[0] == 2 - - -def 
with_metaclass(meta, *bases): - # Taken from flask/six. - class metaclass(meta): - def __new__(cls, name, this_bases, d): - return meta(name, bases, d) - return type.__new__(metaclass, 'temporary_class', (), {}) - - -if PY2: - text_type = unicode - string_type = basestring - from itertools import izip_longest as zip_longest - - def with_str_method(cls): - """Class decorator that handles __str__ compat between py2 and py3.""" - # In python2, the __str__ should be __unicode__ - # and __str__ should return bytes. - cls.__unicode__ = cls.__str__ - def __str__(self): - return self.__unicode__().encode('utf-8') - cls.__str__ = __str__ - return cls - - def with_repr_method(cls): - """Class decorator that handle __repr__ with py2 and py3.""" - # This is almost the same thing as with_str_method *except* - # it uses the unicode_escape encoding. This also means we need to be - # careful encoding the input multiple times, so we only encode - # if we get a unicode type. - original_repr_method = cls.__repr__ - def __repr__(self): - original_repr = original_repr_method(self) - if isinstance(original_repr, text_type): - original_repr = original_repr.encode('unicode_escape') - return original_repr - cls.__repr__ = __repr__ - return cls - - def get_methods(cls): - for name, method in inspect.getmembers(cls, - predicate=inspect.ismethod): - yield name, method - -else: - text_type = str - string_type = str - from itertools import zip_longest - - def with_str_method(cls): - # In python3, we don't need to do anything, we return a str type. - return cls - - def with_repr_method(cls): - return cls - - def get_methods(cls): - for name, method in inspect.getmembers(cls, - predicate=inspect.isfunction): - yield name, method +import sys +import inspect + +PY2 = sys.version_info[0] == 2 + + +def with_metaclass(meta, *bases): + # Taken from flask/six. 
+ class metaclass(meta): + def __new__(cls, name, this_bases, d): + return meta(name, bases, d) + return type.__new__(metaclass, 'temporary_class', (), {}) + + +if PY2: + text_type = unicode + string_type = basestring + from itertools import izip_longest as zip_longest + + def with_str_method(cls): + """Class decorator that handles __str__ compat between py2 and py3.""" + # In python2, the __str__ should be __unicode__ + # and __str__ should return bytes. + cls.__unicode__ = cls.__str__ + def __str__(self): + return self.__unicode__().encode('utf-8') + cls.__str__ = __str__ + return cls + + def with_repr_method(cls): + """Class decorator that handle __repr__ with py2 and py3.""" + # This is almost the same thing as with_str_method *except* + # it uses the unicode_escape encoding. This also means we need to be + # careful encoding the input multiple times, so we only encode + # if we get a unicode type. + original_repr_method = cls.__repr__ + def __repr__(self): + original_repr = original_repr_method(self) + if isinstance(original_repr, text_type): + original_repr = original_repr.encode('unicode_escape') + return original_repr + cls.__repr__ = __repr__ + return cls + + def get_methods(cls): + for name, method in inspect.getmembers(cls, + predicate=inspect.ismethod): + yield name, method + +else: + text_type = str + string_type = str + from itertools import zip_longest + + def with_str_method(cls): + # In python3, we don't need to do anything, we return a str type. 
+ return cls + + def with_repr_method(cls): + return cls + + def get_methods(cls): + for name, method in inspect.getmembers(cls, + predicate=inspect.isfunction): + yield name, method diff --git a/contrib/python/jmespath/jmespath/exceptions.py b/contrib/python/jmespath/jmespath/exceptions.py index 32f2eec4e4c..0156015918b 100644 --- a/contrib/python/jmespath/jmespath/exceptions.py +++ b/contrib/python/jmespath/jmespath/exceptions.py @@ -1,122 +1,122 @@ -from jmespath.compat import with_str_method - - -class JMESPathError(ValueError): - pass - - -@with_str_method -class ParseError(JMESPathError): - _ERROR_MESSAGE = 'Invalid jmespath expression' - def __init__(self, lex_position, token_value, token_type, - msg=_ERROR_MESSAGE): - super(ParseError, self).__init__(lex_position, token_value, token_type) - self.lex_position = lex_position - self.token_value = token_value - self.token_type = token_type.upper() - self.msg = msg - # Whatever catches the ParseError can fill in the full expression - self.expression = None - - def __str__(self): - # self.lex_position +1 to account for the starting double quote char. - underline = ' ' * (self.lex_position + 1) + '^' - return ( - '%s: Parse error at column %s, ' - 'token "%s" (%s), for expression:\n"%s"\n%s' % ( - self.msg, self.lex_position, self.token_value, self.token_type, - self.expression, underline)) - - -@with_str_method -class IncompleteExpressionError(ParseError): - def set_expression(self, expression): - self.expression = expression - self.lex_position = len(expression) - self.token_type = None - self.token_value = None - - def __str__(self): - # self.lex_position +1 to account for the starting double quote char. 
- underline = ' ' * (self.lex_position + 1) + '^' - return ( - 'Invalid jmespath expression: Incomplete expression:\n' - '"%s"\n%s' % (self.expression, underline)) - - -@with_str_method -class LexerError(ParseError): - def __init__(self, lexer_position, lexer_value, message, expression=None): - self.lexer_position = lexer_position - self.lexer_value = lexer_value - self.message = message - super(LexerError, self).__init__(lexer_position, - lexer_value, - message) - # Whatever catches LexerError can set this. - self.expression = expression - - def __str__(self): - underline = ' ' * self.lexer_position + '^' - return 'Bad jmespath expression: %s:\n%s\n%s' % ( - self.message, self.expression, underline) - - -@with_str_method -class ArityError(ParseError): - def __init__(self, expected, actual, name): - self.expected_arity = expected - self.actual_arity = actual - self.function_name = name - self.expression = None - - def __str__(self): - return ("Expected %s %s for function %s(), " - "received %s" % ( - self.expected_arity, - self._pluralize('argument', self.expected_arity), - self.function_name, - self.actual_arity)) - - def _pluralize(self, word, count): - if count == 1: - return word - else: - return word + 's' - - -@with_str_method -class VariadictArityError(ArityError): - def __str__(self): - return ("Expected at least %s %s for function %s(), " - "received %s" % ( - self.expected_arity, - self._pluralize('argument', self.expected_arity), - self.function_name, - self.actual_arity)) - - -@with_str_method -class JMESPathTypeError(JMESPathError): - def __init__(self, function_name, current_value, actual_type, - expected_types): - self.function_name = function_name - self.current_value = current_value - self.actual_type = actual_type - self.expected_types = expected_types - - def __str__(self): - return ('In function %s(), invalid type for value: %s, ' - 'expected one of: %s, received: "%s"' % ( - self.function_name, self.current_value, - self.expected_types, 
self.actual_type)) - - -class EmptyExpressionError(JMESPathError): - def __init__(self): - super(EmptyExpressionError, self).__init__( - "Invalid JMESPath expression: cannot be empty.") - - -class UnknownFunctionError(JMESPathError): - pass +from jmespath.compat import with_str_method + + +class JMESPathError(ValueError): + pass + + +@with_str_method +class ParseError(JMESPathError): + _ERROR_MESSAGE = 'Invalid jmespath expression' + def __init__(self, lex_position, token_value, token_type, + msg=_ERROR_MESSAGE): + super(ParseError, self).__init__(lex_position, token_value, token_type) + self.lex_position = lex_position + self.token_value = token_value + self.token_type = token_type.upper() + self.msg = msg + # Whatever catches the ParseError can fill in the full expression + self.expression = None + + def __str__(self): + # self.lex_position +1 to account for the starting double quote char. + underline = ' ' * (self.lex_position + 1) + '^' + return ( + '%s: Parse error at column %s, ' + 'token "%s" (%s), for expression:\n"%s"\n%s' % ( + self.msg, self.lex_position, self.token_value, self.token_type, + self.expression, underline)) + + +@with_str_method +class IncompleteExpressionError(ParseError): + def set_expression(self, expression): + self.expression = expression + self.lex_position = len(expression) + self.token_type = None + self.token_value = None + + def __str__(self): + # self.lex_position +1 to account for the starting double quote char. + underline = ' ' * (self.lex_position + 1) + '^' + return ( + 'Invalid jmespath expression: Incomplete expression:\n' + '"%s"\n%s' % (self.expression, underline)) + + +@with_str_method +class LexerError(ParseError): + def __init__(self, lexer_position, lexer_value, message, expression=None): + self.lexer_position = lexer_position + self.lexer_value = lexer_value + self.message = message + super(LexerError, self).__init__(lexer_position, + lexer_value, + message) + # Whatever catches LexerError can set this. 
+ self.expression = expression + + def __str__(self): + underline = ' ' * self.lexer_position + '^' + return 'Bad jmespath expression: %s:\n%s\n%s' % ( + self.message, self.expression, underline) + + +@with_str_method +class ArityError(ParseError): + def __init__(self, expected, actual, name): + self.expected_arity = expected + self.actual_arity = actual + self.function_name = name + self.expression = None + + def __str__(self): + return ("Expected %s %s for function %s(), " + "received %s" % ( + self.expected_arity, + self._pluralize('argument', self.expected_arity), + self.function_name, + self.actual_arity)) + + def _pluralize(self, word, count): + if count == 1: + return word + else: + return word + 's' + + +@with_str_method +class VariadictArityError(ArityError): + def __str__(self): + return ("Expected at least %s %s for function %s(), " + "received %s" % ( + self.expected_arity, + self._pluralize('argument', self.expected_arity), + self.function_name, + self.actual_arity)) + + +@with_str_method +class JMESPathTypeError(JMESPathError): + def __init__(self, function_name, current_value, actual_type, + expected_types): + self.function_name = function_name + self.current_value = current_value + self.actual_type = actual_type + self.expected_types = expected_types + + def __str__(self): + return ('In function %s(), invalid type for value: %s, ' + 'expected one of: %s, received: "%s"' % ( + self.function_name, self.current_value, + self.expected_types, self.actual_type)) + + +class EmptyExpressionError(JMESPathError): + def __init__(self): + super(EmptyExpressionError, self).__init__( + "Invalid JMESPath expression: cannot be empty.") + + +class UnknownFunctionError(JMESPathError): + pass diff --git a/contrib/python/jmespath/jmespath/functions.py b/contrib/python/jmespath/jmespath/functions.py index 2ab913387b1..31dab051694 100644 --- a/contrib/python/jmespath/jmespath/functions.py +++ b/contrib/python/jmespath/jmespath/functions.py @@ -1,362 +1,362 @@ -import 
math -import json - -from jmespath import exceptions -from jmespath.compat import string_type as STRING_TYPE -from jmespath.compat import get_methods, with_metaclass - - -# python types -> jmespath types -TYPES_MAP = { - 'bool': 'boolean', - 'list': 'array', - 'dict': 'object', - 'NoneType': 'null', - 'unicode': 'string', - 'str': 'string', - 'float': 'number', - 'int': 'number', - 'long': 'number', - 'OrderedDict': 'object', - '_Projection': 'array', - '_Expression': 'expref', -} - - -# jmespath types -> python types -REVERSE_TYPES_MAP = { - 'boolean': ('bool',), - 'array': ('list', '_Projection'), - 'object': ('dict', 'OrderedDict',), - 'null': ('NoneType',), - 'string': ('unicode', 'str'), - 'number': ('float', 'int', 'long'), - 'expref': ('_Expression',), -} - - -def signature(*arguments): - def _record_signature(func): - func.signature = arguments - return func - return _record_signature - - -class FunctionRegistry(type): - def __init__(cls, name, bases, attrs): - cls._populate_function_table() - super(FunctionRegistry, cls).__init__(name, bases, attrs) - - def _populate_function_table(cls): - function_table = {} - # Any method with a @signature decorator that also - # starts with "_func_" is registered as a function. - # _func_max_by -> max_by function. 
- for name, method in get_methods(cls): - if not name.startswith('_func_'): - continue - signature = getattr(method, 'signature', None) - if signature is not None: - function_table[name[6:]] = { - 'function': method, - 'signature': signature, - } - cls.FUNCTION_TABLE = function_table - - -class Functions(with_metaclass(FunctionRegistry, object)): - - FUNCTION_TABLE = { - } - - def call_function(self, function_name, resolved_args): - try: - spec = self.FUNCTION_TABLE[function_name] - except KeyError: - raise exceptions.UnknownFunctionError( - "Unknown function: %s()" % function_name) - function = spec['function'] - signature = spec['signature'] - self._validate_arguments(resolved_args, signature, function_name) - return function(self, *resolved_args) - - def _validate_arguments(self, args, signature, function_name): - if signature and signature[-1].get('variadic'): - if len(args) < len(signature): - raise exceptions.VariadictArityError( - len(signature), len(args), function_name) - elif len(args) != len(signature): - raise exceptions.ArityError( - len(signature), len(args), function_name) - return self._type_check(args, signature, function_name) - - def _type_check(self, actual, signature, function_name): - for i in range(len(signature)): - allowed_types = signature[i]['types'] - if allowed_types: - self._type_check_single(actual[i], allowed_types, - function_name) - - def _type_check_single(self, current, types, function_name): - # Type checking involves checking the top level type, - # and in the case of arrays, potentially checking the types - # of each element. - allowed_types, allowed_subtypes = self._get_allowed_pytypes(types) - # We're not using isinstance() on purpose. - # The type model for jmespath does not map - # 1-1 with python types (booleans are considered - # integers in python for example). 
- actual_typename = type(current).__name__ - if actual_typename not in allowed_types: - raise exceptions.JMESPathTypeError( - function_name, current, - self._convert_to_jmespath_type(actual_typename), types) - # If we're dealing with a list type, we can have - # additional restrictions on the type of the list - # elements (for example a function can require a - # list of numbers or a list of strings). - # Arrays are the only types that can have subtypes. - if allowed_subtypes: - self._subtype_check(current, allowed_subtypes, - types, function_name) - - def _get_allowed_pytypes(self, types): - allowed_types = [] - allowed_subtypes = [] - for t in types: - type_ = t.split('-', 1) - if len(type_) == 2: - type_, subtype = type_ - allowed_subtypes.append(REVERSE_TYPES_MAP[subtype]) - else: - type_ = type_[0] - allowed_types.extend(REVERSE_TYPES_MAP[type_]) - return allowed_types, allowed_subtypes - - def _subtype_check(self, current, allowed_subtypes, types, function_name): - if len(allowed_subtypes) == 1: - # The easy case, we know up front what type - # we need to validate. - allowed_subtypes = allowed_subtypes[0] - for element in current: - actual_typename = type(element).__name__ - if actual_typename not in allowed_subtypes: - raise exceptions.JMESPathTypeError( - function_name, element, actual_typename, types) - elif len(allowed_subtypes) > 1 and current: - # Dynamic type validation. Based on the first - # type we see, we validate that the remaining types - # match. 
- first = type(current[0]).__name__ - for subtypes in allowed_subtypes: - if first in subtypes: - allowed = subtypes - break - else: - raise exceptions.JMESPathTypeError( - function_name, current[0], first, types) - for element in current: - actual_typename = type(element).__name__ - if actual_typename not in allowed: - raise exceptions.JMESPathTypeError( - function_name, element, actual_typename, types) - - @signature({'types': ['number']}) - def _func_abs(self, arg): - return abs(arg) - - @signature({'types': ['array-number']}) - def _func_avg(self, arg): - if arg: - return sum(arg) / float(len(arg)) - else: - return None - - @signature({'types': [], 'variadic': True}) - def _func_not_null(self, *arguments): - for argument in arguments: - if argument is not None: - return argument - - @signature({'types': []}) - def _func_to_array(self, arg): - if isinstance(arg, list): - return arg - else: - return [arg] - - @signature({'types': []}) - def _func_to_string(self, arg): - if isinstance(arg, STRING_TYPE): - return arg - else: - return json.dumps(arg, separators=(',', ':'), - default=str) - - @signature({'types': []}) - def _func_to_number(self, arg): - if isinstance(arg, (list, dict, bool)): - return None - elif arg is None: - return None - elif isinstance(arg, (int, float)): - return arg - else: - try: - return int(arg) - except ValueError: - try: - return float(arg) - except ValueError: - return None - - @signature({'types': ['array', 'string']}, {'types': []}) - def _func_contains(self, subject, search): - return search in subject - - @signature({'types': ['string', 'array', 'object']}) - def _func_length(self, arg): - return len(arg) - - @signature({'types': ['string']}, {'types': ['string']}) - def _func_ends_with(self, search, suffix): - return search.endswith(suffix) - - @signature({'types': ['string']}, {'types': ['string']}) - def _func_starts_with(self, search, suffix): - return search.startswith(suffix) - - @signature({'types': ['array', 'string']}) - def 
_func_reverse(self, arg): - if isinstance(arg, STRING_TYPE): - return arg[::-1] - else: - return list(reversed(arg)) - - @signature({"types": ['number']}) - def _func_ceil(self, arg): - return math.ceil(arg) - - @signature({"types": ['number']}) - def _func_floor(self, arg): - return math.floor(arg) - - @signature({"types": ['string']}, {"types": ['array-string']}) - def _func_join(self, separator, array): - return separator.join(array) - - @signature({'types': ['expref']}, {'types': ['array']}) - def _func_map(self, expref, arg): - result = [] - for element in arg: - result.append(expref.visit(expref.expression, element)) - return result - - @signature({"types": ['array-number', 'array-string']}) - def _func_max(self, arg): - if arg: - return max(arg) - else: - return None - - @signature({"types": ["object"], "variadic": True}) - def _func_merge(self, *arguments): - merged = {} - for arg in arguments: - merged.update(arg) - return merged - - @signature({"types": ['array-number', 'array-string']}) - def _func_min(self, arg): - if arg: - return min(arg) - else: - return None - - @signature({"types": ['array-string', 'array-number']}) - def _func_sort(self, arg): - return list(sorted(arg)) - - @signature({"types": ['array-number']}) - def _func_sum(self, arg): - return sum(arg) - - @signature({"types": ['object']}) - def _func_keys(self, arg): - # To be consistent with .values() - # should we also return the indices of a list? 
- return list(arg.keys()) - - @signature({"types": ['object']}) - def _func_values(self, arg): - return list(arg.values()) - - @signature({'types': []}) - def _func_type(self, arg): - if isinstance(arg, STRING_TYPE): - return "string" - elif isinstance(arg, bool): - return "boolean" - elif isinstance(arg, list): - return "array" - elif isinstance(arg, dict): - return "object" - elif isinstance(arg, (float, int)): - return "number" - elif arg is None: - return "null" - - @signature({'types': ['array']}, {'types': ['expref']}) - def _func_sort_by(self, array, expref): - if not array: - return array - # sort_by allows for the expref to be either a number of - # a string, so we have some special logic to handle this. - # We evaluate the first array element and verify that it's - # either a string of a number. We then create a key function - # that validates that type, which requires that remaining array - # elements resolve to the same type as the first element. - required_type = self._convert_to_jmespath_type( - type(expref.visit(expref.expression, array[0])).__name__) - if required_type not in ['number', 'string']: - raise exceptions.JMESPathTypeError( - 'sort_by', array[0], required_type, ['string', 'number']) - keyfunc = self._create_key_func(expref, - [required_type], - 'sort_by') - return list(sorted(array, key=keyfunc)) - - @signature({'types': ['array']}, {'types': ['expref']}) - def _func_min_by(self, array, expref): - keyfunc = self._create_key_func(expref, - ['number', 'string'], - 'min_by') - if array: - return min(array, key=keyfunc) - else: - return None - - @signature({'types': ['array']}, {'types': ['expref']}) - def _func_max_by(self, array, expref): - keyfunc = self._create_key_func(expref, - ['number', 'string'], - 'max_by') - if array: - return max(array, key=keyfunc) - else: - return None - - def _create_key_func(self, expref, allowed_types, function_name): - def keyfunc(x): - result = expref.visit(expref.expression, x) - actual_typename = 
type(result).__name__ - jmespath_type = self._convert_to_jmespath_type(actual_typename) - # allowed_types is in term of jmespath types, not python types. - if jmespath_type not in allowed_types: - raise exceptions.JMESPathTypeError( - function_name, result, jmespath_type, allowed_types) - return result - return keyfunc - - def _convert_to_jmespath_type(self, pyobject): - return TYPES_MAP.get(pyobject, 'unknown') +import math +import json + +from jmespath import exceptions +from jmespath.compat import string_type as STRING_TYPE +from jmespath.compat import get_methods, with_metaclass + + +# python types -> jmespath types +TYPES_MAP = { + 'bool': 'boolean', + 'list': 'array', + 'dict': 'object', + 'NoneType': 'null', + 'unicode': 'string', + 'str': 'string', + 'float': 'number', + 'int': 'number', + 'long': 'number', + 'OrderedDict': 'object', + '_Projection': 'array', + '_Expression': 'expref', +} + + +# jmespath types -> python types +REVERSE_TYPES_MAP = { + 'boolean': ('bool',), + 'array': ('list', '_Projection'), + 'object': ('dict', 'OrderedDict',), + 'null': ('NoneType',), + 'string': ('unicode', 'str'), + 'number': ('float', 'int', 'long'), + 'expref': ('_Expression',), +} + + +def signature(*arguments): + def _record_signature(func): + func.signature = arguments + return func + return _record_signature + + +class FunctionRegistry(type): + def __init__(cls, name, bases, attrs): + cls._populate_function_table() + super(FunctionRegistry, cls).__init__(name, bases, attrs) + + def _populate_function_table(cls): + function_table = {} + # Any method with a @signature decorator that also + # starts with "_func_" is registered as a function. + # _func_max_by -> max_by function. 
class Functions(with_metaclass(FunctionRegistry, object)):
    """Library of built-in JMESPath functions.

    The ``FunctionRegistry`` metaclass fills ``FUNCTION_TABLE`` with every
    ``_func_<name>`` method that carries a ``signature`` attribute, keyed by
    ``<name>``.  ``call_function`` looks a function up in that table,
    validates the resolved arguments against the signature and invokes it.
    """

    FUNCTION_TABLE = {
    }

    def call_function(self, function_name, resolved_args):
        """Validate ``resolved_args`` and call the named function.

        Raises ``exceptions.UnknownFunctionError`` when ``function_name`` is
        not registered; arity and type errors come from
        ``_validate_arguments``.
        """
        try:
            spec = self.FUNCTION_TABLE[function_name]
        except KeyError:
            raise exceptions.UnknownFunctionError(
                "Unknown function: %s()" % function_name)
        function = spec['function']
        signature = spec['signature']
        self._validate_arguments(resolved_args, signature, function_name)
        return function(self, *resolved_args)

    def _validate_arguments(self, args, signature, function_name):
        """Check the argument count, then the argument types.

        A signature whose last entry is marked ``variadic`` accepts
        ``len(signature)`` or more arguments; any other signature requires
        exactly ``len(signature)`` arguments.
        """
        if signature and signature[-1].get('variadic'):
            if len(args) < len(signature):
                raise exceptions.VariadictArityError(
                    len(signature), len(args), function_name)
        elif len(args) != len(signature):
            raise exceptions.ArityError(
                len(signature), len(args), function_name)
        return self._type_check(args, signature, function_name)

    def _type_check(self, actual, signature, function_name):
        """Type check every argument against its parameter spec.

        Arguments beyond the declared parameters (only possible for a
        variadic signature) are checked against the last, variadic, spec so
        that e.g. every argument of ``merge()`` must be an object.
        """
        if not signature:
            return
        declared = len(signature)
        variadic = signature[-1].get('variadic')
        for i, argument in enumerate(actual):
            if i < declared:
                spec = signature[i]
            elif variadic:
                spec = signature[-1]
            else:
                break
            allowed_types = spec['types']
            # An empty ``types`` list means "any type is accepted".
            if allowed_types:
                self._type_check_single(argument, allowed_types,
                                        function_name)

    def _type_check_single(self, current, types, function_name):
        """Check one argument against the allowed JMESPath type names."""
        # Type checking involves checking the top level type,
        # and in the case of arrays, potentially checking the types
        # of each element.
        allowed_types, allowed_subtypes = self._get_allowed_pytypes(types)
        # We're not using isinstance() on purpose.
        # The type model for jmespath does not map
        # 1-1 with python types (booleans are considered
        # integers in python for example).
        actual_typename = type(current).__name__
        if actual_typename not in allowed_types:
            raise exceptions.JMESPathTypeError(
                function_name, current,
                self._convert_to_jmespath_type(actual_typename), types)
        # If we're dealing with a list type, we can have
        # additional restrictions on the type of the list
        # elements (for example a function can require a
        # list of numbers or a list of strings).
        # Arrays are the only types that can have subtypes.
        if allowed_subtypes:
            self._subtype_check(current, allowed_subtypes,
                                types, function_name)

    def _get_allowed_pytypes(self, types):
        """Translate JMESPath type names into Python type names.

        Returns ``(allowed_types, allowed_subtypes)``.  A name of the form
        ``"<type>-<subtype>"`` (e.g. ``"array-number"``) contributes its
        ``<type>`` to ``allowed_types`` and one entry, the Python type names
        for ``<subtype>``, to ``allowed_subtypes``.
        """
        allowed_types = []
        allowed_subtypes = []
        for t in types:
            type_ = t.split('-', 1)
            if len(type_) == 2:
                type_, subtype = type_
                allowed_subtypes.append(REVERSE_TYPES_MAP[subtype])
            else:
                type_ = type_[0]
            allowed_types.extend(REVERSE_TYPES_MAP[type_])
        return allowed_types, allowed_subtypes

    def _subtype_check(self, current, allowed_subtypes, types, function_name):
        """Check that the elements of ``current`` have an allowed type.

        Raises ``exceptions.JMESPathTypeError`` for the first offending
        element.  The reported type is the JMESPath type name, matching what
        ``_type_check_single`` reports for top level arguments.
        """
        if len(allowed_subtypes) == 1:
            # The easy case, we know up front what type
            # we need to validate.
            allowed_subtypes = allowed_subtypes[0]
            for element in current:
                actual_typename = type(element).__name__
                if actual_typename not in allowed_subtypes:
                    raise exceptions.JMESPathTypeError(
                        function_name, element,
                        self._convert_to_jmespath_type(actual_typename),
                        types)
        elif len(allowed_subtypes) > 1 and current:
            # Dynamic type validation.  Based on the first
            # type we see, we validate that the remaining types
            # match.
            first = type(current[0]).__name__
            for subtypes in allowed_subtypes:
                if first in subtypes:
                    allowed = subtypes
                    break
            else:
                raise exceptions.JMESPathTypeError(
                    function_name, current[0],
                    self._convert_to_jmespath_type(first), types)
            for element in current:
                actual_typename = type(element).__name__
                if actual_typename not in allowed:
                    raise exceptions.JMESPathTypeError(
                        function_name, element,
                        self._convert_to_jmespath_type(actual_typename),
                        types)

    @signature({'types': ['number']})
    def _func_abs(self, arg):
        """Return the absolute value of ``arg``."""
        return abs(arg)

    @signature({'types': ['array-number']})
    def _func_avg(self, arg):
        """Return the mean of ``arg``, or None for an empty array."""
        if arg:
            return sum(arg) / float(len(arg))
        else:
            return None

    @signature({'types': [], 'variadic': True})
    def _func_not_null(self, *arguments):
        """Return the first argument that is not None, else None."""
        for argument in arguments:
            if argument is not None:
                return argument
        return None

    @signature({'types': []})
    def _func_to_array(self, arg):
        """Return ``arg`` unchanged if it is a list, else ``[arg]``."""
        if isinstance(arg, list):
            return arg
        else:
            return [arg]

    @signature({'types': []})
    def _func_to_string(self, arg):
        """Return strings unchanged; encode anything else as compact JSON."""
        if isinstance(arg, STRING_TYPE):
            return arg
        else:
            return json.dumps(arg, separators=(',', ':'),
                              default=str)

    @signature({'types': []})
    def _func_to_number(self, arg):
        """Convert ``arg`` to a number, or return None if that fails.

        Lists, dicts, booleans and None yield None; ints and floats are
        returned as is; anything else is parsed with ``int()`` and then
        ``float()``.
        """
        if isinstance(arg, (list, dict, bool)):
            return None
        elif arg is None:
            return None
        elif isinstance(arg, (int, float)):
            return arg
        else:
            try:
                return int(arg)
            except ValueError:
                try:
                    return float(arg)
                except ValueError:
                    return None

    @signature({'types': ['array', 'string']}, {'types': []})
    def _func_contains(self, subject, search):
        """Return whether ``search`` is in ``subject``."""
        return search in subject

    @signature({'types': ['string', 'array', 'object']})
    def _func_length(self, arg):
        """Return ``len(arg)``."""
        return len(arg)

    @signature({'types': ['string']}, {'types': ['string']})
    def _func_ends_with(self, search, suffix):
        """Return whether ``search`` ends with ``suffix``."""
        return search.endswith(suffix)

    @signature({'types': ['string']}, {'types': ['string']})
    def _func_starts_with(self, search, suffix):
        """Return whether ``search`` starts with ``suffix``."""
        return search.startswith(suffix)

    @signature({'types': ['array', 'string']})
    def _func_reverse(self, arg):
        """Return a reversed copy of the string or array."""
        if isinstance(arg, STRING_TYPE):
            return arg[::-1]
        else:
            return list(reversed(arg))

    @signature({"types": ['number']})
    def _func_ceil(self, arg):
        """Return ``math.ceil(arg)``."""
        return math.ceil(arg)

    @signature({"types": ['number']})
    def _func_floor(self, arg):
        """Return ``math.floor(arg)``."""
        return math.floor(arg)

    @signature({"types": ['string']}, {"types": ['array-string']})
    def _func_join(self, separator, array):
        """Join the strings in ``array`` with ``separator``."""
        return separator.join(array)

    @signature({'types': ['expref']}, {'types': ['array']})
    def _func_map(self, expref, arg):
        """Evaluate ``expref`` against each element and collect the results."""
        result = []
        for element in arg:
            result.append(expref.visit(expref.expression, element))
        return result

    @signature({"types": ['array-number', 'array-string']})
    def _func_max(self, arg):
        """Return the largest element, or None for an empty array."""
        if arg:
            return max(arg)
        else:
            return None

    @signature({"types": ["object"], "variadic": True})
    def _func_merge(self, *arguments):
        """Merge the objects left to right into a new dict."""
        merged = {}
        for arg in arguments:
            merged.update(arg)
        return merged

    @signature({"types": ['array-number', 'array-string']})
    def _func_min(self, arg):
        """Return the smallest element, or None for an empty array."""
        if arg:
            return min(arg)
        else:
            return None

    @signature({"types": ['array-string', 'array-number']})
    def _func_sort(self, arg):
        """Return a new sorted list of the elements of ``arg``."""
        return list(sorted(arg))

    @signature({"types": ['array-number']})
    def _func_sum(self, arg):
        """Return the sum of the numbers in ``arg``."""
        return sum(arg)

    @signature({"types": ['object']})
    def _func_keys(self, arg):
        """Return the keys of the object as a list."""
        # To be consistent with .values()
        # should we also return the indices of a list?
        return list(arg.keys())

    @signature({"types": ['object']})
    def _func_values(self, arg):
        """Return the values of the object as a list."""
        return list(arg.values())

    @signature({'types': []})
    def _func_type(self, arg):
        """Return the JMESPath type name of ``arg``.

        The bool test precedes the number test because bool is a subclass
        of int in Python.  Values of any other Python type yield None.
        """
        if isinstance(arg, STRING_TYPE):
            return "string"
        elif isinstance(arg, bool):
            return "boolean"
        elif isinstance(arg, list):
            return "array"
        elif isinstance(arg, dict):
            return "object"
        elif isinstance(arg, (float, int)):
            return "number"
        elif arg is None:
            return "null"

    @signature({'types': ['array']}, {'types': ['expref']})
    def _func_sort_by(self, array, expref):
        """Return ``array`` sorted by the value ``expref`` yields per element.

        Raises ``exceptions.JMESPathTypeError`` when the keys are not all
        numbers or all strings.  An empty array is returned unchanged.
        """
        if not array:
            return array
        # sort_by allows for the expref to be either a number or
        # a string, so we have some special logic to handle this.
        # We evaluate the first array element and verify that it's
        # either a string or a number.  We then create a key function
        # that validates that type, which requires that remaining array
        # elements resolve to the same type as the first element.
        required_type = self._convert_to_jmespath_type(
            type(expref.visit(expref.expression, array[0])).__name__)
        if required_type not in ['number', 'string']:
            raise exceptions.JMESPathTypeError(
                'sort_by', array[0], required_type, ['string', 'number'])
        keyfunc = self._create_key_func(expref,
                                        [required_type],
                                        'sort_by')
        return list(sorted(array, key=keyfunc))

    @signature({'types': ['array']}, {'types': ['expref']})
    def _func_min_by(self, array, expref):
        """Return the element with the smallest ``expref`` value, or None."""
        keyfunc = self._create_key_func(expref,
                                        ['number', 'string'],
                                        'min_by')
        if array:
            return min(array, key=keyfunc)
        else:
            return None

    @signature({'types': ['array']}, {'types': ['expref']})
    def _func_max_by(self, array, expref):
        """Return the element with the largest ``expref`` value, or None."""
        keyfunc = self._create_key_func(expref,
                                        ['number', 'string'],
                                        'max_by')
        if array:
            return max(array, key=keyfunc)
        else:
            return None

    def _create_key_func(self, expref, allowed_types, function_name):
        """Build a sort key function that evaluates and type checks ``expref``.

        The returned callable evaluates ``expref`` against an element and
        raises ``exceptions.JMESPathTypeError`` when the result's JMESPath
        type is not in ``allowed_types``.
        """
        def keyfunc(x):
            result = expref.visit(expref.expression, x)
            actual_typename = type(result).__name__
            jmespath_type = self._convert_to_jmespath_type(actual_typename)
            # allowed_types is in terms of jmespath types, not python types.
            if jmespath_type not in allowed_types:
                raise exceptions.JMESPathTypeError(
                    function_name, result, jmespath_type, allowed_types)
            return result
        return keyfunc

    def _convert_to_jmespath_type(self, pyobject):
        """Map a Python type name to a JMESPath type name, or 'unknown'."""
        return TYPES_MAP.get(pyobject, 'unknown')
start, 'end': start + 2} - elif next_char == '?': - self._next() - yield {'type': 'filter', 'value': '[?', - 'start': start, 'end': start + 2} - else: - yield {'type': 'lbracket', 'value': '[', - 'start': start, 'end': start + 1} - elif self._current == "'": - yield self._consume_raw_string_literal() - elif self._current == '|': - yield self._match_or_else('|', 'or', 'pipe') - elif self._current == '&': - yield self._match_or_else('&', 'and', 'expref') - elif self._current == '`': - yield self._consume_literal() - elif self._current in self.VALID_NUMBER: - start = self._position - buff = self._consume_number() - yield {'type': 'number', 'value': int(buff), - 'start': start, 'end': start + len(buff)} - elif self._current == '-': - # Negative number. - start = self._position - buff = self._consume_number() - if len(buff) > 1: - yield {'type': 'number', 'value': int(buff), - 'start': start, 'end': start + len(buff)} - else: - raise LexerError(lexer_position=start, - lexer_value=buff, - message="Unknown token '%s'" % buff) - elif self._current == '"': - yield self._consume_quoted_identifier() - elif self._current == '<': - yield self._match_or_else('=', 'lte', 'lt') - elif self._current == '>': - yield self._match_or_else('=', 'gte', 'gt') - elif self._current == '!': - yield self._match_or_else('=', 'ne', 'not') - elif self._current == '=': - if self._next() == '=': - yield {'type': 'eq', 'value': '==', - 'start': self._position - 1, 'end': self._position} - self._next() - else: - if self._current is None: - # If we're at the EOF, we never advanced - # the position so we don't need to rewind - # it back one location. 
- position = self._position - else: - position = self._position - 1 - raise LexerError( - lexer_position=position, - lexer_value='=', - message="Unknown token '='") - else: - raise LexerError(lexer_position=self._position, - lexer_value=self._current, - message="Unknown token %s" % self._current) - yield {'type': 'eof', 'value': '', - 'start': self._length, 'end': self._length} - - def _consume_number(self): - start = self._position - buff = self._current - while self._next() in self.VALID_NUMBER: - buff += self._current - return buff - - def _initialize_for_expression(self, expression): - if not expression: - raise EmptyExpressionError() - self._position = 0 - self._expression = expression - self._chars = list(self._expression) - self._current = self._chars[self._position] - self._length = len(self._expression) - - def _next(self): - if self._position == self._length - 1: - self._current = None - else: - self._position += 1 - self._current = self._chars[self._position] - return self._current - - def _consume_until(self, delimiter): - # Consume until the delimiter is reached, - # allowing for the delimiter to be escaped with "\". - start = self._position - buff = '' - self._next() - while self._current != delimiter: - if self._current == '\\': - buff += '\\' - self._next() - if self._current is None: - # We're at the EOF. - raise LexerError(lexer_position=start, - lexer_value=self._expression[start:], - message="Unclosed %s delimiter" % delimiter) - buff += self._current - self._next() - # Skip the closing delimiter. - self._next() - return buff - - def _consume_literal(self): - start = self._position - lexeme = self._consume_until('`').replace('\\`', '`') - try: - # Assume it is valid JSON and attempt to parse. - parsed_json = loads(lexeme) - except ValueError: - try: - # Invalid JSON values should be converted to quoted - # JSON strings during the JEP-12 deprecation period. 
class Lexer(object):
    """Tokenizer for JMESPath expressions.

    ``tokenize`` yields one dict per token with the keys ``type``, ``value``,
    ``start`` and ``end``.  ``start`` is the offset of the token's first
    character in the expression and ``end`` is the offset one past its last
    character, for every token type.
    """
    START_IDENTIFIER = set(string.ascii_letters + '_')
    VALID_IDENTIFIER = set(string.ascii_letters + string.digits + '_')
    VALID_NUMBER = set(string.digits)
    WHITESPACE = set(" \t\n\r")
    SIMPLE_TOKENS = {
        '.': 'dot',
        '*': 'star',
        ']': 'rbracket',
        ',': 'comma',
        ':': 'colon',
        '@': 'current',
        '(': 'lparen',
        ')': 'rparen',
        '{': 'lbrace',
        '}': 'rbrace',
    }

    def tokenize(self, expression):
        """Yield the tokens of ``expression``, ending with an ``eof`` token.

        Raises ``EmptyExpressionError`` for an empty expression and
        ``LexerError`` for input that does not form a valid token.
        """
        self._initialize_for_expression(expression)
        while self._current is not None:
            if self._current in self.SIMPLE_TOKENS:
                yield {'type': self.SIMPLE_TOKENS[self._current],
                       'value': self._current,
                       'start': self._position, 'end': self._position + 1}
                self._next()
            elif self._current in self.START_IDENTIFIER:
                start = self._position
                buff = self._current
                while self._next() in self.VALID_IDENTIFIER:
                    buff += self._current
                yield {'type': 'unquoted_identifier', 'value': buff,
                       'start': start, 'end': start + len(buff)}
            elif self._current in self.WHITESPACE:
                self._next()
            elif self._current == '[':
                start = self._position
                next_char = self._next()
                if next_char == ']':
                    self._next()
                    yield {'type': 'flatten', 'value': '[]',
                           'start': start, 'end': start + 2}
                elif next_char == '?':
                    self._next()
                    yield {'type': 'filter', 'value': '[?',
                           'start': start, 'end': start + 2}
                else:
                    yield {'type': 'lbracket', 'value': '[',
                           'start': start, 'end': start + 1}
            elif self._current == "'":
                yield self._consume_raw_string_literal()
            elif self._current == '|':
                yield self._match_or_else('|', 'or', 'pipe')
            elif self._current == '&':
                yield self._match_or_else('&', 'and', 'expref')
            elif self._current == '`':
                yield self._consume_literal()
            elif self._current in self.VALID_NUMBER:
                start = self._position
                buff = self._consume_number()
                yield {'type': 'number', 'value': int(buff),
                       'start': start, 'end': start + len(buff)}
            elif self._current == '-':
                # Negative number.
                start = self._position
                buff = self._consume_number()
                if len(buff) > 1:
                    yield {'type': 'number', 'value': int(buff),
                           'start': start, 'end': start + len(buff)}
                else:
                    raise LexerError(lexer_position=start,
                                     lexer_value=buff,
                                     message="Unknown token '%s'" % buff)
            elif self._current == '"':
                yield self._consume_quoted_identifier()
            elif self._current == '<':
                yield self._match_or_else('=', 'lte', 'lt')
            elif self._current == '>':
                yield self._match_or_else('=', 'gte', 'gt')
            elif self._current == '!':
                yield self._match_or_else('=', 'ne', 'not')
            elif self._current == '=':
                if self._next() == '=':
                    # _position now points at the second '=', so the token
                    # covers [_position - 1, _position + 1).
                    yield {'type': 'eq', 'value': '==',
                           'start': self._position - 1,
                           'end': self._position + 1}
                    self._next()
                else:
                    if self._current is None:
                        # If we're at the EOF, we never advanced
                        # the position so we don't need to rewind
                        # it back one location.
                        position = self._position
                    else:
                        position = self._position - 1
                    raise LexerError(
                        lexer_position=position,
                        lexer_value='=',
                        message="Unknown token '='")
            else:
                raise LexerError(lexer_position=self._position,
                                 lexer_value=self._current,
                                 message="Unknown token %s" % self._current)
        yield {'type': 'eof', 'value': '',
               'start': self._length, 'end': self._length}

    def _consume_number(self):
        """Consume the current character plus any digits that follow it."""
        buff = self._current
        while self._next() in self.VALID_NUMBER:
            buff += self._current
        return buff

    def _initialize_for_expression(self, expression):
        """Reset the lexer state to the first character of ``expression``."""
        if not expression:
            raise EmptyExpressionError()
        self._position = 0
        self._expression = expression
        self._chars = list(self._expression)
        self._current = self._chars[self._position]
        self._length = len(self._expression)

    def _next(self):
        """Advance one character and return it, or None at the end.

        When the end of the input is reached ``_current`` becomes None and
        ``_position`` stays on the last character.
        """
        if self._position == self._length - 1:
            self._current = None
        else:
            self._position += 1
            self._current = self._chars[self._position]
        return self._current

    def _token_end(self):
        """Return the offset one past the most recently consumed character."""
        # _next() does not move _position when it runs off the end of the
        # input, so in that case the consumed text extends to _length.
        if self._current is None:
            return self._length
        return self._position

    def _consume_until(self, delimiter):
        """Return the text up to the next unescaped ``delimiter``.

        The opening and closing delimiters are skipped.  Backslash escapes
        are copied to the result verbatim.  Raises ``LexerError`` if the
        input ends before the closing delimiter.
        """
        # Consume until the delimiter is reached,
        # allowing for the delimiter to be escaped with "\".
        start = self._position
        buff = ''
        self._next()
        while self._current != delimiter:
            if self._current == '\\':
                buff += '\\'
                self._next()
            if self._current is None:
                # We're at the EOF.
                raise LexerError(lexer_position=start,
                                 lexer_value=self._expression[start:],
                                 message="Unclosed %s delimiter" % delimiter)
            buff += self._current
            self._next()
        # Skip the closing delimiter.
        self._next()
        return buff

    def _consume_literal(self):
        """Consume a backtick delimited JSON literal and return its token."""
        start = self._position
        lexeme = self._consume_until('`').replace('\\`', '`')
        try:
            # Assume it is valid JSON and attempt to parse.
            parsed_json = loads(lexeme)
        except ValueError:
            try:
                # Invalid JSON values should be converted to quoted
                # JSON strings during the JEP-12 deprecation period.
                parsed_json = loads('"%s"' % lexeme.lstrip())
                warnings.warn("deprecated string literal syntax",
                              PendingDeprecationWarning)
            except ValueError:
                raise LexerError(lexer_position=start,
                                 lexer_value=self._expression[start:],
                                 message="Bad token %s" % lexeme)
        return {'type': 'literal', 'value': parsed_json,
                'start': start, 'end': self._token_end()}

    def _consume_quoted_identifier(self):
        """Consume a double quoted identifier and return its token."""
        start = self._position
        lexeme = '"' + self._consume_until('"') + '"'
        end = self._token_end()
        try:
            return {'type': 'quoted_identifier', 'value': loads(lexeme),
                    'start': start, 'end': end}
        except ValueError as e:
            error_message = str(e).split(':')[0]
            raise LexerError(lexer_position=start,
                             lexer_value=lexeme,
                             message=error_message)

    def _consume_raw_string_literal(self):
        """Consume a single quoted raw string and return its token."""
        start = self._position
        lexeme = self._consume_until("'").replace("\\'", "'")
        return {'type': 'literal', 'value': lexeme,
                'start': start, 'end': self._token_end()}

    def _match_or_else(self, expected, match_type, else_type):
        """Lex a one or two character operator starting at the current char.

        If the following character equals ``expected`` both characters are
        consumed and a ``match_type`` token is returned; otherwise only the
        current character is consumed and an ``else_type`` token is returned.
        """
        start = self._position
        current = self._current
        next_char = self._next()
        if next_char == expected:
            self._next()
            return {'type': match_type, 'value': current + next_char,
                    'start': start, 'end': start + 2}
        return {'type': else_type, 'value': current,
                'start': start, 'end': start + 1}
- -""" -import random - -from jmespath import lexer -from jmespath.compat import with_repr_method -from jmespath import ast -from jmespath import exceptions -from jmespath import visitor - - -class Parser(object): - BINDING_POWER = { - 'eof': 0, - 'unquoted_identifier': 0, - 'quoted_identifier': 0, - 'literal': 0, - 'rbracket': 0, - 'rparen': 0, - 'comma': 0, - 'rbrace': 0, - 'number': 0, - 'current': 0, - 'expref': 0, - 'colon': 0, - 'pipe': 1, - 'or': 2, - 'and': 3, - 'eq': 5, - 'gt': 5, - 'lt': 5, - 'gte': 5, - 'lte': 5, - 'ne': 5, - 'flatten': 9, - # Everything above stops a projection. - 'star': 20, - 'filter': 21, - 'dot': 40, - 'not': 45, - 'lbrace': 50, - 'lbracket': 55, - 'lparen': 60, - } - # The maximum binding power for a token that can stop - # a projection. - _PROJECTION_STOP = 10 - # The _MAX_SIZE most recent expressions are cached in - # _CACHE dict. - _CACHE = {} - _MAX_SIZE = 128 - - def __init__(self, lookahead=2): - self.tokenizer = None - self._tokens = [None] * lookahead - self._buffer_size = lookahead - self._index = 0 - - def parse(self, expression): - cached = self._CACHE.get(expression) - if cached is not None: - return cached - parsed_result = self._do_parse(expression) - self._CACHE[expression] = parsed_result - if len(self._CACHE) > self._MAX_SIZE: - self._free_cache_entries() - return parsed_result - - def _do_parse(self, expression): - try: - return self._parse(expression) - except exceptions.LexerError as e: - e.expression = expression - raise - except exceptions.IncompleteExpressionError as e: - e.set_expression(expression) - raise - except exceptions.ParseError as e: - e.expression = expression - raise - - def _parse(self, expression): - self.tokenizer = lexer.Lexer().tokenize(expression) - self._tokens = list(self.tokenizer) - self._index = 0 - parsed = self._expression(binding_power=0) - if not self._current_token() == 'eof': - t = self._lookahead_token(0) - raise exceptions.ParseError(t['start'], t['value'], t['type'], - 
"Unexpected token: %s" % t['value']) - return ParsedResult(expression, parsed) - - def _expression(self, binding_power=0): - left_token = self._lookahead_token(0) - self._advance() - nud_function = getattr( - self, '_token_nud_%s' % left_token['type'], - self._error_nud_token) - left = nud_function(left_token) - current_token = self._current_token() - while binding_power < self.BINDING_POWER[current_token]: - led = getattr(self, '_token_led_%s' % current_token, None) - if led is None: - error_token = self._lookahead_token(0) - self._error_led_token(error_token) - else: - self._advance() - left = led(left) - current_token = self._current_token() - return left - - def _token_nud_literal(self, token): - return ast.literal(token['value']) - - def _token_nud_unquoted_identifier(self, token): - return ast.field(token['value']) - - def _token_nud_quoted_identifier(self, token): - field = ast.field(token['value']) - # You can't have a quoted identifier as a function - # name. - if self._current_token() == 'lparen': - t = self._lookahead_token(0) - raise exceptions.ParseError( - 0, t['value'], t['type'], - 'Quoted identifier not allowed for function names.') - return field - - def _token_nud_star(self, token): - left = ast.identity() - if self._current_token() == 'rbracket': - right = ast.identity() - else: - right = self._parse_projection_rhs(self.BINDING_POWER['star']) - return ast.value_projection(left, right) - - def _token_nud_filter(self, token): - return self._token_led_filter(ast.identity()) - - def _token_nud_lbrace(self, token): - return self._parse_multi_select_hash() - - def _token_nud_lparen(self, token): - expression = self._expression() - self._match('rparen') - return expression - - def _token_nud_flatten(self, token): - left = ast.flatten(ast.identity()) - right = self._parse_projection_rhs( - self.BINDING_POWER['flatten']) - return ast.projection(left, right) - - def _token_nud_not(self, token): - expr = self._expression(self.BINDING_POWER['not']) - return 
ast.not_expression(expr) - - def _token_nud_lbracket(self, token): - if self._current_token() in ['number', 'colon']: - right = self._parse_index_expression() - # We could optimize this and remove the identity() node. - # We don't really need an index_expression node, we can - # just use emit an index node here if we're not dealing - # with a slice. - return self._project_if_slice(ast.identity(), right) - elif self._current_token() == 'star' and \ - self._lookahead(1) == 'rbracket': - self._advance() - self._advance() - right = self._parse_projection_rhs(self.BINDING_POWER['star']) - return ast.projection(ast.identity(), right) - else: - return self._parse_multi_select_list() - - def _parse_index_expression(self): - # We're here: - # [<current> - # ^ - # | current token - if (self._lookahead(0) == 'colon' or - self._lookahead(1) == 'colon'): - return self._parse_slice_expression() - else: - # Parse the syntax [number] - node = ast.index(self._lookahead_token(0)['value']) - self._advance() - self._match('rbracket') - return node - - def _parse_slice_expression(self): - # [start:end:step] - # Where start, end, and step are optional. - # The last colon is optional as well. 
- parts = [None, None, None] - index = 0 - current_token = self._current_token() - while not current_token == 'rbracket' and index < 3: - if current_token == 'colon': - index += 1 - if index == 3: - self._raise_parse_error_for_token( - self._lookahead_token(0), 'syntax error') - self._advance() - elif current_token == 'number': - parts[index] = self._lookahead_token(0)['value'] - self._advance() - else: - self._raise_parse_error_for_token( - self._lookahead_token(0), 'syntax error') - current_token = self._current_token() - self._match('rbracket') - return ast.slice(*parts) - - def _token_nud_current(self, token): - return ast.current_node() - - def _token_nud_expref(self, token): - expression = self._expression(self.BINDING_POWER['expref']) - return ast.expref(expression) - - def _token_led_dot(self, left): - if not self._current_token() == 'star': - right = self._parse_dot_rhs(self.BINDING_POWER['dot']) - if left['type'] == 'subexpression': - left['children'].append(right) - return left - else: - return ast.subexpression([left, right]) - else: - # We're creating a projection. - self._advance() - right = self._parse_projection_rhs( - self.BINDING_POWER['dot']) - return ast.value_projection(left, right) - - def _token_led_pipe(self, left): - right = self._expression(self.BINDING_POWER['pipe']) - return ast.pipe(left, right) - - def _token_led_or(self, left): - right = self._expression(self.BINDING_POWER['or']) - return ast.or_expression(left, right) - - def _token_led_and(self, left): - right = self._expression(self.BINDING_POWER['and']) - return ast.and_expression(left, right) - - def _token_led_lparen(self, left): - if left['type'] != 'field': - # 0 - first func arg or closing paren. - # -1 - '(' token - # -2 - invalid function "name". 
- prev_t = self._lookahead_token(-2) - raise exceptions.ParseError( - prev_t['start'], prev_t['value'], prev_t['type'], - "Invalid function name '%s'" % prev_t['value']) - name = left['value'] - args = [] - while not self._current_token() == 'rparen': - expression = self._expression() - if self._current_token() == 'comma': - self._match('comma') - args.append(expression) - self._match('rparen') - function_node = ast.function_expression(name, args) - return function_node - - def _token_led_filter(self, left): - # Filters are projections. - condition = self._expression(0) - self._match('rbracket') - if self._current_token() == 'flatten': - right = ast.identity() - else: - right = self._parse_projection_rhs(self.BINDING_POWER['filter']) - return ast.filter_projection(left, right, condition) - - def _token_led_eq(self, left): - return self._parse_comparator(left, 'eq') - - def _token_led_ne(self, left): - return self._parse_comparator(left, 'ne') - - def _token_led_gt(self, left): - return self._parse_comparator(left, 'gt') - - def _token_led_gte(self, left): - return self._parse_comparator(left, 'gte') - - def _token_led_lt(self, left): - return self._parse_comparator(left, 'lt') - - def _token_led_lte(self, left): - return self._parse_comparator(left, 'lte') - - def _token_led_flatten(self, left): - left = ast.flatten(left) - right = self._parse_projection_rhs( - self.BINDING_POWER['flatten']) - return ast.projection(left, right) - - def _token_led_lbracket(self, left): - token = self._lookahead_token(0) - if token['type'] in ['number', 'colon']: - right = self._parse_index_expression() - if left['type'] == 'index_expression': - # Optimization: if the left node is an index expr, - # we can avoid creating another node and instead just add - # the right node as a child of the left. 
- left['children'].append(right) - return left - else: - return self._project_if_slice(left, right) - else: - # We have a projection - self._match('star') - self._match('rbracket') - right = self._parse_projection_rhs(self.BINDING_POWER['star']) - return ast.projection(left, right) - - def _project_if_slice(self, left, right): - index_expr = ast.index_expression([left, right]) - if right['type'] == 'slice': - return ast.projection( - index_expr, - self._parse_projection_rhs(self.BINDING_POWER['star'])) - else: - return index_expr - - def _parse_comparator(self, left, comparator): - right = self._expression(self.BINDING_POWER[comparator]) - return ast.comparator(comparator, left, right) - - def _parse_multi_select_list(self): - expressions = [] - while True: - expression = self._expression() - expressions.append(expression) - if self._current_token() == 'rbracket': - break - else: - self._match('comma') - self._match('rbracket') - return ast.multi_select_list(expressions) - - def _parse_multi_select_hash(self): - pairs = [] - while True: - key_token = self._lookahead_token(0) - # Before getting the token value, verify it's - # an identifier. - self._match_multiple_tokens( - token_types=['quoted_identifier', 'unquoted_identifier']) - key_name = key_token['value'] - self._match('colon') - value = self._expression(0) - node = ast.key_val_pair(key_name=key_name, node=value) - pairs.append(node) - if self._current_token() == 'comma': - self._match('comma') - elif self._current_token() == 'rbrace': - self._match('rbrace') - break - return ast.multi_select_dict(nodes=pairs) - - def _parse_projection_rhs(self, binding_power): - # Parse the right hand side of the projection. - if self.BINDING_POWER[self._current_token()] < self._PROJECTION_STOP: - # BP of 10 are all the tokens that stop a projection. 
- right = ast.identity() - elif self._current_token() == 'lbracket': - right = self._expression(binding_power) - elif self._current_token() == 'filter': - right = self._expression(binding_power) - elif self._current_token() == 'dot': - self._match('dot') - right = self._parse_dot_rhs(binding_power) - else: - self._raise_parse_error_for_token(self._lookahead_token(0), - 'syntax error') - return right - - def _parse_dot_rhs(self, binding_power): - # From the grammar: - # expression '.' ( identifier / - # multi-select-list / - # multi-select-hash / - # function-expression / - # * - # In terms of tokens that means that after a '.', - # you can have: - lookahead = self._current_token() - # Common case "foo.bar", so first check for an identifier. - if lookahead in ['quoted_identifier', 'unquoted_identifier', 'star']: - return self._expression(binding_power) - elif lookahead == 'lbracket': - self._match('lbracket') - return self._parse_multi_select_list() - elif lookahead == 'lbrace': - self._match('lbrace') - return self._parse_multi_select_hash() - else: - t = self._lookahead_token(0) - allowed = ['quoted_identifier', 'unquoted_identifier', - 'lbracket', 'lbrace'] - msg = ( - "Expecting: %s, got: %s" % (allowed, t['type']) - ) - self._raise_parse_error_for_token(t, msg) - - def _error_nud_token(self, token): - if token['type'] == 'eof': - raise exceptions.IncompleteExpressionError( - token['start'], token['value'], token['type']) - self._raise_parse_error_for_token(token, 'invalid token') - - def _error_led_token(self, token): - self._raise_parse_error_for_token(token, 'invalid token') - - def _match(self, token_type=None): - # inline'd self._current_token() - if self._current_token() == token_type: - # inline'd self._advance() - self._advance() - else: - self._raise_parse_error_maybe_eof( - token_type, self._lookahead_token(0)) - - def _match_multiple_tokens(self, token_types): - if self._current_token() not in token_types: - self._raise_parse_error_maybe_eof( - 
token_types, self._lookahead_token(0)) - self._advance() - - def _advance(self): - self._index += 1 - - def _current_token(self): - return self._tokens[self._index]['type'] - - def _lookahead(self, number): - return self._tokens[self._index + number]['type'] - - def _lookahead_token(self, number): - return self._tokens[self._index + number] - - def _raise_parse_error_for_token(self, token, reason): - lex_position = token['start'] - actual_value = token['value'] - actual_type = token['type'] - raise exceptions.ParseError(lex_position, actual_value, - actual_type, reason) - - def _raise_parse_error_maybe_eof(self, expected_type, token): - lex_position = token['start'] - actual_value = token['value'] - actual_type = token['type'] - if actual_type == 'eof': - raise exceptions.IncompleteExpressionError( - lex_position, actual_value, actual_type) - message = 'Expecting: %s, got: %s' % (expected_type, - actual_type) - raise exceptions.ParseError( - lex_position, actual_value, actual_type, message) - - def _free_cache_entries(self): - for key in random.sample(self._CACHE.keys(), int(self._MAX_SIZE / 2)): +"""Top down operator precedence parser. + +This is an implementation of Vaughan R. Pratt's +"Top Down Operator Precedence" parser. +(http://dl.acm.org/citation.cfm?doid=512927.512931). + +These are some additional resources that help explain the +general idea behind a Pratt parser: + +* http://effbot.org/zone/simple-top-down-parsing.htm +* http://javascript.crockford.com/tdop/tdop.html + +A few notes on the implementation. + +* All the nud/led tokens are on the Parser class itself, and are dispatched + using getattr(). This keeps all the parsing logic contained to a single + class. +* We use two passes through the data. One to create a list of token, + then one pass through the tokens to create the AST. While the lexer actually + yields tokens, we convert it to a list so we can easily implement two tokens + of lookahead. 
A previous implementation used a fixed circular buffer, but it + was significantly slower. Also, the average jmespath expression typically + does not have a large amount of token so this is not an issue. And + interestingly enough, creating a token list first is actually faster than + consuming from the token iterator one token at a time. + +""" +import random + +from jmespath import lexer +from jmespath.compat import with_repr_method +from jmespath import ast +from jmespath import exceptions +from jmespath import visitor + + +class Parser(object): + BINDING_POWER = { + 'eof': 0, + 'unquoted_identifier': 0, + 'quoted_identifier': 0, + 'literal': 0, + 'rbracket': 0, + 'rparen': 0, + 'comma': 0, + 'rbrace': 0, + 'number': 0, + 'current': 0, + 'expref': 0, + 'colon': 0, + 'pipe': 1, + 'or': 2, + 'and': 3, + 'eq': 5, + 'gt': 5, + 'lt': 5, + 'gte': 5, + 'lte': 5, + 'ne': 5, + 'flatten': 9, + # Everything above stops a projection. + 'star': 20, + 'filter': 21, + 'dot': 40, + 'not': 45, + 'lbrace': 50, + 'lbracket': 55, + 'lparen': 60, + } + # The maximum binding power for a token that can stop + # a projection. + _PROJECTION_STOP = 10 + # The _MAX_SIZE most recent expressions are cached in + # _CACHE dict. 
+ _CACHE = {} + _MAX_SIZE = 128 + + def __init__(self, lookahead=2): + self.tokenizer = None + self._tokens = [None] * lookahead + self._buffer_size = lookahead + self._index = 0 + + def parse(self, expression): + cached = self._CACHE.get(expression) + if cached is not None: + return cached + parsed_result = self._do_parse(expression) + self._CACHE[expression] = parsed_result + if len(self._CACHE) > self._MAX_SIZE: + self._free_cache_entries() + return parsed_result + + def _do_parse(self, expression): + try: + return self._parse(expression) + except exceptions.LexerError as e: + e.expression = expression + raise + except exceptions.IncompleteExpressionError as e: + e.set_expression(expression) + raise + except exceptions.ParseError as e: + e.expression = expression + raise + + def _parse(self, expression): + self.tokenizer = lexer.Lexer().tokenize(expression) + self._tokens = list(self.tokenizer) + self._index = 0 + parsed = self._expression(binding_power=0) + if not self._current_token() == 'eof': + t = self._lookahead_token(0) + raise exceptions.ParseError(t['start'], t['value'], t['type'], + "Unexpected token: %s" % t['value']) + return ParsedResult(expression, parsed) + + def _expression(self, binding_power=0): + left_token = self._lookahead_token(0) + self._advance() + nud_function = getattr( + self, '_token_nud_%s' % left_token['type'], + self._error_nud_token) + left = nud_function(left_token) + current_token = self._current_token() + while binding_power < self.BINDING_POWER[current_token]: + led = getattr(self, '_token_led_%s' % current_token, None) + if led is None: + error_token = self._lookahead_token(0) + self._error_led_token(error_token) + else: + self._advance() + left = led(left) + current_token = self._current_token() + return left + + def _token_nud_literal(self, token): + return ast.literal(token['value']) + + def _token_nud_unquoted_identifier(self, token): + return ast.field(token['value']) + + def _token_nud_quoted_identifier(self, token): + 
field = ast.field(token['value']) + # You can't have a quoted identifier as a function + # name. + if self._current_token() == 'lparen': + t = self._lookahead_token(0) + raise exceptions.ParseError( + 0, t['value'], t['type'], + 'Quoted identifier not allowed for function names.') + return field + + def _token_nud_star(self, token): + left = ast.identity() + if self._current_token() == 'rbracket': + right = ast.identity() + else: + right = self._parse_projection_rhs(self.BINDING_POWER['star']) + return ast.value_projection(left, right) + + def _token_nud_filter(self, token): + return self._token_led_filter(ast.identity()) + + def _token_nud_lbrace(self, token): + return self._parse_multi_select_hash() + + def _token_nud_lparen(self, token): + expression = self._expression() + self._match('rparen') + return expression + + def _token_nud_flatten(self, token): + left = ast.flatten(ast.identity()) + right = self._parse_projection_rhs( + self.BINDING_POWER['flatten']) + return ast.projection(left, right) + + def _token_nud_not(self, token): + expr = self._expression(self.BINDING_POWER['not']) + return ast.not_expression(expr) + + def _token_nud_lbracket(self, token): + if self._current_token() in ['number', 'colon']: + right = self._parse_index_expression() + # We could optimize this and remove the identity() node. + # We don't really need an index_expression node, we can + # just use emit an index node here if we're not dealing + # with a slice. 
+ return self._project_if_slice(ast.identity(), right) + elif self._current_token() == 'star' and \ + self._lookahead(1) == 'rbracket': + self._advance() + self._advance() + right = self._parse_projection_rhs(self.BINDING_POWER['star']) + return ast.projection(ast.identity(), right) + else: + return self._parse_multi_select_list() + + def _parse_index_expression(self): + # We're here: + # [<current> + # ^ + # | current token + if (self._lookahead(0) == 'colon' or + self._lookahead(1) == 'colon'): + return self._parse_slice_expression() + else: + # Parse the syntax [number] + node = ast.index(self._lookahead_token(0)['value']) + self._advance() + self._match('rbracket') + return node + + def _parse_slice_expression(self): + # [start:end:step] + # Where start, end, and step are optional. + # The last colon is optional as well. + parts = [None, None, None] + index = 0 + current_token = self._current_token() + while not current_token == 'rbracket' and index < 3: + if current_token == 'colon': + index += 1 + if index == 3: + self._raise_parse_error_for_token( + self._lookahead_token(0), 'syntax error') + self._advance() + elif current_token == 'number': + parts[index] = self._lookahead_token(0)['value'] + self._advance() + else: + self._raise_parse_error_for_token( + self._lookahead_token(0), 'syntax error') + current_token = self._current_token() + self._match('rbracket') + return ast.slice(*parts) + + def _token_nud_current(self, token): + return ast.current_node() + + def _token_nud_expref(self, token): + expression = self._expression(self.BINDING_POWER['expref']) + return ast.expref(expression) + + def _token_led_dot(self, left): + if not self._current_token() == 'star': + right = self._parse_dot_rhs(self.BINDING_POWER['dot']) + if left['type'] == 'subexpression': + left['children'].append(right) + return left + else: + return ast.subexpression([left, right]) + else: + # We're creating a projection. 
+ self._advance() + right = self._parse_projection_rhs( + self.BINDING_POWER['dot']) + return ast.value_projection(left, right) + + def _token_led_pipe(self, left): + right = self._expression(self.BINDING_POWER['pipe']) + return ast.pipe(left, right) + + def _token_led_or(self, left): + right = self._expression(self.BINDING_POWER['or']) + return ast.or_expression(left, right) + + def _token_led_and(self, left): + right = self._expression(self.BINDING_POWER['and']) + return ast.and_expression(left, right) + + def _token_led_lparen(self, left): + if left['type'] != 'field': + # 0 - first func arg or closing paren. + # -1 - '(' token + # -2 - invalid function "name". + prev_t = self._lookahead_token(-2) + raise exceptions.ParseError( + prev_t['start'], prev_t['value'], prev_t['type'], + "Invalid function name '%s'" % prev_t['value']) + name = left['value'] + args = [] + while not self._current_token() == 'rparen': + expression = self._expression() + if self._current_token() == 'comma': + self._match('comma') + args.append(expression) + self._match('rparen') + function_node = ast.function_expression(name, args) + return function_node + + def _token_led_filter(self, left): + # Filters are projections. 
+ condition = self._expression(0) + self._match('rbracket') + if self._current_token() == 'flatten': + right = ast.identity() + else: + right = self._parse_projection_rhs(self.BINDING_POWER['filter']) + return ast.filter_projection(left, right, condition) + + def _token_led_eq(self, left): + return self._parse_comparator(left, 'eq') + + def _token_led_ne(self, left): + return self._parse_comparator(left, 'ne') + + def _token_led_gt(self, left): + return self._parse_comparator(left, 'gt') + + def _token_led_gte(self, left): + return self._parse_comparator(left, 'gte') + + def _token_led_lt(self, left): + return self._parse_comparator(left, 'lt') + + def _token_led_lte(self, left): + return self._parse_comparator(left, 'lte') + + def _token_led_flatten(self, left): + left = ast.flatten(left) + right = self._parse_projection_rhs( + self.BINDING_POWER['flatten']) + return ast.projection(left, right) + + def _token_led_lbracket(self, left): + token = self._lookahead_token(0) + if token['type'] in ['number', 'colon']: + right = self._parse_index_expression() + if left['type'] == 'index_expression': + # Optimization: if the left node is an index expr, + # we can avoid creating another node and instead just add + # the right node as a child of the left. 
+ left['children'].append(right) + return left + else: + return self._project_if_slice(left, right) + else: + # We have a projection + self._match('star') + self._match('rbracket') + right = self._parse_projection_rhs(self.BINDING_POWER['star']) + return ast.projection(left, right) + + def _project_if_slice(self, left, right): + index_expr = ast.index_expression([left, right]) + if right['type'] == 'slice': + return ast.projection( + index_expr, + self._parse_projection_rhs(self.BINDING_POWER['star'])) + else: + return index_expr + + def _parse_comparator(self, left, comparator): + right = self._expression(self.BINDING_POWER[comparator]) + return ast.comparator(comparator, left, right) + + def _parse_multi_select_list(self): + expressions = [] + while True: + expression = self._expression() + expressions.append(expression) + if self._current_token() == 'rbracket': + break + else: + self._match('comma') + self._match('rbracket') + return ast.multi_select_list(expressions) + + def _parse_multi_select_hash(self): + pairs = [] + while True: + key_token = self._lookahead_token(0) + # Before getting the token value, verify it's + # an identifier. + self._match_multiple_tokens( + token_types=['quoted_identifier', 'unquoted_identifier']) + key_name = key_token['value'] + self._match('colon') + value = self._expression(0) + node = ast.key_val_pair(key_name=key_name, node=value) + pairs.append(node) + if self._current_token() == 'comma': + self._match('comma') + elif self._current_token() == 'rbrace': + self._match('rbrace') + break + return ast.multi_select_dict(nodes=pairs) + + def _parse_projection_rhs(self, binding_power): + # Parse the right hand side of the projection. + if self.BINDING_POWER[self._current_token()] < self._PROJECTION_STOP: + # BP of 10 are all the tokens that stop a projection. 
+ right = ast.identity() + elif self._current_token() == 'lbracket': + right = self._expression(binding_power) + elif self._current_token() == 'filter': + right = self._expression(binding_power) + elif self._current_token() == 'dot': + self._match('dot') + right = self._parse_dot_rhs(binding_power) + else: + self._raise_parse_error_for_token(self._lookahead_token(0), + 'syntax error') + return right + + def _parse_dot_rhs(self, binding_power): + # From the grammar: + # expression '.' ( identifier / + # multi-select-list / + # multi-select-hash / + # function-expression / + # * + # In terms of tokens that means that after a '.', + # you can have: + lookahead = self._current_token() + # Common case "foo.bar", so first check for an identifier. + if lookahead in ['quoted_identifier', 'unquoted_identifier', 'star']: + return self._expression(binding_power) + elif lookahead == 'lbracket': + self._match('lbracket') + return self._parse_multi_select_list() + elif lookahead == 'lbrace': + self._match('lbrace') + return self._parse_multi_select_hash() + else: + t = self._lookahead_token(0) + allowed = ['quoted_identifier', 'unquoted_identifier', + 'lbracket', 'lbrace'] + msg = ( + "Expecting: %s, got: %s" % (allowed, t['type']) + ) + self._raise_parse_error_for_token(t, msg) + + def _error_nud_token(self, token): + if token['type'] == 'eof': + raise exceptions.IncompleteExpressionError( + token['start'], token['value'], token['type']) + self._raise_parse_error_for_token(token, 'invalid token') + + def _error_led_token(self, token): + self._raise_parse_error_for_token(token, 'invalid token') + + def _match(self, token_type=None): + # inline'd self._current_token() + if self._current_token() == token_type: + # inline'd self._advance() + self._advance() + else: + self._raise_parse_error_maybe_eof( + token_type, self._lookahead_token(0)) + + def _match_multiple_tokens(self, token_types): + if self._current_token() not in token_types: + self._raise_parse_error_maybe_eof( + 
token_types, self._lookahead_token(0)) + self._advance() + + def _advance(self): + self._index += 1 + + def _current_token(self): + return self._tokens[self._index]['type'] + + def _lookahead(self, number): + return self._tokens[self._index + number]['type'] + + def _lookahead_token(self, number): + return self._tokens[self._index + number] + + def _raise_parse_error_for_token(self, token, reason): + lex_position = token['start'] + actual_value = token['value'] + actual_type = token['type'] + raise exceptions.ParseError(lex_position, actual_value, + actual_type, reason) + + def _raise_parse_error_maybe_eof(self, expected_type, token): + lex_position = token['start'] + actual_value = token['value'] + actual_type = token['type'] + if actual_type == 'eof': + raise exceptions.IncompleteExpressionError( + lex_position, actual_value, actual_type) + message = 'Expecting: %s, got: %s' % (expected_type, + actual_type) + raise exceptions.ParseError( + lex_position, actual_value, actual_type, message) + + def _free_cache_entries(self): + for key in random.sample(self._CACHE.keys(), int(self._MAX_SIZE / 2)): self._CACHE.pop(key, None) - - @classmethod - def purge(cls): - """Clear the expression compilation cache.""" - cls._CACHE.clear() - - -@with_repr_method -class ParsedResult(object): - def __init__(self, expression, parsed): - self.expression = expression - self.parsed = parsed - - def search(self, value, options=None): - interpreter = visitor.TreeInterpreter(options) - result = interpreter.visit(self.parsed, value) - return result - - def _render_dot_file(self): - """Render the parsed AST as a dot file. - - Note that this is marked as an internal method because - the AST is an implementation detail and is subject - to change. This method can be used to help troubleshoot - or for development purposes, but is not considered part - of the public supported API. Use at your own risk. 
- - """ - renderer = visitor.GraphvizVisitor() - contents = renderer.visit(self.parsed) - return contents - - def __repr__(self): - return repr(self.parsed) + + @classmethod + def purge(cls): + """Clear the expression compilation cache.""" + cls._CACHE.clear() + + +@with_repr_method +class ParsedResult(object): + def __init__(self, expression, parsed): + self.expression = expression + self.parsed = parsed + + def search(self, value, options=None): + interpreter = visitor.TreeInterpreter(options) + result = interpreter.visit(self.parsed, value) + return result + + def _render_dot_file(self): + """Render the parsed AST as a dot file. + + Note that this is marked as an internal method because + the AST is an implementation detail and is subject + to change. This method can be used to help troubleshoot + or for development purposes, but is not considered part + of the public supported API. Use at your own risk. + + """ + renderer = visitor.GraphvizVisitor() + contents = renderer.visit(self.parsed) + return contents + + def __repr__(self): + return repr(self.parsed) diff --git a/contrib/python/jmespath/jmespath/visitor.py b/contrib/python/jmespath/jmespath/visitor.py index 34f4da5973b..b3e846b7614 100644 --- a/contrib/python/jmespath/jmespath/visitor.py +++ b/contrib/python/jmespath/jmespath/visitor.py @@ -1,328 +1,328 @@ -import operator - -from jmespath import functions -from jmespath.compat import string_type -from numbers import Number - - -def _equals(x, y): - if _is_special_integer_case(x, y): - return False - else: - return x == y - - -def _is_special_integer_case(x, y): - # We need to special case comparing 0 or 1 to - # True/False. While normally comparing any - # integer other than 0/1 to True/False will always - # return False. 
However 0/1 have this: - # >>> 0 == True - # False - # >>> 0 == False - # True - # >>> 1 == True - # True - # >>> 1 == False - # False - # - # Also need to consider that: - # >>> 0 in [True, False] - # True +import operator + +from jmespath import functions +from jmespath.compat import string_type +from numbers import Number + + +def _equals(x, y): + if _is_special_integer_case(x, y): + return False + else: + return x == y + + +def _is_special_integer_case(x, y): + # We need to special case comparing 0 or 1 to + # True/False. While normally comparing any + # integer other than 0/1 to True/False will always + # return False. However 0/1 have this: + # >>> 0 == True + # False + # >>> 0 == False + # True + # >>> 1 == True + # True + # >>> 1 == False + # False + # + # Also need to consider that: + # >>> 0 in [True, False] + # True if type(x) is int and (x == 0 or x == 1): - return y is True or y is False + return y is True or y is False elif type(y) is int and (y == 0 or y == 1): - return x is True or x is False - - -def _is_comparable(x): - # The spec doesn't officially support string types yet, - # but enough people are relying on this behavior that - # it's been added back. This should eventually become - # part of the official spec. - return _is_actual_number(x) or isinstance(x, string_type) - - -def _is_actual_number(x): - # We need to handle python's quirkiness with booleans, - # specifically: - # - # >>> isinstance(False, int) - # True - # >>> isinstance(True, int) - # True - if x is True or x is False: - return False - return isinstance(x, Number) - - -class Options(object): - """Options to control how a JMESPath function is evaluated.""" - def __init__(self, dict_cls=None, custom_functions=None): - #: The class to use when creating a dict. The interpreter - # may create dictionaries during the evaluation of a JMESPath - # expression. For example, a multi-select hash will - # create a dictionary. By default we use a dict() type. 
- # You can set this value to change what dict type is used. - # The most common reason you would change this is if you - # want to set a collections.OrderedDict so that you can - # have predictable key ordering. - self.dict_cls = dict_cls - self.custom_functions = custom_functions - - -class _Expression(object): - def __init__(self, expression, interpreter): - self.expression = expression - self.interpreter = interpreter - - def visit(self, node, *args, **kwargs): - return self.interpreter.visit(node, *args, **kwargs) - - -class Visitor(object): - def __init__(self): - self._method_cache = {} - - def visit(self, node, *args, **kwargs): - node_type = node['type'] - method = self._method_cache.get(node_type) - if method is None: - method = getattr( - self, 'visit_%s' % node['type'], self.default_visit) - self._method_cache[node_type] = method - return method(node, *args, **kwargs) - - def default_visit(self, node, *args, **kwargs): - raise NotImplementedError("default_visit") - - -class TreeInterpreter(Visitor): - COMPARATOR_FUNC = { - 'eq': _equals, - 'ne': lambda x, y: not _equals(x, y), - 'lt': operator.lt, - 'gt': operator.gt, - 'lte': operator.le, - 'gte': operator.ge - } - _EQUALITY_OPS = ['eq', 'ne'] - MAP_TYPE = dict - - def __init__(self, options=None): - super(TreeInterpreter, self).__init__() - self._dict_cls = self.MAP_TYPE - if options is None: - options = Options() - self._options = options - if options.dict_cls is not None: - self._dict_cls = self._options.dict_cls - if options.custom_functions is not None: - self._functions = self._options.custom_functions - else: - self._functions = functions.Functions() - - def default_visit(self, node, *args, **kwargs): - raise NotImplementedError(node['type']) - - def visit_subexpression(self, node, value): - result = value - for node in node['children']: - result = self.visit(node, result) - return result - - def visit_field(self, node, value): - try: - return value.get(node['value']) - except AttributeError: - 
return None - - def visit_comparator(self, node, value): - # Common case: comparator is == or != - comparator_func = self.COMPARATOR_FUNC[node['value']] - if node['value'] in self._EQUALITY_OPS: - return comparator_func( - self.visit(node['children'][0], value), - self.visit(node['children'][1], value) - ) - else: - # Ordering operators are only valid for numbers. - # Evaluating any other type with a comparison operator - # will yield a None value. - left = self.visit(node['children'][0], value) - right = self.visit(node['children'][1], value) - num_types = (int, float) - if not (_is_comparable(left) and - _is_comparable(right)): - return None - return comparator_func(left, right) - - def visit_current(self, node, value): - return value - - def visit_expref(self, node, value): - return _Expression(node['children'][0], self) - - def visit_function_expression(self, node, value): - resolved_args = [] - for child in node['children']: - current = self.visit(child, value) - resolved_args.append(current) - return self._functions.call_function(node['value'], resolved_args) - - def visit_filter_projection(self, node, value): - base = self.visit(node['children'][0], value) - if not isinstance(base, list): - return None - comparator_node = node['children'][2] - collected = [] - for element in base: - if self._is_true(self.visit(comparator_node, element)): - current = self.visit(node['children'][1], element) - if current is not None: - collected.append(current) - return collected - - def visit_flatten(self, node, value): - base = self.visit(node['children'][0], value) - if not isinstance(base, list): - # Can't flatten the object if it's not a list. 
- return None - merged_list = [] - for element in base: - if isinstance(element, list): - merged_list.extend(element) - else: - merged_list.append(element) - return merged_list - - def visit_identity(self, node, value): - return value - - def visit_index(self, node, value): - # Even though we can index strings, we don't - # want to support that. - if not isinstance(value, list): - return None - try: - return value[node['value']] - except IndexError: - return None - - def visit_index_expression(self, node, value): - result = value - for node in node['children']: - result = self.visit(node, result) - return result - - def visit_slice(self, node, value): - if not isinstance(value, list): - return None - s = slice(*node['children']) - return value[s] - - def visit_key_val_pair(self, node, value): - return self.visit(node['children'][0], value) - - def visit_literal(self, node, value): - return node['value'] - - def visit_multi_select_dict(self, node, value): - if value is None: - return None - collected = self._dict_cls() - for child in node['children']: - collected[child['value']] = self.visit(child, value) - return collected - - def visit_multi_select_list(self, node, value): - if value is None: - return None - collected = [] - for child in node['children']: - collected.append(self.visit(child, value)) - return collected - - def visit_or_expression(self, node, value): - matched = self.visit(node['children'][0], value) - if self._is_false(matched): - matched = self.visit(node['children'][1], value) - return matched - - def visit_and_expression(self, node, value): - matched = self.visit(node['children'][0], value) - if self._is_false(matched): - return matched - return self.visit(node['children'][1], value) - - def visit_not_expression(self, node, value): - original_result = self.visit(node['children'][0], value) + return x is True or x is False + + +def _is_comparable(x): + # The spec doesn't officially support string types yet, + # but enough people are relying on 
this behavior that + # it's been added back. This should eventually become + # part of the official spec. + return _is_actual_number(x) or isinstance(x, string_type) + + +def _is_actual_number(x): + # We need to handle python's quirkiness with booleans, + # specifically: + # + # >>> isinstance(False, int) + # True + # >>> isinstance(True, int) + # True + if x is True or x is False: + return False + return isinstance(x, Number) + + +class Options(object): + """Options to control how a JMESPath function is evaluated.""" + def __init__(self, dict_cls=None, custom_functions=None): + #: The class to use when creating a dict. The interpreter + # may create dictionaries during the evaluation of a JMESPath + # expression. For example, a multi-select hash will + # create a dictionary. By default we use a dict() type. + # You can set this value to change what dict type is used. + # The most common reason you would change this is if you + # want to set a collections.OrderedDict so that you can + # have predictable key ordering. 
+ self.dict_cls = dict_cls + self.custom_functions = custom_functions + + +class _Expression(object): + def __init__(self, expression, interpreter): + self.expression = expression + self.interpreter = interpreter + + def visit(self, node, *args, **kwargs): + return self.interpreter.visit(node, *args, **kwargs) + + +class Visitor(object): + def __init__(self): + self._method_cache = {} + + def visit(self, node, *args, **kwargs): + node_type = node['type'] + method = self._method_cache.get(node_type) + if method is None: + method = getattr( + self, 'visit_%s' % node['type'], self.default_visit) + self._method_cache[node_type] = method + return method(node, *args, **kwargs) + + def default_visit(self, node, *args, **kwargs): + raise NotImplementedError("default_visit") + + +class TreeInterpreter(Visitor): + COMPARATOR_FUNC = { + 'eq': _equals, + 'ne': lambda x, y: not _equals(x, y), + 'lt': operator.lt, + 'gt': operator.gt, + 'lte': operator.le, + 'gte': operator.ge + } + _EQUALITY_OPS = ['eq', 'ne'] + MAP_TYPE = dict + + def __init__(self, options=None): + super(TreeInterpreter, self).__init__() + self._dict_cls = self.MAP_TYPE + if options is None: + options = Options() + self._options = options + if options.dict_cls is not None: + self._dict_cls = self._options.dict_cls + if options.custom_functions is not None: + self._functions = self._options.custom_functions + else: + self._functions = functions.Functions() + + def default_visit(self, node, *args, **kwargs): + raise NotImplementedError(node['type']) + + def visit_subexpression(self, node, value): + result = value + for node in node['children']: + result = self.visit(node, result) + return result + + def visit_field(self, node, value): + try: + return value.get(node['value']) + except AttributeError: + return None + + def visit_comparator(self, node, value): + # Common case: comparator is == or != + comparator_func = self.COMPARATOR_FUNC[node['value']] + if node['value'] in self._EQUALITY_OPS: + return 
comparator_func( + self.visit(node['children'][0], value), + self.visit(node['children'][1], value) + ) + else: + # Ordering operators are only valid for numbers. + # Evaluating any other type with a comparison operator + # will yield a None value. + left = self.visit(node['children'][0], value) + right = self.visit(node['children'][1], value) + num_types = (int, float) + if not (_is_comparable(left) and + _is_comparable(right)): + return None + return comparator_func(left, right) + + def visit_current(self, node, value): + return value + + def visit_expref(self, node, value): + return _Expression(node['children'][0], self) + + def visit_function_expression(self, node, value): + resolved_args = [] + for child in node['children']: + current = self.visit(child, value) + resolved_args.append(current) + return self._functions.call_function(node['value'], resolved_args) + + def visit_filter_projection(self, node, value): + base = self.visit(node['children'][0], value) + if not isinstance(base, list): + return None + comparator_node = node['children'][2] + collected = [] + for element in base: + if self._is_true(self.visit(comparator_node, element)): + current = self.visit(node['children'][1], element) + if current is not None: + collected.append(current) + return collected + + def visit_flatten(self, node, value): + base = self.visit(node['children'][0], value) + if not isinstance(base, list): + # Can't flatten the object if it's not a list. + return None + merged_list = [] + for element in base: + if isinstance(element, list): + merged_list.extend(element) + else: + merged_list.append(element) + return merged_list + + def visit_identity(self, node, value): + return value + + def visit_index(self, node, value): + # Even though we can index strings, we don't + # want to support that. 
+ if not isinstance(value, list): + return None + try: + return value[node['value']] + except IndexError: + return None + + def visit_index_expression(self, node, value): + result = value + for node in node['children']: + result = self.visit(node, result) + return result + + def visit_slice(self, node, value): + if not isinstance(value, list): + return None + s = slice(*node['children']) + return value[s] + + def visit_key_val_pair(self, node, value): + return self.visit(node['children'][0], value) + + def visit_literal(self, node, value): + return node['value'] + + def visit_multi_select_dict(self, node, value): + if value is None: + return None + collected = self._dict_cls() + for child in node['children']: + collected[child['value']] = self.visit(child, value) + return collected + + def visit_multi_select_list(self, node, value): + if value is None: + return None + collected = [] + for child in node['children']: + collected.append(self.visit(child, value)) + return collected + + def visit_or_expression(self, node, value): + matched = self.visit(node['children'][0], value) + if self._is_false(matched): + matched = self.visit(node['children'][1], value) + return matched + + def visit_and_expression(self, node, value): + matched = self.visit(node['children'][0], value) + if self._is_false(matched): + return matched + return self.visit(node['children'][1], value) + + def visit_not_expression(self, node, value): + original_result = self.visit(node['children'][0], value) if type(original_result) is int and original_result == 0: - # Special case for 0, !0 should be false, not true. - # 0 is not a special cased integer in jmespath. 
- return False - return not original_result - - def visit_pipe(self, node, value): - result = value - for node in node['children']: - result = self.visit(node, result) - return result - - def visit_projection(self, node, value): - base = self.visit(node['children'][0], value) - if not isinstance(base, list): - return None - collected = [] - for element in base: - current = self.visit(node['children'][1], element) - if current is not None: - collected.append(current) - return collected - - def visit_value_projection(self, node, value): - base = self.visit(node['children'][0], value) - try: - base = base.values() - except AttributeError: - return None - collected = [] - for element in base: - current = self.visit(node['children'][1], element) - if current is not None: - collected.append(current) - return collected - - def _is_false(self, value): - # This looks weird, but we're explicitly using equality checks - # because the truth/false values are different between - # python and jmespath. - return (value == '' or value == [] or value == {} or value is None or - value is False) - - def _is_true(self, value): - return not self._is_false(value) - - -class GraphvizVisitor(Visitor): - def __init__(self): - super(GraphvizVisitor, self).__init__() - self._lines = [] - self._count = 1 - - def visit(self, node, *args, **kwargs): - self._lines.append('digraph AST {') - current = '%s%s' % (node['type'], self._count) - self._count += 1 - self._visit(node, current) - self._lines.append('}') - return '\n'.join(self._lines) - - def _visit(self, node, current): - self._lines.append('%s [label="%s(%s)"]' % ( - current, node['type'], node.get('value', ''))) - for child in node.get('children', []): - child_name = '%s%s' % (child['type'], self._count) - self._count += 1 - self._lines.append(' %s -> %s' % (current, child_name)) - self._visit(child, child_name) + # Special case for 0, !0 should be false, not true. + # 0 is not a special cased integer in jmespath. 
+ return False + return not original_result + + def visit_pipe(self, node, value): + result = value + for node in node['children']: + result = self.visit(node, result) + return result + + def visit_projection(self, node, value): + base = self.visit(node['children'][0], value) + if not isinstance(base, list): + return None + collected = [] + for element in base: + current = self.visit(node['children'][1], element) + if current is not None: + collected.append(current) + return collected + + def visit_value_projection(self, node, value): + base = self.visit(node['children'][0], value) + try: + base = base.values() + except AttributeError: + return None + collected = [] + for element in base: + current = self.visit(node['children'][1], element) + if current is not None: + collected.append(current) + return collected + + def _is_false(self, value): + # This looks weird, but we're explicitly using equality checks + # because the truth/false values are different between + # python and jmespath. 
+ return (value == '' or value == [] or value == {} or value is None or + value is False) + + def _is_true(self, value): + return not self._is_false(value) + + +class GraphvizVisitor(Visitor): + def __init__(self): + super(GraphvizVisitor, self).__init__() + self._lines = [] + self._count = 1 + + def visit(self, node, *args, **kwargs): + self._lines.append('digraph AST {') + current = '%s%s' % (node['type'], self._count) + self._count += 1 + self._visit(node, current) + self._lines.append('}') + return '\n'.join(self._lines) + + def _visit(self, node, current): + self._lines.append('%s [label="%s(%s)"]' % ( + current, node['type'], node.get('value', ''))) + for child in node.get('children', []): + child_name = '%s%s' % (child['type'], self._count) + self._count += 1 + self._lines.append(' %s -> %s' % (current, child_name)) + self._visit(child, child_name) diff --git a/contrib/python/jmespath/ya.make b/contrib/python/jmespath/ya.make index c4d88682062..1ffbd236bc5 100644 --- a/contrib/python/jmespath/ya.make +++ b/contrib/python/jmespath/ya.make @@ -1,32 +1,32 @@ PY23_LIBRARY() - + OWNER(g:python-contrib) - + VERSION(0.10.0) - + LICENSE(MIT) NO_LINT() -PY_SRCS( - TOP_LEVEL +PY_SRCS( + TOP_LEVEL jmespath/__init__.py jmespath/ast.py jmespath/compat.py - jmespath/exceptions.py - jmespath/functions.py - jmespath/lexer.py - jmespath/parser.py + jmespath/exceptions.py + jmespath/functions.py + jmespath/lexer.py + jmespath/parser.py jmespath/visitor.py -) - +) + RESOURCE_FILES( PREFIX contrib/python/jmespath/ .dist-info/METADATA .dist-info/top_level.txt ) -END() +END() RECURSE_FOR_TESTS( tests |
