diff options
| author | robot-contrib <[email protected]> | 2022-05-18 00:43:36 +0300 |
|---|---|---|
| committer | robot-contrib <[email protected]> | 2022-05-18 00:43:36 +0300 |
| commit | 9e5f436a8b2a27bcc7802e443ea3ef3e41a82a75 (patch) | |
| tree | 78b522cab9f76336e62064d4d8ff7c897659b20e /contrib/python/pure-eval | |
| parent | 8113a823ffca6451bb5ff8f0334560885a939a24 (diff) | |
Update contrib/python/ipython/py3 to 8.3.0
ref:e84342d4d30476f9148137f37fd0c6405fd36f55
Diffstat (limited to 'contrib/python/pure-eval')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | contrib/python/pure-eval/.dist-info/METADATA | 229 |
| -rw-r--r-- | contrib/python/pure-eval/.dist-info/top_level.txt | 1 |
| -rw-r--r-- | contrib/python/pure-eval/LICENSE.txt | 21 |
| -rw-r--r-- | contrib/python/pure-eval/README.md | 204 |
| -rw-r--r-- | contrib/python/pure-eval/pure_eval/__init__.py | 8 |
| -rw-r--r-- | contrib/python/pure-eval/pure_eval/core.py | 449 |
| -rw-r--r-- | contrib/python/pure-eval/pure_eval/my_getattr_static.py | 138 |
| -rw-r--r-- | contrib/python/pure-eval/pure_eval/py.typed | 1 |
| -rw-r--r-- | contrib/python/pure-eval/pure_eval/utils.py | 201 |
| -rw-r--r-- | contrib/python/pure-eval/pure_eval/version.py | 1 |
10 files changed, 1253 insertions, 0 deletions
diff --git a/contrib/python/pure-eval/.dist-info/METADATA b/contrib/python/pure-eval/.dist-info/METADATA new file mode 100644 index 00000000000..931f69c3484 --- /dev/null +++ b/contrib/python/pure-eval/.dist-info/METADATA @@ -0,0 +1,229 @@ +Metadata-Version: 2.1 +Name: pure-eval +Version: 0.2.2 +Summary: Safely evaluate AST nodes without side effects +Home-page: http://github.com/alexmojaki/pure_eval +Author: Alex Hall +Author-email: [email protected] +License: MIT +Platform: UNKNOWN +Classifier: Intended Audience :: Developers +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: License :: OSI Approved :: MIT License +Classifier: Operating System :: OS Independent +Description-Content-Type: text/markdown +License-File: LICENSE.txt +Provides-Extra: tests +Requires-Dist: pytest ; extra == 'tests' + +# `pure_eval` + +[](https://travis-ci.org/alexmojaki/pure_eval) [](https://coveralls.io/github/alexmojaki/pure_eval?branch=master) [](https://pypi.python.org/pypi/pure_eval) + +This is a Python package that lets you safely evaluate certain AST nodes without triggering arbitrary code that may have unwanted side effects. 
+ +It can be installed from PyPI: + + pip install pure_eval + +To demonstrate usage, suppose we have an object defined as follows: + +```python +class Rectangle: + def __init__(self, width, height): + self.width = width + self.height = height + + @property + def area(self): + print("Calculating area...") + return self.width * self.height + + +rect = Rectangle(3, 5) +``` + +Given the `rect` object, we want to evaluate whatever expressions we can in this source code: + +```python +source = "(rect.width, rect.height, rect.area)" +``` + +This library works with the AST, so let's parse the source code and peek inside: + +```python +import ast + +tree = ast.parse(source) +the_tuple = tree.body[0].value +for node in the_tuple.elts: + print(ast.dump(node)) +``` + +Output: + +```python +Attribute(value=Name(id='rect', ctx=Load()), attr='width', ctx=Load()) +Attribute(value=Name(id='rect', ctx=Load()), attr='height', ctx=Load()) +Attribute(value=Name(id='rect', ctx=Load()), attr='area', ctx=Load()) +``` + +Now to actually use the library. First construct an Evaluator: + +```python +from pure_eval import Evaluator + +evaluator = Evaluator({"rect": rect}) +``` + +The argument to `Evaluator` should be a mapping from variable names to their values. Or if you have access to the stack frame where `rect` is defined, you can instead use: + +```python +evaluator = Evaluator.from_frame(frame) +``` + +Now to evaluate some nodes, using `evaluator[node]`: + +```python +print("rect.width:", evaluator[the_tuple.elts[0]]) +print("rect:", evaluator[the_tuple.elts[0].value]) +``` + +Output: + +``` +rect.width: 3 +rect: <__main__.Rectangle object at 0x105b0dd30> +``` + +OK, but you could have done the same thing with `eval`. The useful part is that it will refuse to evaluate the property `rect.area` because that would trigger unknown code. If we try, it'll raise a `CannotEval` exception. 
+ +```python +from pure_eval import CannotEval + +try: + print("rect.area:", evaluator[the_tuple.elts[2]]) # fails +except CannotEval as e: + print(e) # prints CannotEval +``` + +To find all the expressions that can be evaluated in a tree: + +```python +for node, value in evaluator.find_expressions(tree): + print(ast.dump(node), value) +``` + +Output: + +```python +Attribute(value=Name(id='rect', ctx=Load()), attr='width', ctx=Load()) 3 +Attribute(value=Name(id='rect', ctx=Load()), attr='height', ctx=Load()) 5 +Name(id='rect', ctx=Load()) <__main__.Rectangle object at 0x105568d30> +Name(id='rect', ctx=Load()) <__main__.Rectangle object at 0x105568d30> +Name(id='rect', ctx=Load()) <__main__.Rectangle object at 0x105568d30> +``` + +Note that this includes `rect` three times, once for each appearance in the source code. Since all these nodes are equivalent, we can group them together: + +```python +from pure_eval import group_expressions + +for nodes, values in group_expressions(evaluator.find_expressions(tree)): + print(len(nodes), "nodes with value:", values) +``` + +Output: + +``` +1 nodes with value: 3 +1 nodes with value: 5 +3 nodes with value: <__main__.Rectangle object at 0x10d374d30> +``` + +If we want to list all the expressions in a tree, we may want to filter out certain expressions whose values are obvious. 
For example, suppose we have a function `foo`: + +```python +def foo(): + pass +``` + +If we refer to `foo` by its name as usual, then that's not interesting: + +```python +from pure_eval import is_expression_interesting + +node = ast.parse('foo').body[0].value +print(ast.dump(node)) +print(is_expression_interesting(node, foo)) +``` + +Output: + +```python +Name(id='foo', ctx=Load()) +False +``` + +But if we refer to it by a different name, then it's interesting: + +```python +node = ast.parse('bar').body[0].value +print(ast.dump(node)) +print(is_expression_interesting(node, foo)) +``` + +Output: + +```python +Name(id='bar', ctx=Load()) +True +``` + +In general `is_expression_interesting` returns False for the following values: +- Literals (e.g. `123`, `'abc'`, `[1, 2, 3]`, `{'a': (), 'b': ([1, 2], [3])}`) +- Variables or attributes whose name is equal to the value's `__name__`, such as `foo` above or `self.foo` if it was a method. +- Builtins (e.g. `len`) referred to by their usual name. + +To make things easier, you can combine finding expressions, grouping them, and filtering out the obvious ones with: + +```python +evaluator.interesting_expressions_grouped(root) +``` + +To get the source code of an AST node, I recommend [asttokens](https://github.com/gristlabs/asttokens). 
+ +Here's a complete example that brings it all together: + +```python +from asttokens import ASTTokens +from pure_eval import Evaluator + +source = """ +x = 1 +d = {x: 2} +y = d[x] +""" + +names = {} +exec(source, names) +atok = ASTTokens(source, parse=True) +for nodes, value in Evaluator(names).interesting_expressions_grouped(atok.tree): + print(atok.get_text(nodes[0]), "=", value) +``` + +Output: + +```python +x = 1 +d = {1: 2} +y = 2 +d[x] = 2 +``` + + diff --git a/contrib/python/pure-eval/.dist-info/top_level.txt b/contrib/python/pure-eval/.dist-info/top_level.txt new file mode 100644 index 00000000000..e50c81f634f --- /dev/null +++ b/contrib/python/pure-eval/.dist-info/top_level.txt @@ -0,0 +1 @@ +pure_eval diff --git a/contrib/python/pure-eval/LICENSE.txt b/contrib/python/pure-eval/LICENSE.txt new file mode 100644 index 00000000000..473e36e246e --- /dev/null +++ b/contrib/python/pure-eval/LICENSE.txt @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 Alex Hall + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/contrib/python/pure-eval/README.md b/contrib/python/pure-eval/README.md new file mode 100644 index 00000000000..a4edbfc0a57 --- /dev/null +++ b/contrib/python/pure-eval/README.md @@ -0,0 +1,204 @@ +# `pure_eval` + +[](https://travis-ci.org/alexmojaki/pure_eval) [](https://coveralls.io/github/alexmojaki/pure_eval?branch=master) [](https://pypi.python.org/pypi/pure_eval) + +This is a Python package that lets you safely evaluate certain AST nodes without triggering arbitrary code that may have unwanted side effects. + +It can be installed from PyPI: + + pip install pure_eval + +To demonstrate usage, suppose we have an object defined as follows: + +```python +class Rectangle: + def __init__(self, width, height): + self.width = width + self.height = height + + @property + def area(self): + print("Calculating area...") + return self.width * self.height + + +rect = Rectangle(3, 5) +``` + +Given the `rect` object, we want to evaluate whatever expressions we can in this source code: + +```python +source = "(rect.width, rect.height, rect.area)" +``` + +This library works with the AST, so let's parse the source code and peek inside: + +```python +import ast + +tree = ast.parse(source) +the_tuple = tree.body[0].value +for node in the_tuple.elts: + print(ast.dump(node)) +``` + +Output: + +```python +Attribute(value=Name(id='rect', ctx=Load()), attr='width', ctx=Load()) +Attribute(value=Name(id='rect', ctx=Load()), attr='height', ctx=Load()) +Attribute(value=Name(id='rect', ctx=Load()), attr='area', ctx=Load()) +``` + +Now to actually use the library. 
First construct an Evaluator: + +```python +from pure_eval import Evaluator + +evaluator = Evaluator({"rect": rect}) +``` + +The argument to `Evaluator` should be a mapping from variable names to their values. Or if you have access to the stack frame where `rect` is defined, you can instead use: + +```python +evaluator = Evaluator.from_frame(frame) +``` + +Now to evaluate some nodes, using `evaluator[node]`: + +```python +print("rect.width:", evaluator[the_tuple.elts[0]]) +print("rect:", evaluator[the_tuple.elts[0].value]) +``` + +Output: + +``` +rect.width: 3 +rect: <__main__.Rectangle object at 0x105b0dd30> +``` + +OK, but you could have done the same thing with `eval`. The useful part is that it will refuse to evaluate the property `rect.area` because that would trigger unknown code. If we try, it'll raise a `CannotEval` exception. + +```python +from pure_eval import CannotEval + +try: + print("rect.area:", evaluator[the_tuple.elts[2]]) # fails +except CannotEval as e: + print(e) # prints CannotEval +``` + +To find all the expressions that can be evaluated in a tree: + +```python +for node, value in evaluator.find_expressions(tree): + print(ast.dump(node), value) +``` + +Output: + +```python +Attribute(value=Name(id='rect', ctx=Load()), attr='width', ctx=Load()) 3 +Attribute(value=Name(id='rect', ctx=Load()), attr='height', ctx=Load()) 5 +Name(id='rect', ctx=Load()) <__main__.Rectangle object at 0x105568d30> +Name(id='rect', ctx=Load()) <__main__.Rectangle object at 0x105568d30> +Name(id='rect', ctx=Load()) <__main__.Rectangle object at 0x105568d30> +``` + +Note that this includes `rect` three times, once for each appearance in the source code. 
Since all these nodes are equivalent, we can group them together: + +```python +from pure_eval import group_expressions + +for nodes, values in group_expressions(evaluator.find_expressions(tree)): + print(len(nodes), "nodes with value:", values) +``` + +Output: + +``` +1 nodes with value: 3 +1 nodes with value: 5 +3 nodes with value: <__main__.Rectangle object at 0x10d374d30> +``` + +If we want to list all the expressions in a tree, we may want to filter out certain expressions whose values are obvious. For example, suppose we have a function `foo`: + +```python +def foo(): + pass +``` + +If we refer to `foo` by its name as usual, then that's not interesting: + +```python +from pure_eval import is_expression_interesting + +node = ast.parse('foo').body[0].value +print(ast.dump(node)) +print(is_expression_interesting(node, foo)) +``` + +Output: + +```python +Name(id='foo', ctx=Load()) +False +``` + +But if we refer to it by a different name, then it's interesting: + +```python +node = ast.parse('bar').body[0].value +print(ast.dump(node)) +print(is_expression_interesting(node, foo)) +``` + +Output: + +```python +Name(id='bar', ctx=Load()) +True +``` + +In general `is_expression_interesting` returns False for the following values: +- Literals (e.g. `123`, `'abc'`, `[1, 2, 3]`, `{'a': (), 'b': ([1, 2], [3])}`) +- Variables or attributes whose name is equal to the value's `__name__`, such as `foo` above or `self.foo` if it was a method. +- Builtins (e.g. `len`) referred to by their usual name. + +To make things easier, you can combine finding expressions, grouping them, and filtering out the obvious ones with: + +```python +evaluator.interesting_expressions_grouped(root) +``` + +To get the source code of an AST node, I recommend [asttokens](https://github.com/gristlabs/asttokens). 
+ +Here's a complete example that brings it all together: + +```python +from asttokens import ASTTokens +from pure_eval import Evaluator + +source = """ +x = 1 +d = {x: 2} +y = d[x] +""" + +names = {} +exec(source, names) +atok = ASTTokens(source, parse=True) +for nodes, value in Evaluator(names).interesting_expressions_grouped(atok.tree): + print(atok.get_text(nodes[0]), "=", value) +``` + +Output: + +```python +x = 1 +d = {1: 2} +y = 2 +d[x] = 2 +``` diff --git a/contrib/python/pure-eval/pure_eval/__init__.py b/contrib/python/pure-eval/pure_eval/__init__.py new file mode 100644 index 00000000000..0040e318a6e --- /dev/null +++ b/contrib/python/pure-eval/pure_eval/__init__.py @@ -0,0 +1,8 @@ +from .core import Evaluator, CannotEval, group_expressions, is_expression_interesting +from .my_getattr_static import getattr_static + +try: + from .version import __version__ +except ImportError: + # version.py is auto-generated with the git tag when building + __version__ = "???" diff --git a/contrib/python/pure-eval/pure_eval/core.py b/contrib/python/pure-eval/pure_eval/core.py new file mode 100644 index 00000000000..748f0518d4f --- /dev/null +++ b/contrib/python/pure-eval/pure_eval/core.py @@ -0,0 +1,449 @@ +import ast +import builtins +import operator +from collections import ChainMap, OrderedDict, deque +from contextlib import suppress +from types import FrameType +from typing import Any, Tuple, Iterable, List, Mapping, Dict, Union, Set + +from pure_eval.my_getattr_static import getattr_static +from pure_eval.utils import ( + CannotEval, + has_ast_name, + copy_ast_without_context, + is_standard_types, + of_standard_types, + is_any, + of_type, + ensure_dict, +) + + +class Evaluator: + def __init__(self, names: Mapping[str, Any]): + """ + Construct a new evaluator with the given variable names. + This is a low level API, typically you will use `Evaluator.from_frame(frame)`. + + :param names: a mapping from variable names to their values. 
+ """ + + self.names = names + self._cache = {} # type: Dict[ast.expr, Any] + + @classmethod + def from_frame(cls, frame: FrameType) -> 'Evaluator': + """ + Construct an Evaluator that can look up variables from the given frame. + + :param frame: a frame object, e.g. from a traceback or `inspect.currentframe().f_back`. + """ + + return cls(ChainMap( + ensure_dict(frame.f_locals), + ensure_dict(frame.f_globals), + ensure_dict(frame.f_builtins), + )) + + def __getitem__(self, node: ast.expr) -> Any: + """ + Find the value of the given node. + If it cannot be evaluated safely, this raises `CannotEval`. + The result is cached either way. + + :param node: an AST expression to evaluate + :return: the value of the node + """ + + if not isinstance(node, ast.expr): + raise TypeError("node should be an ast.expr, not {!r}".format(type(node).__name__)) + + with suppress(KeyError): + result = self._cache[node] + if result is CannotEval: + raise CannotEval + else: + return result + + try: + self._cache[node] = result = self._handle(node) + return result + except CannotEval: + self._cache[node] = CannotEval + raise + + def _handle(self, node: ast.expr) -> Any: + """ + This is where the evaluation happens. + Users should use `__getitem__`, i.e. `evaluator[node]`, + as it provides caching. 
+ + :param node: an AST expression to evaluate + :return: the value of the node + """ + + with suppress(Exception): + return ast.literal_eval(node) + + if isinstance(node, ast.Name): + try: + return self.names[node.id] + except KeyError: + raise CannotEval + elif isinstance(node, ast.Attribute): + value = self[node.value] + attr = node.attr + return getattr_static(value, attr) + elif isinstance(node, ast.Subscript): + return self._handle_subscript(node) + elif isinstance(node, (ast.List, ast.Tuple, ast.Set, ast.Dict)): + return self._handle_container(node) + elif isinstance(node, ast.UnaryOp): + return self._handle_unary(node) + elif isinstance(node, ast.BinOp): + return self._handle_binop(node) + elif isinstance(node, ast.BoolOp): + return self._handle_boolop(node) + elif isinstance(node, ast.Compare): + return self._handle_compare(node) + elif isinstance(node, ast.Call): + return self._handle_call(node) + raise CannotEval + + def _handle_call(self, node): + if node.keywords: + raise CannotEval + func = self[node.func] + args = [self[arg] for arg in node.args] + + if ( + is_any( + func, + slice, + int, + range, + round, + complex, + list, + tuple, + abs, + hex, + bin, + oct, + bool, + ord, + float, + len, + chr, + ) + or len(args) == 0 + and is_any(func, set, dict, str, frozenset, bytes, bytearray, object) + or len(args) >= 2 + and is_any(func, str, divmod, bytes, bytearray, pow) + ): + args = [ + of_standard_types(arg, check_dict_values=False, deep=False) + for arg in args + ] + try: + return func(*args) + except Exception as e: + raise CannotEval from e + + if len(args) == 1: + arg = args[0] + if is_any(func, id, type): + try: + return func(arg) + except Exception as e: + raise CannotEval from e + if is_any(func, all, any, sum): + of_type(arg, tuple, frozenset, list, set, dict, OrderedDict, deque) + for x in arg: + of_standard_types(x, check_dict_values=False, deep=False) + try: + return func(arg) + except Exception as e: + raise CannotEval from e + + if is_any( 
+ func, sorted, min, max, hash, set, dict, ascii, str, repr, frozenset + ): + of_standard_types(arg, check_dict_values=True, deep=True) + try: + return func(arg) + except Exception as e: + raise CannotEval from e + raise CannotEval + + def _handle_compare(self, node): + left = self[node.left] + result = True + + for op, right in zip(node.ops, node.comparators): + right = self[right] + + op_type = type(op) + op_func = { + ast.Eq: operator.eq, + ast.NotEq: operator.ne, + ast.Lt: operator.lt, + ast.LtE: operator.le, + ast.Gt: operator.gt, + ast.GtE: operator.ge, + ast.Is: operator.is_, + ast.IsNot: operator.is_not, + ast.In: (lambda a, b: a in b), + ast.NotIn: (lambda a, b: a not in b), + }[op_type] + + if op_type not in (ast.Is, ast.IsNot): + of_standard_types(left, check_dict_values=False, deep=True) + of_standard_types(right, check_dict_values=False, deep=True) + + try: + result = op_func(left, right) + except Exception as e: + raise CannotEval from e + if not result: + return result + left = right + + return result + + def _handle_boolop(self, node): + left = of_standard_types( + self[node.values[0]], check_dict_values=False, deep=False + ) + + for right in node.values[1:]: + # We need short circuiting so that the whole operation can be evaluated + # even if the right operand can't + if isinstance(node.op, ast.Or): + left = left or of_standard_types( + self[right], check_dict_values=False, deep=False + ) + else: + assert isinstance(node.op, ast.And) + left = left and of_standard_types( + self[right], check_dict_values=False, deep=False + ) + return left + + def _handle_binop(self, node): + op_type = type(node.op) + op = { + ast.Add: operator.add, + ast.Sub: operator.sub, + ast.Mult: operator.mul, + ast.Div: operator.truediv, + ast.FloorDiv: operator.floordiv, + ast.Mod: operator.mod, + ast.Pow: operator.pow, + ast.LShift: operator.lshift, + ast.RShift: operator.rshift, + ast.BitOr: operator.or_, + ast.BitXor: operator.xor, + ast.BitAnd: operator.and_, + 
}.get(op_type) + if not op: + raise CannotEval + left = self[node.left] + hash_type = is_any(type(left), set, frozenset, dict, OrderedDict) + left = of_standard_types(left, check_dict_values=False, deep=hash_type) + formatting = type(left) in (str, bytes) and op_type == ast.Mod + + right = of_standard_types( + self[node.right], + check_dict_values=formatting, + deep=formatting or hash_type, + ) + try: + return op(left, right) + except Exception as e: + raise CannotEval from e + + def _handle_unary(self, node: ast.UnaryOp): + value = of_standard_types( + self[node.operand], check_dict_values=False, deep=False + ) + op_type = type(node.op) + op = { + ast.USub: operator.neg, + ast.UAdd: operator.pos, + ast.Not: operator.not_, + ast.Invert: operator.invert, + }[op_type] + try: + return op(value) + except Exception as e: + raise CannotEval from e + + def _handle_subscript(self, node): + value = self[node.value] + of_standard_types( + value, check_dict_values=False, deep=is_any(type(value), dict, OrderedDict) + ) + index = node.slice + if isinstance(index, ast.Slice): + index = slice( + *[ + None if p is None else self[p] + for p in [index.lower, index.upper, index.step] + ] + ) + elif isinstance(index, ast.ExtSlice): + raise CannotEval + else: + if isinstance(index, ast.Index): + index = index.value + index = self[index] + of_standard_types(index, check_dict_values=False, deep=True) + + try: + return value[index] + except Exception: + raise CannotEval + + def _handle_container( + self, + node: Union[ast.List, ast.Tuple, ast.Set, ast.Dict] + ) -> Union[List, Tuple, Set, Dict]: + """Handle container nodes, including List, Set, Tuple and Dict""" + if isinstance(node, ast.Dict): + elts = node.keys + if None in elts: # ** unpacking inside {}, not yet supported + raise CannotEval + else: + elts = node.elts + elts = [self[elt] for elt in elts] + if isinstance(node, ast.List): + return elts + if isinstance(node, ast.Tuple): + return tuple(elts) + + # Set and Dict + if not all( 
+ is_standard_types(elt, check_dict_values=False, deep=True) for elt in elts + ): + raise CannotEval + + if isinstance(node, ast.Set): + try: + return set(elts) + except TypeError: + raise CannotEval + + assert isinstance(node, ast.Dict) + + pairs = [(elt, self[val]) for elt, val in zip(elts, node.values)] + try: + return dict(pairs) + except TypeError: + raise CannotEval + + def find_expressions(self, root: ast.AST) -> Iterable[Tuple[ast.expr, Any]]: + """ + Find all expressions in the given tree that can be safely evaluated. + This is a low level API, typically you will use `interesting_expressions_grouped`. + + :param root: any AST node + :return: generator of pairs (tuples) of expression nodes and their corresponding values. + """ + + for node in ast.walk(root): + if not isinstance(node, ast.expr): + continue + + try: + value = self[node] + except CannotEval: + continue + + yield node, value + + def interesting_expressions_grouped(self, root: ast.AST) -> List[Tuple[List[ast.expr], Any]]: + """ + Find all interesting expressions in the given tree that can be safely evaluated, + grouping equivalent nodes together. + + For more control and details, see: + - Evaluator.find_expressions + - is_expression_interesting + - group_expressions + + :param root: any AST node + :return: A list of pairs (tuples) containing: + - A list of equivalent AST expressions + - The value of the first expression node + (which should be the same for all nodes, unless threads are involved) + """ + + return group_expressions( + pair + for pair in self.find_expressions(root) + if is_expression_interesting(*pair) + ) + + +def is_expression_interesting(node: ast.expr, value: Any) -> bool: + """ + Determines if an expression is potentially interesting, at least in my opinion. + Returns False for the following expressions whose value is generally obvious: + - Literals (e.g. 
123, 'abc', [1, 2, 3], {'a': (), 'b': ([1, 2], [3])}) + - Variables or attributes whose name is equal to the value's __name__. + For example, a function `def foo(): ...` is not interesting when referred to + as `foo` as it usually would, but `bar` can be interesting if `bar is foo`. + Similarly the method `self.foo` is not interesting. + - Builtins (e.g. `len`) referred to by their usual name. + + This is a low level API, typically you will use `interesting_expressions_grouped`. + + :param node: an AST expression + :param value: the value of the node + :return: a boolean: True if the expression is interesting, False otherwise + """ + + with suppress(ValueError): + ast.literal_eval(node) + return False + + # TODO exclude inner modules, e.g. numpy.random.__name__ == 'numpy.random' != 'random' + # TODO exclude common module abbreviations, e.g. numpy as np, pandas as pd + if has_ast_name(value, node): + return False + + if ( + isinstance(node, ast.Name) + and getattr(builtins, node.id, object()) is value + ): + return False + + return True + + +def group_expressions(expressions: Iterable[Tuple[ast.expr, Any]]) -> List[Tuple[List[ast.expr], Any]]: + """ + Organise expression nodes and their values such that equivalent nodes are together. + Two nodes are considered equivalent if they have the same structure, + ignoring context (Load, Store, or Delete) and location (lineno, col_offset). + For example, this will group together the same variable name mentioned multiple times in an expression. + + This will not check the values of the nodes. Equivalent nodes should have the same values, + unless threads are involved. + + This is a low level API, typically you will use `interesting_expressions_grouped`. + + :param expressions: pairs of AST expressions and their values, as obtained from + `Evaluator.find_expressions`, or `(node, evaluator[node])`. 
+ :return: A list of pairs (tuples) containing: + - A list of equivalent AST expressions + - The value of the first expression node + (which should be the same for all nodes, unless threads are involved) + """ + + result = {} + for node, value in expressions: + dump = ast.dump(copy_ast_without_context(node)) + result.setdefault(dump, ([], value))[0].append(node) + return list(result.values()) diff --git a/contrib/python/pure-eval/pure_eval/my_getattr_static.py b/contrib/python/pure-eval/pure_eval/my_getattr_static.py new file mode 100644 index 00000000000..c750b1acc3f --- /dev/null +++ b/contrib/python/pure-eval/pure_eval/my_getattr_static.py @@ -0,0 +1,138 @@ +import types + +from pure_eval.utils import of_type, CannotEval + +_sentinel = object() + + +def _static_getmro(klass): + return type.__dict__['__mro__'].__get__(klass) + + +def _check_instance(obj, attr): + instance_dict = {} + try: + instance_dict = object.__getattribute__(obj, "__dict__") + except AttributeError: + pass + return dict.get(instance_dict, attr, _sentinel) + + +def _check_class(klass, attr): + for entry in _static_getmro(klass): + if _shadowed_dict(type(entry)) is _sentinel: + try: + return entry.__dict__[attr] + except KeyError: + pass + else: + break + return _sentinel + + +def _is_type(obj): + try: + _static_getmro(obj) + except TypeError: + return False + return True + + +def _shadowed_dict(klass): + dict_attr = type.__dict__["__dict__"] + for entry in _static_getmro(klass): + try: + class_dict = dict_attr.__get__(entry)["__dict__"] + except KeyError: + pass + else: + if not (type(class_dict) is types.GetSetDescriptorType and + class_dict.__name__ == "__dict__" and + class_dict.__objclass__ is entry): + return class_dict + return _sentinel + + +def getattr_static(obj, attr): + """Retrieve attributes without triggering dynamic lookup via the + descriptor protocol, __getattr__ or __getattribute__. 
+ + Note: this function may not be able to retrieve all attributes + that getattr can fetch (like dynamically created attributes) + and may find attributes that getattr can't (like descriptors + that raise AttributeError). It can also return descriptor objects + instead of instance members in some cases. See the + documentation for details. + """ + instance_result = _sentinel + if not _is_type(obj): + klass = type(obj) + dict_attr = _shadowed_dict(klass) + if (dict_attr is _sentinel or + type(dict_attr) is types.MemberDescriptorType): + instance_result = _check_instance(obj, attr) + else: + raise CannotEval + else: + klass = obj + + klass_result = _check_class(klass, attr) + + if instance_result is not _sentinel and klass_result is not _sentinel: + if (_check_class(type(klass_result), '__get__') is not _sentinel and + _check_class(type(klass_result), '__set__') is not _sentinel): + return _resolve_descriptor(klass_result, obj, klass) + + if instance_result is not _sentinel: + return instance_result + if klass_result is not _sentinel: + get = _check_class(type(klass_result), '__get__') + if get is _sentinel: + return klass_result + else: + if obj is klass: + instance = None + else: + instance = obj + return _resolve_descriptor(klass_result, instance, klass) + + if obj is klass: + # for types we check the metaclass too + for entry in _static_getmro(type(klass)): + if _shadowed_dict(type(entry)) is _sentinel: + try: + result = entry.__dict__[attr] + get = _check_class(type(result), '__get__') + if get is not _sentinel: + raise CannotEval + return result + except KeyError: + pass + raise CannotEval + + +class _foo: + __slots__ = ['foo'] + method = lambda: 0 + + +slot_descriptor = _foo.foo +wrapper_descriptor = str.__dict__['__add__'] +method_descriptor = str.__dict__['startswith'] +user_method_descriptor = _foo.__dict__['method'] + +safe_descriptors_raw = [ + slot_descriptor, + wrapper_descriptor, + method_descriptor, + user_method_descriptor, +] + 
# ---------------------------------------------------------------------------
# Tail of contrib/python/pure-eval/pure_eval/my_getattr_static.py
#
# The beginning of that file (including the definition of
# `safe_descriptors_raw`) lies before this section, so the fragment is kept
# here as commented text instead of executable code.
# ---------------------------------------------------------------------------
# safe_descriptor_types = list(map(type, safe_descriptors_raw))
#
#
# def _resolve_descriptor(d, instance, owner):
#     try:
#         return type(of_type(d, *safe_descriptor_types)).__get__(d, instance, owner)
#     except AttributeError as e:
#         raise CannotEval from e

# ---------------------------------------------------------------------------
# diff --git a/contrib/python/pure-eval/pure_eval/py.typed
#            b/contrib/python/pure-eval/pure_eval/py.typed
# new file mode 100644, index 00000000000..298c64a9041, @@ -0,0 +1 @@
# ---------------------------------------------------------------------------
# Marker file for PEP 561. The pure_eval package uses inline types.

# ---------------------------------------------------------------------------
# diff --git a/contrib/python/pure-eval/pure_eval/utils.py
#            b/contrib/python/pure-eval/pure_eval/utils.py
# new file mode 100644, index 00000000000..a8a37302daa, @@ -0,0 +1,201 @@
# ---------------------------------------------------------------------------
from collections import OrderedDict, deque
from datetime import date, time, datetime
from decimal import Decimal
from fractions import Fraction
import ast
import enum
import typing


class CannotEval(Exception):
    """Raised when a value or node cannot be evaluated.

    Both ``repr()`` and ``str()`` of an instance are just the class name.
    """

    def __repr__(self):
        return self.__class__.__name__

    __str__ = __repr__


def is_any(x, *args):
    """Return True if ``x`` is (by identity) one of ``args``."""
    return any(
        x is arg
        for arg in args
    )


def of_type(x, *types):
    """Return ``x`` if its exact type is one of ``types``.

    Subclasses do not match: the comparison is ``type(x) is t``.

    Raises:
        CannotEval: if ``type(x)`` is not one of ``types``.
    """
    if is_any(type(x), *types):
        return x
    else:
        raise CannotEval


def of_standard_types(x, *, check_dict_values: bool, deep: bool):
    """Return ``x`` if ``is_standard_types`` accepts it.

    Raises:
        CannotEval: if ``is_standard_types`` returns a falsy result.
    """
    if is_standard_types(x, check_dict_values=check_dict_values, deep=deep):
        return x
    else:
        raise CannotEval


def is_standard_types(x, *, check_dict_values: bool, deep: bool):
    """Return whether ``x`` is made only of the whitelisted builtin types.

    A ``RecursionError`` raised while walking ``x`` (for example a container
    that contains itself) is treated as "not standard" and yields False.
    """
    try:
        return _is_standard_types_deep(x, check_dict_values, deep)[0]
    except RecursionError:
        return False


def _is_standard_types_deep(x, check_dict_values: bool, deep: bool):
    """Check ``x`` against the whitelist, optionally recursing into containers.

    Args:
        x: the value to inspect. Only its exact type is considered.
        check_dict_values: when true, both keys and values of ``dict`` /
            ``OrderedDict`` are inspected; otherwise only the keys are
            (iterating a mapping yields its keys).
        deep: when false, a whitelisted container is accepted without
            looking at its elements.

    Returns:
        A ``(is_standard, length)`` tuple. ``length`` is the accumulated size
        of the containers visited so far; the walk gives up with ``False``
        once it exceeds 100000.
    """
    typ = type(x)
    if is_any(
        typ,
        str,
        int,
        bool,
        float,
        bytes,
        complex,
        date,
        time,
        datetime,
        Fraction,
        Decimal,
        type(None),
        object,
    ):
        return True, 0

    if is_any(typ, tuple, frozenset, list, set, dict, OrderedDict, deque, slice):
        # A slice has no len(); its three fields are inspected below instead.
        if typ is slice:
            length = 0
        else:
            length = len(x)
        assert isinstance(deep, bool)
        if not deep:
            return True, length

        if check_dict_values and typ in (dict, OrderedDict):
            # Flatten (key, value) pairs so keys and values are both checked.
            items = (v for pair in x.items() for v in pair)
        elif typ is slice:
            items = [x.start, x.stop, x.step]
        else:
            items = x
        for item in items:
            # Bail out on very large structures rather than walking them.
            if length > 100000:
                return False, length
            is_standard, item_length = _is_standard_types_deep(
                item, check_dict_values, deep
            )
            if not is_standard:
                return False, length
            length += item_length
        return True, length

    return False, 0


class _E(enum.Enum):
    """Empty enum; used only as a sample in ``safe_name_samples``."""


class _C:
    """Sample class providing one callable of each method flavour."""

    def foo(self): pass  # pragma: nocover

    def bar(self): pass  # pragma: nocover

    @classmethod
    def cm(cls): pass  # pragma: nocover

    @staticmethod
    def sm(): pass  # pragma: nocover


# Sample objects; only their *types* are used, to build ``safe_name_types``.
safe_name_samples = {
    "len": len,
    "append": list.append,
    "__add__": list.__add__,
    "insert": [].insert,
    "__mul__": [].__mul__,
    "fromkeys": dict.__dict__['fromkeys'],
    "is_any": is_any,
    "__repr__": CannotEval.__repr__,
    "foo": _C().foo,
    "bar": _C.bar,
    "cm": _C.cm,
    "sm": _C.sm,
    "ast": ast,
    "CannotEval": CannotEval,
    "_E": _E,
}

# Sample ``typing`` objects; their types make up ``typing_annotation_types``.
typing_annotation_samples = {
    name: getattr(typing, name)
    for name in "List Dict Tuple Set Callable Mapping".split()
}

safe_name_types = tuple({
    type(f)
    for f in safe_name_samples.values()
})


typing_annotation_types = tuple({
    type(f)
    for f in typing_annotation_samples.values()
})


def eq_checking_types(a, b):
    """Return True if ``a`` and ``b`` have the same exact type and are equal."""
    return type(a) is type(b) and a == b


def ast_name(node):
    """Return the identifier of an ``ast.Name`` or ``ast.Attribute`` node.

    Returns ``node.id`` for a Name, ``node.attr`` for an Attribute and
    ``None`` for any other node.
    """
    if isinstance(node, ast.Name):
        return node.id
    elif isinstance(node, ast.Attribute):
        return node.attr
    else:
        return None


def safe_name(value):
    """Return a name for ``value`` when its exact type is a known one.

    Values whose type is in ``safe_name_types`` yield ``value.__name__``;
    ``typing.Optional`` and ``typing.Union`` yield fixed strings; values whose
    type is in ``typing_annotation_types`` yield ``__name__`` or, failing
    that, ``_name``. Anything else yields ``None``.
    """
    typ = type(value)
    if is_any(typ, *safe_name_types):
        return value.__name__
    elif value is typing.Optional:
        return "Optional"
    elif value is typing.Union:
        return "Union"
    elif is_any(typ, *typing_annotation_types):
        return getattr(value, "__name__", None) or getattr(value, "_name", None)
    else:
        return None


def has_ast_name(value, node):
    """Return True if ``safe_name(value)`` is a ``str`` equal to ``ast_name(node)``."""
    value_name = safe_name(value)
    # ``__name__`` may be something other than a plain str; reject it then.
    if type(value_name) is not str:
        return False
    return eq_checking_types(ast_name(node), value_name)


def copy_ast_without_context(x):
    """Recursively copy an AST, leaving out every ``ctx`` field.

    Only fields listed in ``_fields`` (and actually present on the node) are
    copied. Lists are copied element-wise; any other value is returned as is.
    """
    if isinstance(x, ast.AST):
        kwargs = {
            field: copy_ast_without_context(getattr(x, field))
            for field in x._fields
            if field != 'ctx'
            if hasattr(x, field)
        }
        return type(x)(**kwargs)
    elif isinstance(x, list):
        return list(map(copy_ast_without_context, x))
    else:
        return x


def ensure_dict(x):
    """
    Handles invalid non-dict inputs

    Returns ``dict(x)``, or an empty dict if that conversion raises.
    """
    try:
        return dict(x)
    except Exception:
        return {}


# ---------------------------------------------------------------------------
# diff --git a/contrib/python/pure-eval/pure_eval/version.py
#            b/contrib/python/pure-eval/pure_eval/version.py
# new file mode 100644, index 00000000000..9dd16a34511, @@ -0,0 +1 @@
# ---------------------------------------------------------------------------
__version__ = '0.2.2'
\ No newline at end of file |
