author:    maxim-yurchuk <maxim-yurchuk@yandex-team.com>  2024-10-09 12:29:46 +0300
committer: maxim-yurchuk <maxim-yurchuk@yandex-team.com>  2024-10-09 13:14:22 +0300
commit:    9731d8a4bb7ee2cc8554eaf133bb85498a4c7d80
tree:      a8fb3181d5947c0d78cf402aa56e686130179049  /contrib/tools/python3/Lib/lib2to3
parent:    a44b779cd359f06c3ebbef4ec98c6b38609d9d85
download:  ydb-9731d8a4bb7ee2cc8554eaf133bb85498a4c7d80.tar.gz
publishFullContrib: true for ydb
<HIDDEN_URL>
commit_hash:c82a80ac4594723cebf2c7387dec9c60217f603e
Diffstat (limited to 'contrib/tools/python3/Lib/lib2to3')
73 files changed, 8391 insertions, 0 deletions
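Note: the first hunk below adds contrib/tools/python3/Lib/lib2to3/__init__.py, which emits a DeprecationWarning as soon as the package is imported. As a minimal sketch (assuming a Python build that still ships lib2to3, i.e. before its removal in 3.13), the warning added in that file can be observed like this:

    import warnings

    # Importing lib2to3 triggers the DeprecationWarning added in __init__.py below.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        import lib2to3  # noqa: F401

    # Expected message: "lib2to3 package is deprecated and may not be able
    # to parse Python 3.10+"
    for w in caught:
        if issubclass(w.category, DeprecationWarning):
            print(w.message)
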
diff --git a/contrib/tools/python3/Lib/lib2to3/__init__.py b/contrib/tools/python3/Lib/lib2to3/__init__.py new file mode 100644 index 0000000000..177405c809 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/__init__.py @@ -0,0 +1,8 @@ +import warnings + + +warnings.warn( + "lib2to3 package is deprecated and may not be able to parse Python 3.10+", + DeprecationWarning, + stacklevel=2, +) diff --git a/contrib/tools/python3/Lib/lib2to3/__main__.py b/contrib/tools/python3/Lib/lib2to3/__main__.py new file mode 100644 index 0000000000..80688baf27 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/__main__.py @@ -0,0 +1,4 @@ +import sys +from .main import main + +sys.exit(main("lib2to3.fixes")) diff --git a/contrib/tools/python3/Lib/lib2to3/btm_matcher.py b/contrib/tools/python3/Lib/lib2to3/btm_matcher.py new file mode 100644 index 0000000000..3b78868038 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/btm_matcher.py @@ -0,0 +1,163 @@ +"""A bottom-up tree matching algorithm implementation meant to speed +up 2to3's matching process. After the tree patterns are reduced to +their rarest linear path, a linear Aho-Corasick automaton is +created. The linear automaton traverses the linear paths from the +leaves to the root of the AST and returns a set of nodes for further +matching. This reduces significantly the number of candidate nodes.""" + +__author__ = "George Boutsioukis <gboutsioukis@gmail.com>" + +import logging +import itertools +from collections import defaultdict + +from . import pytree +from .btm_utils import reduce_tree + +class BMNode(object): + """Class for a node of the Aho-Corasick automaton used in matching""" + count = itertools.count() + def __init__(self): + self.transition_table = {} + self.fixers = [] + self.id = next(BMNode.count) + self.content = '' + +class BottomMatcher(object): + """The main matcher class. After instantiating the patterns should + be added using the add_fixer method""" + + def __init__(self): + self.match = set() + self.root = BMNode() + self.nodes = [self.root] + self.fixers = [] + self.logger = logging.getLogger("RefactoringTool") + + def add_fixer(self, fixer): + """Reduces a fixer's pattern tree to a linear path and adds it + to the matcher(a common Aho-Corasick automaton). 
The fixer is + appended on the matching states and called when they are + reached""" + self.fixers.append(fixer) + tree = reduce_tree(fixer.pattern_tree) + linear = tree.get_linear_subpattern() + match_nodes = self.add(linear, start=self.root) + for match_node in match_nodes: + match_node.fixers.append(fixer) + + def add(self, pattern, start): + "Recursively adds a linear pattern to the AC automaton" + #print("adding pattern", pattern, "to", start) + if not pattern: + #print("empty pattern") + return [start] + if isinstance(pattern[0], tuple): + #alternatives + #print("alternatives") + match_nodes = [] + for alternative in pattern[0]: + #add all alternatives, and add the rest of the pattern + #to each end node + end_nodes = self.add(alternative, start=start) + for end in end_nodes: + match_nodes.extend(self.add(pattern[1:], end)) + return match_nodes + else: + #single token + #not last + if pattern[0] not in start.transition_table: + #transition did not exist, create new + next_node = BMNode() + start.transition_table[pattern[0]] = next_node + else: + #transition exists already, follow + next_node = start.transition_table[pattern[0]] + + if pattern[1:]: + end_nodes = self.add(pattern[1:], start=next_node) + else: + end_nodes = [next_node] + return end_nodes + + def run(self, leaves): + """The main interface with the bottom matcher. The tree is + traversed from the bottom using the constructed + automaton. Nodes are only checked once as the tree is + retraversed. When the automaton fails, we give it one more + shot(in case the above tree matches as a whole with the + rejected leaf), then we break for the next leaf. There is the + special case of multiple arguments(see code comments) where we + recheck the nodes + + Args: + The leaves of the AST tree to be matched + + Returns: + A dictionary of node matches with fixers as the keys + """ + current_ac_node = self.root + results = defaultdict(list) + for leaf in leaves: + current_ast_node = leaf + while current_ast_node: + current_ast_node.was_checked = True + for child in current_ast_node.children: + # multiple statements, recheck + if isinstance(child, pytree.Leaf) and child.value == ";": + current_ast_node.was_checked = False + break + if current_ast_node.type == 1: + #name + node_token = current_ast_node.value + else: + node_token = current_ast_node.type + + if node_token in current_ac_node.transition_table: + #token matches + current_ac_node = current_ac_node.transition_table[node_token] + for fixer in current_ac_node.fixers: + results[fixer].append(current_ast_node) + else: + #matching failed, reset automaton + current_ac_node = self.root + if (current_ast_node.parent is not None + and current_ast_node.parent.was_checked): + #the rest of the tree upwards has been checked, next leaf + break + + #recheck the rejected node once from the root + if node_token in current_ac_node.transition_table: + #token matches + current_ac_node = current_ac_node.transition_table[node_token] + for fixer in current_ac_node.fixers: + results[fixer].append(current_ast_node) + + current_ast_node = current_ast_node.parent + return results + + def print_ac(self): + "Prints a graphviz diagram of the BM automaton(for debugging)" + print("digraph g{") + def print_node(node): + for subnode_key in node.transition_table.keys(): + subnode = node.transition_table[subnode_key] + print("%d -> %d [label=%s] //%s" % + (node.id, subnode.id, type_repr(subnode_key), str(subnode.fixers))) + if subnode_key == 1: + print(subnode.content) + print_node(subnode) + print_node(self.root) 
+ print("}") + +# taken from pytree.py for debugging; only used by print_ac +_type_reprs = {} +def type_repr(type_num): + global _type_reprs + if not _type_reprs: + from .pygram import python_symbols + # printing tokens is possible but not as useful + # from .pgen2 import token // token.__dict__.items(): + for name, val in python_symbols.__dict__.items(): + if type(val) == int: _type_reprs[val] = name + return _type_reprs.setdefault(type_num, type_num) diff --git a/contrib/tools/python3/Lib/lib2to3/btm_utils.py b/contrib/tools/python3/Lib/lib2to3/btm_utils.py new file mode 100644 index 0000000000..b61afdba69 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/btm_utils.py @@ -0,0 +1,280 @@ +"Utility functions used by the btm_matcher module" + +from . import pytree +from .pgen2 import grammar, token +from .pygram import pattern_symbols, python_symbols + +syms = pattern_symbols +pysyms = python_symbols +tokens = grammar.opmap +token_labels = token + +TYPE_ANY = -1 +TYPE_ALTERNATIVES = -2 +TYPE_GROUP = -3 + +class MinNode(object): + """This class serves as an intermediate representation of the + pattern tree during the conversion to sets of leaf-to-root + subpatterns""" + + def __init__(self, type=None, name=None): + self.type = type + self.name = name + self.children = [] + self.leaf = False + self.parent = None + self.alternatives = [] + self.group = [] + + def __repr__(self): + return str(self.type) + ' ' + str(self.name) + + def leaf_to_root(self): + """Internal method. Returns a characteristic path of the + pattern tree. This method must be run for all leaves until the + linear subpatterns are merged into a single""" + node = self + subp = [] + while node: + if node.type == TYPE_ALTERNATIVES: + node.alternatives.append(subp) + if len(node.alternatives) == len(node.children): + #last alternative + subp = [tuple(node.alternatives)] + node.alternatives = [] + node = node.parent + continue + else: + node = node.parent + subp = None + break + + if node.type == TYPE_GROUP: + node.group.append(subp) + #probably should check the number of leaves + if len(node.group) == len(node.children): + subp = get_characteristic_subpattern(node.group) + node.group = [] + node = node.parent + continue + else: + node = node.parent + subp = None + break + + if node.type == token_labels.NAME and node.name: + #in case of type=name, use the name instead + subp.append(node.name) + else: + subp.append(node.type) + + node = node.parent + return subp + + def get_linear_subpattern(self): + """Drives the leaf_to_root method. The reason that + leaf_to_root must be run multiple times is because we need to + reject 'group' matches; for example the alternative form + (a | b c) creates a group [b c] that needs to be matched. Since + matching multiple linear patterns overcomes the automaton's + capabilities, leaf_to_root merges each group into a single + choice based on 'characteristic'ity, + + i.e. (a|b c) -> (a|b) if b more characteristic than c + + Returns: The most 'characteristic'(as defined by + get_characteristic_subpattern) path for the compiled pattern + tree. + """ + + for l in self.leaves(): + subp = l.leaf_to_root() + if subp: + return subp + + def leaves(self): + "Generator that returns the leaves of the tree" + for child in self.children: + yield from child.leaves() + if not self.children: + yield self + +def reduce_tree(node, parent=None): + """ + Internal function. Reduces a compiled pattern tree to an + intermediate representation suitable for feeding the + automaton. 
This also trims off any optional pattern elements(like + [a], a*). + """ + + new_node = None + #switch on the node type + if node.type == syms.Matcher: + #skip + node = node.children[0] + + if node.type == syms.Alternatives : + #2 cases + if len(node.children) <= 2: + #just a single 'Alternative', skip this node + new_node = reduce_tree(node.children[0], parent) + else: + #real alternatives + new_node = MinNode(type=TYPE_ALTERNATIVES) + #skip odd children('|' tokens) + for child in node.children: + if node.children.index(child)%2: + continue + reduced = reduce_tree(child, new_node) + if reduced is not None: + new_node.children.append(reduced) + elif node.type == syms.Alternative: + if len(node.children) > 1: + + new_node = MinNode(type=TYPE_GROUP) + for child in node.children: + reduced = reduce_tree(child, new_node) + if reduced: + new_node.children.append(reduced) + if not new_node.children: + # delete the group if all of the children were reduced to None + new_node = None + + else: + new_node = reduce_tree(node.children[0], parent) + + elif node.type == syms.Unit: + if (isinstance(node.children[0], pytree.Leaf) and + node.children[0].value == '('): + #skip parentheses + return reduce_tree(node.children[1], parent) + if ((isinstance(node.children[0], pytree.Leaf) and + node.children[0].value == '[') + or + (len(node.children)>1 and + hasattr(node.children[1], "value") and + node.children[1].value == '[')): + #skip whole unit if its optional + return None + + leaf = True + details_node = None + alternatives_node = None + has_repeater = False + repeater_node = None + has_variable_name = False + + for child in node.children: + if child.type == syms.Details: + leaf = False + details_node = child + elif child.type == syms.Repeater: + has_repeater = True + repeater_node = child + elif child.type == syms.Alternatives: + alternatives_node = child + if hasattr(child, 'value') and child.value == '=': # variable name + has_variable_name = True + + #skip variable name + if has_variable_name: + #skip variable name, '=' + name_leaf = node.children[2] + if hasattr(name_leaf, 'value') and name_leaf.value == '(': + # skip parenthesis + name_leaf = node.children[3] + else: + name_leaf = node.children[0] + + #set node type + if name_leaf.type == token_labels.NAME: + #(python) non-name or wildcard + if name_leaf.value == 'any': + new_node = MinNode(type=TYPE_ANY) + else: + if hasattr(token_labels, name_leaf.value): + new_node = MinNode(type=getattr(token_labels, name_leaf.value)) + else: + new_node = MinNode(type=getattr(pysyms, name_leaf.value)) + + elif name_leaf.type == token_labels.STRING: + #(python) name or character; remove the apostrophes from + #the string value + name = name_leaf.value.strip("'") + if name in tokens: + new_node = MinNode(type=tokens[name]) + else: + new_node = MinNode(type=token_labels.NAME, name=name) + elif name_leaf.type == syms.Alternatives: + new_node = reduce_tree(alternatives_node, parent) + + #handle repeaters + if has_repeater: + if repeater_node.children[0].value == '*': + #reduce to None + new_node = None + elif repeater_node.children[0].value == '+': + #reduce to a single occurrence i.e. 
do nothing + pass + else: + #TODO: handle {min, max} repeaters + raise NotImplementedError + + #add children + if details_node and new_node is not None: + for child in details_node.children[1:-1]: + #skip '<', '>' markers + reduced = reduce_tree(child, new_node) + if reduced is not None: + new_node.children.append(reduced) + if new_node: + new_node.parent = parent + return new_node + + +def get_characteristic_subpattern(subpatterns): + """Picks the most characteristic from a list of linear patterns + Current order used is: + names > common_names > common_chars + """ + if not isinstance(subpatterns, list): + return subpatterns + if len(subpatterns)==1: + return subpatterns[0] + + # first pick out the ones containing variable names + subpatterns_with_names = [] + subpatterns_with_common_names = [] + common_names = ['in', 'for', 'if' , 'not', 'None'] + subpatterns_with_common_chars = [] + common_chars = "[]().,:" + for subpattern in subpatterns: + if any(rec_test(subpattern, lambda x: type(x) is str)): + if any(rec_test(subpattern, + lambda x: isinstance(x, str) and x in common_chars)): + subpatterns_with_common_chars.append(subpattern) + elif any(rec_test(subpattern, + lambda x: isinstance(x, str) and x in common_names)): + subpatterns_with_common_names.append(subpattern) + + else: + subpatterns_with_names.append(subpattern) + + if subpatterns_with_names: + subpatterns = subpatterns_with_names + elif subpatterns_with_common_names: + subpatterns = subpatterns_with_common_names + elif subpatterns_with_common_chars: + subpatterns = subpatterns_with_common_chars + # of the remaining subpatterns pick out the longest one + return max(subpatterns, key=len) + +def rec_test(sequence, test_func): + """Tests test_func on all items of sequence and items of included + sub-iterables""" + for x in sequence: + if isinstance(x, (list, tuple)): + yield from rec_test(x, test_func) + else: + yield test_func(x) diff --git a/contrib/tools/python3/Lib/lib2to3/fixer_base.py b/contrib/tools/python3/Lib/lib2to3/fixer_base.py new file mode 100644 index 0000000000..df581a4dea --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixer_base.py @@ -0,0 +1,186 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Base class for fixers (optional, but recommended).""" + +# Python imports +import itertools + +# Local imports +from .patcomp import PatternCompiler +from . import pygram +from .fixer_util import does_tree_import + +class BaseFix(object): + + """Optional base class for fixers. + + The subclass name must be FixFooBar where FooBar is the result of + removing underscores and capitalizing the words of the fix name. + For example, the class name for a fixer named 'has_key' should be + FixHasKey. + """ + + PATTERN = None # Most subclasses should override with a string literal + pattern = None # Compiled pattern, set by compile_pattern() + pattern_tree = None # Tree representation of the pattern + options = None # Options object passed to initializer + filename = None # The filename (set by set_filename) + numbers = itertools.count(1) # For new_name() + used_names = set() # A set of all used NAMEs + order = "post" # Does the fixer prefer pre- or post-order traversal + explicit = False # Is this ignored by refactor.py -f all? + run_order = 5 # Fixers will be sorted by run order before execution + # Lower numbers will be run first. + _accept_type = None # [Advanced and not public] This tells RefactoringTool + # which node type to accept when there's not a pattern. 
+ + keep_line_order = False # For the bottom matcher: match with the + # original line order + BM_compatible = False # Compatibility with the bottom matching + # module; every fixer should set this + # manually + + # Shortcut for access to Python grammar symbols + syms = pygram.python_symbols + + def __init__(self, options, log): + """Initializer. Subclass may override. + + Args: + options: a dict containing the options passed to RefactoringTool + that could be used to customize the fixer through the command line. + log: a list to append warnings and other messages to. + """ + self.options = options + self.log = log + self.compile_pattern() + + def compile_pattern(self): + """Compiles self.PATTERN into self.pattern. + + Subclass may override if it doesn't want to use + self.{pattern,PATTERN} in .match(). + """ + if self.PATTERN is not None: + PC = PatternCompiler() + self.pattern, self.pattern_tree = PC.compile_pattern(self.PATTERN, + with_tree=True) + + def set_filename(self, filename): + """Set the filename. + + The main refactoring tool should call this. + """ + self.filename = filename + + def match(self, node): + """Returns match for a given parse tree node. + + Should return a true or false object (not necessarily a bool). + It may return a non-empty dict of matching sub-nodes as + returned by a matching pattern. + + Subclass may override. + """ + results = {"node": node} + return self.pattern.match(node, results) and results + + def transform(self, node, results): + """Returns the transformation for a given parse tree node. + + Args: + node: the root of the parse tree that matched the fixer. + results: a dict mapping symbolic names to part of the match. + + Returns: + None, or a node that is a modified copy of the + argument node. The node argument may also be modified in-place to + effect the same change. + + Subclass *must* override. + """ + raise NotImplementedError() + + def new_name(self, template="xxx_todo_changeme"): + """Return a string suitable for use as an identifier + + The new name is guaranteed not to conflict with other identifiers. + """ + name = template + while name in self.used_names: + name = template + str(next(self.numbers)) + self.used_names.add(name) + return name + + def log_message(self, message): + if self.first_log: + self.first_log = False + self.log.append("### In file %s ###" % self.filename) + self.log.append(message) + + def cannot_convert(self, node, reason=None): + """Warn the user that a given chunk of code is not valid Python 3, + but that it cannot be converted automatically. + + First argument is the top-level node for the code in question. + Optional second argument is why it can't be converted. + """ + lineno = node.get_lineno() + for_output = node.clone() + for_output.prefix = "" + msg = "Line %d: could not convert: %s" + self.log_message(msg % (lineno, for_output)) + if reason: + self.log_message(reason) + + def warning(self, node, reason): + """Used for warning the user about possible uncertainty in the + translation. + + First argument is the top-level node for the code in question. + Optional second argument is why it can't be converted. + """ + lineno = node.get_lineno() + self.log_message("Line %d: %s" % (lineno, reason)) + + def start_tree(self, tree, filename): + """Some fixers need to maintain tree-wide state. + This method is called once, at the start of tree fix-up. + + tree - the root node of the tree to be processed. + filename - the name of the file the tree came from. 
+ """ + self.used_names = tree.used_names + self.set_filename(filename) + self.numbers = itertools.count(1) + self.first_log = True + + def finish_tree(self, tree, filename): + """Some fixers need to maintain tree-wide state. + This method is called once, at the conclusion of tree fix-up. + + tree - the root node of the tree to be processed. + filename - the name of the file the tree came from. + """ + pass + + +class ConditionalFix(BaseFix): + """ Base class for fixers which not execute if an import is found. """ + + # This is the name of the import which, if found, will cause the test to be skipped + skip_on = None + + def start_tree(self, *args): + super(ConditionalFix, self).start_tree(*args) + self._should_skip = None + + def should_skip(self, node): + if self._should_skip is not None: + return self._should_skip + pkg = self.skip_on.split(".") + name = pkg[-1] + pkg = ".".join(pkg[:-1]) + self._should_skip = does_tree_import(pkg, name, node) + return self._should_skip diff --git a/contrib/tools/python3/Lib/lib2to3/fixer_util.py b/contrib/tools/python3/Lib/lib2to3/fixer_util.py new file mode 100644 index 0000000000..c2a3a47f50 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixer_util.py @@ -0,0 +1,453 @@ +"""Utility functions, node construction macros, etc.""" +# Author: Collin Winter + +# Local imports +from .pgen2 import token +from .pytree import Leaf, Node +from .pygram import python_symbols as syms +from . import patcomp + + +########################################################### +### Common node-construction "macros" +########################################################### + +def KeywordArg(keyword, value): + return Node(syms.argument, + [keyword, Leaf(token.EQUAL, "="), value]) + +def LParen(): + return Leaf(token.LPAR, "(") + +def RParen(): + return Leaf(token.RPAR, ")") + +def Assign(target, source): + """Build an assignment statement""" + if not isinstance(target, list): + target = [target] + if not isinstance(source, list): + source.prefix = " " + source = [source] + + return Node(syms.atom, + target + [Leaf(token.EQUAL, "=", prefix=" ")] + source) + +def Name(name, prefix=None): + """Return a NAME leaf""" + return Leaf(token.NAME, name, prefix=prefix) + +def Attr(obj, attr): + """A node tuple for obj.attr""" + return [obj, Node(syms.trailer, [Dot(), attr])] + +def Comma(): + """A comma leaf""" + return Leaf(token.COMMA, ",") + +def Dot(): + """A period (.) leaf""" + return Leaf(token.DOT, ".") + +def ArgList(args, lparen=LParen(), rparen=RParen()): + """A parenthesised argument list, used by Call()""" + node = Node(syms.trailer, [lparen.clone(), rparen.clone()]) + if args: + node.insert_child(1, Node(syms.arglist, args)) + return node + +def Call(func_name, args=None, prefix=None): + """A function call""" + node = Node(syms.power, [func_name, ArgList(args)]) + if prefix is not None: + node.prefix = prefix + return node + +def Newline(): + """A newline literal""" + return Leaf(token.NEWLINE, "\n") + +def BlankLine(): + """A blank line""" + return Leaf(token.NEWLINE, "") + +def Number(n, prefix=None): + return Leaf(token.NUMBER, n, prefix=prefix) + +def Subscript(index_node): + """A numeric or string subscript""" + return Node(syms.trailer, [Leaf(token.LBRACE, "["), + index_node, + Leaf(token.RBRACE, "]")]) + +def String(string, prefix=None): + """A string leaf""" + return Leaf(token.STRING, string, prefix=prefix) + +def ListComp(xp, fp, it, test=None): + """A list comprehension of the form [xp for fp in it if test]. 
+ + If test is None, the "if test" part is omitted. + """ + xp.prefix = "" + fp.prefix = " " + it.prefix = " " + for_leaf = Leaf(token.NAME, "for") + for_leaf.prefix = " " + in_leaf = Leaf(token.NAME, "in") + in_leaf.prefix = " " + inner_args = [for_leaf, fp, in_leaf, it] + if test: + test.prefix = " " + if_leaf = Leaf(token.NAME, "if") + if_leaf.prefix = " " + inner_args.append(Node(syms.comp_if, [if_leaf, test])) + inner = Node(syms.listmaker, [xp, Node(syms.comp_for, inner_args)]) + return Node(syms.atom, + [Leaf(token.LBRACE, "["), + inner, + Leaf(token.RBRACE, "]")]) + +def FromImport(package_name, name_leafs): + """ Return an import statement in the form: + from package import name_leafs""" + # XXX: May not handle dotted imports properly (eg, package_name='foo.bar') + #assert package_name == '.' or '.' not in package_name, "FromImport has "\ + # "not been tested with dotted package names -- use at your own "\ + # "peril!" + + for leaf in name_leafs: + # Pull the leaves out of their old tree + leaf.remove() + + children = [Leaf(token.NAME, "from"), + Leaf(token.NAME, package_name, prefix=" "), + Leaf(token.NAME, "import", prefix=" "), + Node(syms.import_as_names, name_leafs)] + imp = Node(syms.import_from, children) + return imp + +def ImportAndCall(node, results, names): + """Returns an import statement and calls a method + of the module: + + import module + module.name()""" + obj = results["obj"].clone() + if obj.type == syms.arglist: + newarglist = obj.clone() + else: + newarglist = Node(syms.arglist, [obj.clone()]) + after = results["after"] + if after: + after = [n.clone() for n in after] + new = Node(syms.power, + Attr(Name(names[0]), Name(names[1])) + + [Node(syms.trailer, + [results["lpar"].clone(), + newarglist, + results["rpar"].clone()])] + after) + new.prefix = node.prefix + return new + + +########################################################### +### Determine whether a node represents a given literal +########################################################### + +def is_tuple(node): + """Does the node represent a tuple literal?""" + if isinstance(node, Node) and node.children == [LParen(), RParen()]: + return True + return (isinstance(node, Node) + and len(node.children) == 3 + and isinstance(node.children[0], Leaf) + and isinstance(node.children[1], Node) + and isinstance(node.children[2], Leaf) + and node.children[0].value == "(" + and node.children[2].value == ")") + +def is_list(node): + """Does the node represent a list literal?""" + return (isinstance(node, Node) + and len(node.children) > 1 + and isinstance(node.children[0], Leaf) + and isinstance(node.children[-1], Leaf) + and node.children[0].value == "[" + and node.children[-1].value == "]") + + +########################################################### +### Misc +########################################################### + +def parenthesize(node): + return Node(syms.atom, [LParen(), node, RParen()]) + + +consuming_calls = {"sorted", "list", "set", "any", "all", "tuple", "sum", + "min", "max", "enumerate"} + +def attr_chain(obj, attr): + """Follow an attribute chain. + + If you have a chain of objects where a.foo -> b, b.foo-> c, etc, + use this to iterate over all objects in the chain. Iteration is + terminated by getattr(x, attr) is None. + + Args: + obj: the starting object + attr: the name of the chaining attribute + + Yields: + Each successive object in the chain. 
+ """ + next = getattr(obj, attr) + while next: + yield next + next = getattr(next, attr) + +p0 = """for_stmt< 'for' any 'in' node=any ':' any* > + | comp_for< 'for' any 'in' node=any any* > + """ +p1 = """ +power< + ( 'iter' | 'list' | 'tuple' | 'sorted' | 'set' | 'sum' | + 'any' | 'all' | 'enumerate' | (any* trailer< '.' 'join' >) ) + trailer< '(' node=any ')' > + any* +> +""" +p2 = """ +power< + ( 'sorted' | 'enumerate' ) + trailer< '(' arglist<node=any any*> ')' > + any* +> +""" +pats_built = False +def in_special_context(node): + """ Returns true if node is in an environment where all that is required + of it is being iterable (ie, it doesn't matter if it returns a list + or an iterator). + See test_map_nochange in test_fixers.py for some examples and tests. + """ + global p0, p1, p2, pats_built + if not pats_built: + p0 = patcomp.compile_pattern(p0) + p1 = patcomp.compile_pattern(p1) + p2 = patcomp.compile_pattern(p2) + pats_built = True + patterns = [p0, p1, p2] + for pattern, parent in zip(patterns, attr_chain(node, "parent")): + results = {} + if pattern.match(parent, results) and results["node"] is node: + return True + return False + +def is_probably_builtin(node): + """ + Check that something isn't an attribute or function name etc. + """ + prev = node.prev_sibling + if prev is not None and prev.type == token.DOT: + # Attribute lookup. + return False + parent = node.parent + if parent.type in (syms.funcdef, syms.classdef): + return False + if parent.type == syms.expr_stmt and parent.children[0] is node: + # Assignment. + return False + if parent.type == syms.parameters or \ + (parent.type == syms.typedargslist and ( + (prev is not None and prev.type == token.COMMA) or + parent.children[0] is node + )): + # The name of an argument. + return False + return True + +def find_indentation(node): + """Find the indentation of *node*.""" + while node is not None: + if node.type == syms.suite and len(node.children) > 2: + indent = node.children[1] + if indent.type == token.INDENT: + return indent.value + node = node.parent + return "" + +########################################################### +### The following functions are to find bindings in a suite +########################################################### + +def make_suite(node): + if node.type == syms.suite: + return node + node = node.clone() + parent, node.parent = node.parent, None + suite = Node(syms.suite, [node]) + suite.parent = parent + return suite + +def find_root(node): + """Find the top level namespace.""" + # Scamper up to the top level namespace + while node.type != syms.file_input: + node = node.parent + if not node: + raise ValueError("root found before file_input node was found.") + return node + +def does_tree_import(package, name, node): + """ Returns true if name is imported from package at the + top level of the tree which node belongs to. + To cover the case of an import like 'import foo', use + None for the package and 'foo' for the name. """ + binding = find_binding(name, find_root(node), package) + return bool(binding) + +def is_import(node): + """Returns true if the node is an import statement.""" + return node.type in (syms.import_name, syms.import_from) + +def touch_import(package, name, node): + """ Works like `does_tree_import` but adds an import statement + if it was not imported. 
""" + def is_import_stmt(node): + return (node.type == syms.simple_stmt and node.children and + is_import(node.children[0])) + + root = find_root(node) + + if does_tree_import(package, name, root): + return + + # figure out where to insert the new import. First try to find + # the first import and then skip to the last one. + insert_pos = offset = 0 + for idx, node in enumerate(root.children): + if not is_import_stmt(node): + continue + for offset, node2 in enumerate(root.children[idx:]): + if not is_import_stmt(node2): + break + insert_pos = idx + offset + break + + # if there are no imports where we can insert, find the docstring. + # if that also fails, we stick to the beginning of the file + if insert_pos == 0: + for idx, node in enumerate(root.children): + if (node.type == syms.simple_stmt and node.children and + node.children[0].type == token.STRING): + insert_pos = idx + 1 + break + + if package is None: + import_ = Node(syms.import_name, [ + Leaf(token.NAME, "import"), + Leaf(token.NAME, name, prefix=" ") + ]) + else: + import_ = FromImport(package, [Leaf(token.NAME, name, prefix=" ")]) + + children = [import_, Newline()] + root.insert_child(insert_pos, Node(syms.simple_stmt, children)) + + +_def_syms = {syms.classdef, syms.funcdef} +def find_binding(name, node, package=None): + """ Returns the node which binds variable name, otherwise None. + If optional argument package is supplied, only imports will + be returned. + See test cases for examples.""" + for child in node.children: + ret = None + if child.type == syms.for_stmt: + if _find(name, child.children[1]): + return child + n = find_binding(name, make_suite(child.children[-1]), package) + if n: ret = n + elif child.type in (syms.if_stmt, syms.while_stmt): + n = find_binding(name, make_suite(child.children[-1]), package) + if n: ret = n + elif child.type == syms.try_stmt: + n = find_binding(name, make_suite(child.children[2]), package) + if n: + ret = n + else: + for i, kid in enumerate(child.children[3:]): + if kid.type == token.COLON and kid.value == ":": + # i+3 is the colon, i+4 is the suite + n = find_binding(name, make_suite(child.children[i+4]), package) + if n: ret = n + elif child.type in _def_syms and child.children[1].value == name: + ret = child + elif _is_import_binding(child, name, package): + ret = child + elif child.type == syms.simple_stmt: + ret = find_binding(name, child, package) + elif child.type == syms.expr_stmt: + if _find(name, child.children[0]): + ret = child + + if ret: + if not package: + return ret + if is_import(ret): + return ret + return None + +_block_syms = {syms.funcdef, syms.classdef, syms.trailer} +def _find(name, node): + nodes = [node] + while nodes: + node = nodes.pop() + if node.type > 256 and node.type not in _block_syms: + nodes.extend(node.children) + elif node.type == token.NAME and node.value == name: + return node + return None + +def _is_import_binding(node, name, package=None): + """ Will return node if node will import name, or node + will import * from package. None is returned otherwise. + See test cases for examples. 
""" + + if node.type == syms.import_name and not package: + imp = node.children[1] + if imp.type == syms.dotted_as_names: + for child in imp.children: + if child.type == syms.dotted_as_name: + if child.children[2].value == name: + return node + elif child.type == token.NAME and child.value == name: + return node + elif imp.type == syms.dotted_as_name: + last = imp.children[-1] + if last.type == token.NAME and last.value == name: + return node + elif imp.type == token.NAME and imp.value == name: + return node + elif node.type == syms.import_from: + # str(...) is used to make life easier here, because + # from a.b import parses to ['import', ['a', '.', 'b'], ...] + if package and str(node.children[1]).strip() != package: + return None + n = node.children[3] + if package and _find("as", n): + # See test_from_import_as for explanation + return None + elif n.type == syms.import_as_names and _find(name, n): + return node + elif n.type == syms.import_as_name: + child = n.children[2] + if child.type == token.NAME and child.value == name: + return node + elif n.type == token.NAME and n.value == name: + return node + elif package and n.type == token.STAR: + return node + return None diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/__init__.py b/contrib/tools/python3/Lib/lib2to3/fixes/__init__.py new file mode 100644 index 0000000000..b93054b3ec --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/__init__.py @@ -0,0 +1 @@ +# Dummy file to make this directory a package. diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_apply.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_apply.py new file mode 100644 index 0000000000..6408582c42 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_apply.py @@ -0,0 +1,68 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer for apply(). + +This converts apply(func, v, k) into (func)(*v, **k).""" + +# Local imports +from .. import pytree +from ..pgen2 import token +from .. import fixer_base +from ..fixer_util import Call, Comma, parenthesize + +class FixApply(fixer_base.BaseFix): + BM_compatible = True + + PATTERN = """ + power< 'apply' + trailer< + '(' + arglist< + (not argument<NAME '=' any>) func=any ',' + (not argument<NAME '=' any>) args=any [',' + (not argument<NAME '=' any>) kwds=any] [','] + > + ')' + > + > + """ + + def transform(self, node, results): + syms = self.syms + assert results + func = results["func"] + args = results["args"] + kwds = results.get("kwds") + # I feel like we should be able to express this logic in the + # PATTERN above but I don't know how to do it so... + if args: + if (args.type == self.syms.argument and + args.children[0].value in {'**', '*'}): + return # Make no change. + if kwds and (kwds.type == self.syms.argument and + kwds.children[0].value == '**'): + return # Make no change. + prefix = node.prefix + func = func.clone() + if (func.type not in (token.NAME, syms.atom) and + (func.type != syms.power or + func.children[-2].type == token.DOUBLESTAR)): + # Need to parenthesize + func = parenthesize(func) + func.prefix = "" + args = args.clone() + args.prefix = "" + if kwds is not None: + kwds = kwds.clone() + kwds.prefix = "" + l_newargs = [pytree.Leaf(token.STAR, "*"), args] + if kwds is not None: + l_newargs.extend([Comma(), + pytree.Leaf(token.DOUBLESTAR, "**"), + kwds]) + l_newargs[-2].prefix = " " # that's the ** token + # XXX Sometimes we could be cleverer, e.g. 
apply(f, (x, y) + t) + # can be translated into f(x, y, *t) instead of f(*(x, y) + t) + #new = pytree.Node(syms.power, (func, ArgList(l_newargs))) + return Call(func, l_newargs, prefix=prefix) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_asserts.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_asserts.py new file mode 100644 index 0000000000..5bcec885f5 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_asserts.py @@ -0,0 +1,34 @@ +"""Fixer that replaces deprecated unittest method names.""" + +# Author: Ezio Melotti + +from ..fixer_base import BaseFix +from ..fixer_util import Name + +NAMES = dict( + assert_="assertTrue", + assertEquals="assertEqual", + assertNotEquals="assertNotEqual", + assertAlmostEquals="assertAlmostEqual", + assertNotAlmostEquals="assertNotAlmostEqual", + assertRegexpMatches="assertRegex", + assertRaisesRegexp="assertRaisesRegex", + failUnlessEqual="assertEqual", + failIfEqual="assertNotEqual", + failUnlessAlmostEqual="assertAlmostEqual", + failIfAlmostEqual="assertNotAlmostEqual", + failUnless="assertTrue", + failUnlessRaises="assertRaises", + failIf="assertFalse", +) + + +class FixAsserts(BaseFix): + + PATTERN = """ + power< any+ trailer< '.' meth=(%s)> any* > + """ % '|'.join(map(repr, NAMES)) + + def transform(self, node, results): + name = results["meth"][0] + name.replace(Name(NAMES[str(name)], prefix=name.prefix)) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_basestring.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_basestring.py new file mode 100644 index 0000000000..5fe69a0f03 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_basestring.py @@ -0,0 +1,14 @@ +"""Fixer for basestring -> str.""" +# Author: Christian Heimes + +# Local imports +from .. import fixer_base +from ..fixer_util import Name + +class FixBasestring(fixer_base.BaseFix): + BM_compatible = True + + PATTERN = "'basestring'" + + def transform(self, node, results): + return Name("str", prefix=node.prefix) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_buffer.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_buffer.py new file mode 100644 index 0000000000..f9a1958ad3 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_buffer.py @@ -0,0 +1,22 @@ +# Copyright 2007 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer that changes buffer(...) into memoryview(...).""" + +# Local imports +from .. import fixer_base +from ..fixer_util import Name + + +class FixBuffer(fixer_base.BaseFix): + BM_compatible = True + + explicit = True # The user must ask for this fixer + + PATTERN = """ + power< name='buffer' trailer< '(' [any] ')' > any* > + """ + + def transform(self, node, results): + name = results["name"] + name.replace(Name("memoryview", prefix=name.prefix)) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_dict.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_dict.py new file mode 100644 index 0000000000..d3655c9f1b --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_dict.py @@ -0,0 +1,106 @@ +# Copyright 2007 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer for dict methods. 
+ +d.keys() -> list(d.keys()) +d.items() -> list(d.items()) +d.values() -> list(d.values()) + +d.iterkeys() -> iter(d.keys()) +d.iteritems() -> iter(d.items()) +d.itervalues() -> iter(d.values()) + +d.viewkeys() -> d.keys() +d.viewitems() -> d.items() +d.viewvalues() -> d.values() + +Except in certain very specific contexts: the iter() can be dropped +when the context is list(), sorted(), iter() or for...in; the list() +can be dropped when the context is list() or sorted() (but not iter() +or for...in!). Special contexts that apply to both: list(), sorted(), tuple() +set(), any(), all(), sum(). + +Note: iter(d.keys()) could be written as iter(d) but since the +original d.iterkeys() was also redundant we don't fix this. And there +are (rare) contexts where it makes a difference (e.g. when passing it +as an argument to a function that introspects the argument). +""" + +# Local imports +from .. import pytree +from .. import patcomp +from .. import fixer_base +from ..fixer_util import Name, Call, Dot +from .. import fixer_util + + +iter_exempt = fixer_util.consuming_calls | {"iter"} + + +class FixDict(fixer_base.BaseFix): + BM_compatible = True + + PATTERN = """ + power< head=any+ + trailer< '.' method=('keys'|'items'|'values'| + 'iterkeys'|'iteritems'|'itervalues'| + 'viewkeys'|'viewitems'|'viewvalues') > + parens=trailer< '(' ')' > + tail=any* + > + """ + + def transform(self, node, results): + head = results["head"] + method = results["method"][0] # Extract node for method name + tail = results["tail"] + syms = self.syms + method_name = method.value + isiter = method_name.startswith("iter") + isview = method_name.startswith("view") + if isiter or isview: + method_name = method_name[4:] + assert method_name in ("keys", "items", "values"), repr(method) + head = [n.clone() for n in head] + tail = [n.clone() for n in tail] + special = not tail and self.in_special_context(node, isiter) + args = head + [pytree.Node(syms.trailer, + [Dot(), + Name(method_name, + prefix=method.prefix)]), + results["parens"].clone()] + new = pytree.Node(syms.power, args) + if not (special or isview): + new.prefix = "" + new = Call(Name("iter" if isiter else "list"), [new]) + if tail: + new = pytree.Node(syms.power, [new] + tail) + new.prefix = node.prefix + return new + + P1 = "power< func=NAME trailer< '(' node=any ')' > any* >" + p1 = patcomp.compile_pattern(P1) + + P2 = """for_stmt< 'for' any 'in' node=any ':' any* > + | comp_for< 'for' any 'in' node=any any* > + """ + p2 = patcomp.compile_pattern(P2) + + def in_special_context(self, node, isiter): + if node.parent is None: + return False + results = {} + if (node.parent.parent is not None and + self.p1.match(node.parent.parent, results) and + results["node"] is node): + if isiter: + # iter(d.iterkeys()) -> iter(d.keys()), etc. + return results["func"].value in iter_exempt + else: + # list(d.keys()) -> list(d.keys()), etc. + return results["func"].value in fixer_util.consuming_calls + if not isiter: + return False + # for ... in d.iterkeys() -> for ... in d.keys(), etc. + return self.p2.match(node.parent, results) and results["node"] is node diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_except.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_except.py new file mode 100644 index 0000000000..49bd3d5ab7 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_except.py @@ -0,0 +1,93 @@ +"""Fixer for except statements with named exceptions. 
+ +The following cases will be converted: + +- "except E, T:" where T is a name: + + except E as T: + +- "except E, T:" where T is not a name, tuple or list: + + except E as t: + T = t + + This is done because the target of an "except" clause must be a + name. + +- "except E, T:" where T is a tuple or list literal: + + except E as t: + T = t.args +""" +# Author: Collin Winter + +# Local imports +from .. import pytree +from ..pgen2 import token +from .. import fixer_base +from ..fixer_util import Assign, Attr, Name, is_tuple, is_list, syms + +def find_excepts(nodes): + for i, n in enumerate(nodes): + if n.type == syms.except_clause: + if n.children[0].value == 'except': + yield (n, nodes[i+2]) + +class FixExcept(fixer_base.BaseFix): + BM_compatible = True + + PATTERN = """ + try_stmt< 'try' ':' (simple_stmt | suite) + cleanup=(except_clause ':' (simple_stmt | suite))+ + tail=(['except' ':' (simple_stmt | suite)] + ['else' ':' (simple_stmt | suite)] + ['finally' ':' (simple_stmt | suite)]) > + """ + + def transform(self, node, results): + syms = self.syms + + tail = [n.clone() for n in results["tail"]] + + try_cleanup = [ch.clone() for ch in results["cleanup"]] + for except_clause, e_suite in find_excepts(try_cleanup): + if len(except_clause.children) == 4: + (E, comma, N) = except_clause.children[1:4] + comma.replace(Name("as", prefix=" ")) + + if N.type != token.NAME: + # Generate a new N for the except clause + new_N = Name(self.new_name(), prefix=" ") + target = N.clone() + target.prefix = "" + N.replace(new_N) + new_N = new_N.clone() + + # Insert "old_N = new_N" as the first statement in + # the except body. This loop skips leading whitespace + # and indents + #TODO(cwinter) suite-cleanup + suite_stmts = e_suite.children + for i, stmt in enumerate(suite_stmts): + if isinstance(stmt, pytree.Node): + break + + # The assignment is different if old_N is a tuple or list + # In that case, the assignment is old_N = new_N.args + if is_tuple(N) or is_list(N): + assign = Assign(target, Attr(new_N, Name('args'))) + else: + assign = Assign(target, new_N) + + #TODO(cwinter) stopgap until children becomes a smart list + for child in reversed(suite_stmts[:i]): + e_suite.insert_child(0, child) + e_suite.insert_child(i, assign) + elif N.prefix == "": + # No space after a comma is legal; no space after "as", + # not so much. + N.prefix = " " + + #TODO(cwinter) fix this when children becomes a smart list + children = [c.clone() for c in node.children[:3]] + try_cleanup + tail + return pytree.Node(node.type, children) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_exec.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_exec.py new file mode 100644 index 0000000000..ab921ee80c --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_exec.py @@ -0,0 +1,39 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer for exec. + +This converts usages of the exec statement into calls to a built-in +exec() function. + +exec code in ns1, ns2 -> exec(code, ns1, ns2) +""" + +# Local imports +from .. 
import fixer_base +from ..fixer_util import Comma, Name, Call + + +class FixExec(fixer_base.BaseFix): + BM_compatible = True + + PATTERN = """ + exec_stmt< 'exec' a=any 'in' b=any [',' c=any] > + | + exec_stmt< 'exec' (not atom<'(' [any] ')'>) a=any > + """ + + def transform(self, node, results): + assert results + syms = self.syms + a = results["a"] + b = results.get("b") + c = results.get("c") + args = [a.clone()] + args[0].prefix = "" + if b is not None: + args.extend([Comma(), b.clone()]) + if c is not None: + args.extend([Comma(), c.clone()]) + + return Call(Name("exec"), args, prefix=node.prefix) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_execfile.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_execfile.py new file mode 100644 index 0000000000..b6c786fd4e --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_execfile.py @@ -0,0 +1,53 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer for execfile. + +This converts usages of the execfile function into calls to the built-in +exec() function. +""" + +from .. import fixer_base +from ..fixer_util import (Comma, Name, Call, LParen, RParen, Dot, Node, + ArgList, String, syms) + + +class FixExecfile(fixer_base.BaseFix): + BM_compatible = True + + PATTERN = """ + power< 'execfile' trailer< '(' arglist< filename=any [',' globals=any [',' locals=any ] ] > ')' > > + | + power< 'execfile' trailer< '(' filename=any ')' > > + """ + + def transform(self, node, results): + assert results + filename = results["filename"] + globals = results.get("globals") + locals = results.get("locals") + + # Copy over the prefix from the right parentheses end of the execfile + # call. + execfile_paren = node.children[-1].children[-1].clone() + # Construct open().read(). + open_args = ArgList([filename.clone(), Comma(), String('"rb"', ' ')], + rparen=execfile_paren) + open_call = Node(syms.power, [Name("open"), open_args]) + read = [Node(syms.trailer, [Dot(), Name('read')]), + Node(syms.trailer, [LParen(), RParen()])] + open_expr = [open_call] + read + # Wrap the open call in a compile call. This is so the filename will be + # preserved in the execed code. + filename_arg = filename.clone() + filename_arg.prefix = " " + exec_str = String("'exec'", " ") + compile_args = open_expr + [Comma(), filename_arg, Comma(), exec_str] + compile_call = Call(Name("compile"), compile_args, "") + # Finally, replace the execfile call with an exec call. + args = [compile_call] + if globals is not None: + args.extend([Comma(), globals.clone()]) + if locals is not None: + args.extend([Comma(), locals.clone()]) + return Call(Name("exec"), args, prefix=node.prefix) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_exitfunc.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_exitfunc.py new file mode 100644 index 0000000000..2e47887afe --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_exitfunc.py @@ -0,0 +1,72 @@ +""" +Convert use of sys.exitfunc to use the atexit module. +""" + +# Author: Benjamin Peterson + +from lib2to3 import pytree, fixer_base +from lib2to3.fixer_util import Name, Attr, Call, Comma, Newline, syms + + +class FixExitfunc(fixer_base.BaseFix): + keep_line_order = True + BM_compatible = True + + PATTERN = """ + ( + sys_import=import_name<'import' + ('sys' + | + dotted_as_names< (any ',')* 'sys' (',' any)* > + ) + > + | + expr_stmt< + power< 'sys' trailer< '.' 
'exitfunc' > > + '=' func=any > + ) + """ + + def __init__(self, *args): + super(FixExitfunc, self).__init__(*args) + + def start_tree(self, tree, filename): + super(FixExitfunc, self).start_tree(tree, filename) + self.sys_import = None + + def transform(self, node, results): + # First, find the sys import. We'll just hope it's global scope. + if "sys_import" in results: + if self.sys_import is None: + self.sys_import = results["sys_import"] + return + + func = results["func"].clone() + func.prefix = "" + register = pytree.Node(syms.power, + Attr(Name("atexit"), Name("register")) + ) + call = Call(register, [func], node.prefix) + node.replace(call) + + if self.sys_import is None: + # That's interesting. + self.warning(node, "Can't find sys import; Please add an atexit " + "import at the top of your file.") + return + + # Now add an atexit import after the sys import. + names = self.sys_import.children[1] + if names.type == syms.dotted_as_names: + names.append_child(Comma()) + names.append_child(Name("atexit", " ")) + else: + containing_stmt = self.sys_import.parent + position = containing_stmt.children.index(self.sys_import) + stmt_container = containing_stmt.parent + new_import = pytree.Node(syms.import_name, + [Name("import"), Name("atexit", " ")] + ) + new = pytree.Node(syms.simple_stmt, [new_import]) + containing_stmt.insert_child(position + 1, Newline()) + containing_stmt.insert_child(position + 2, new) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_filter.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_filter.py new file mode 100644 index 0000000000..38e9078f11 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_filter.py @@ -0,0 +1,94 @@ +# Copyright 2007 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer that changes filter(F, X) into list(filter(F, X)). + +We avoid the transformation if the filter() call is directly contained +in iter(<>), list(<>), tuple(<>), sorted(<>), ...join(<>), or +for V in <>:. + +NOTE: This is still not correct if the original code was depending on +filter(F, X) to return a string if X is a string and a tuple if X is a +tuple. That would require type inference, which we don't do. Let +Python 2.6 figure it out. +""" + +# Local imports +from .. 
import fixer_base +from ..pytree import Node +from ..pygram import python_symbols as syms +from ..fixer_util import Name, ArgList, ListComp, in_special_context, parenthesize + + +class FixFilter(fixer_base.ConditionalFix): + BM_compatible = True + + PATTERN = """ + filter_lambda=power< + 'filter' + trailer< + '(' + arglist< + lambdef< 'lambda' + (fp=NAME | vfpdef< '(' fp=NAME ')'> ) ':' xp=any + > + ',' + it=any + > + ')' + > + [extra_trailers=trailer*] + > + | + power< + 'filter' + trailer< '(' arglist< none='None' ',' seq=any > ')' > + [extra_trailers=trailer*] + > + | + power< + 'filter' + args=trailer< '(' [any] ')' > + [extra_trailers=trailer*] + > + """ + + skip_on = "future_builtins.filter" + + def transform(self, node, results): + if self.should_skip(node): + return + + trailers = [] + if 'extra_trailers' in results: + for t in results['extra_trailers']: + trailers.append(t.clone()) + + if "filter_lambda" in results: + xp = results.get("xp").clone() + if xp.type == syms.test: + xp.prefix = "" + xp = parenthesize(xp) + + new = ListComp(results.get("fp").clone(), + results.get("fp").clone(), + results.get("it").clone(), xp) + new = Node(syms.power, [new] + trailers, prefix="") + + elif "none" in results: + new = ListComp(Name("_f"), + Name("_f"), + results["seq"].clone(), + Name("_f")) + new = Node(syms.power, [new] + trailers, prefix="") + + else: + if in_special_context(node): + return None + + args = results['args'].clone() + new = Node(syms.power, [Name("filter"), args], prefix="") + new = Node(syms.power, [Name("list"), ArgList([new])] + trailers) + new.prefix = "" + new.prefix = node.prefix + return new diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_funcattrs.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_funcattrs.py new file mode 100644 index 0000000000..67f3e18e06 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_funcattrs.py @@ -0,0 +1,21 @@ +"""Fix function attribute names (f.func_x -> f.__x__).""" +# Author: Collin Winter + +# Local imports +from .. import fixer_base +from ..fixer_util import Name + + +class FixFuncattrs(fixer_base.BaseFix): + BM_compatible = True + + PATTERN = """ + power< any+ trailer< '.' attr=('func_closure' | 'func_doc' | 'func_globals' + | 'func_name' | 'func_defaults' | 'func_code' + | 'func_dict') > any* > + """ + + def transform(self, node, results): + attr = results["attr"][0] + attr.replace(Name(("__%s__" % attr.value[5:]), + prefix=attr.prefix)) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_future.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_future.py new file mode 100644 index 0000000000..fbcb86af07 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_future.py @@ -0,0 +1,22 @@ +"""Remove __future__ imports + +from __future__ import foo is replaced with an empty line. +""" +# Author: Christian Heimes + +# Local imports +from .. 
import fixer_base +from ..fixer_util import BlankLine + +class FixFuture(fixer_base.BaseFix): + BM_compatible = True + + PATTERN = """import_from< 'from' module_name="__future__" 'import' any >""" + + # This should be run last -- some things check for the import + run_order = 10 + + def transform(self, node, results): + new = BlankLine() + new.prefix = node.prefix + return new diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_getcwdu.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_getcwdu.py new file mode 100644 index 0000000000..087eaedcb2 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_getcwdu.py @@ -0,0 +1,19 @@ +""" +Fixer that changes os.getcwdu() to os.getcwd(). +""" +# Author: Victor Stinner + +# Local imports +from .. import fixer_base +from ..fixer_util import Name + +class FixGetcwdu(fixer_base.BaseFix): + BM_compatible = True + + PATTERN = """ + power< 'os' trailer< dot='.' name='getcwdu' > any* > + """ + + def transform(self, node, results): + name = results["name"] + name.replace(Name("getcwd", prefix=name.prefix)) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_has_key.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_has_key.py new file mode 100644 index 0000000000..439708c992 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_has_key.py @@ -0,0 +1,109 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer for has_key(). + +Calls to .has_key() methods are expressed in terms of the 'in' +operator: + + d.has_key(k) -> k in d + +CAVEATS: +1) While the primary target of this fixer is dict.has_key(), the + fixer will change any has_key() method call, regardless of its + class. + +2) Cases like this will not be converted: + + m = d.has_key + if m(k): + ... + + Only *calls* to has_key() are converted. While it is possible to + convert the above to something like + + m = d.__contains__ + if m(k): + ... + + this is currently not done. +""" + +# Local imports +from .. import pytree +from .. import fixer_base +from ..fixer_util import Name, parenthesize + + +class FixHasKey(fixer_base.BaseFix): + BM_compatible = True + + PATTERN = """ + anchor=power< + before=any+ + trailer< '.' 'has_key' > + trailer< + '(' + ( not(arglist | argument<any '=' any>) arg=any + | arglist<(not argument<any '=' any>) arg=any ','> + ) + ')' + > + after=any* + > + | + negation=not_test< + 'not' + anchor=power< + before=any+ + trailer< '.' 
'has_key' > + trailer< + '(' + ( not(arglist | argument<any '=' any>) arg=any + | arglist<(not argument<any '=' any>) arg=any ','> + ) + ')' + > + > + > + """ + + def transform(self, node, results): + assert results + syms = self.syms + if (node.parent.type == syms.not_test and + self.pattern.match(node.parent)): + # Don't transform a node matching the first alternative of the + # pattern when its parent matches the second alternative + return None + negation = results.get("negation") + anchor = results["anchor"] + prefix = node.prefix + before = [n.clone() for n in results["before"]] + arg = results["arg"].clone() + after = results.get("after") + if after: + after = [n.clone() for n in after] + if arg.type in (syms.comparison, syms.not_test, syms.and_test, + syms.or_test, syms.test, syms.lambdef, syms.argument): + arg = parenthesize(arg) + if len(before) == 1: + before = before[0] + else: + before = pytree.Node(syms.power, before) + before.prefix = " " + n_op = Name("in", prefix=" ") + if negation: + n_not = Name("not", prefix=" ") + n_op = pytree.Node(syms.comp_op, (n_not, n_op)) + new = pytree.Node(syms.comparison, (arg, n_op, before)) + if after: + new = parenthesize(new) + new = pytree.Node(syms.power, (new,) + tuple(after)) + if node.parent.type in (syms.comparison, syms.expr, syms.xor_expr, + syms.and_expr, syms.shift_expr, + syms.arith_expr, syms.term, + syms.factor, syms.power): + new = parenthesize(new) + new.prefix = prefix + return new diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_idioms.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_idioms.py new file mode 100644 index 0000000000..6905913d7c --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_idioms.py @@ -0,0 +1,152 @@ +"""Adjust some old Python 2 idioms to their modern counterparts. + +* Change some type comparisons to isinstance() calls: + type(x) == T -> isinstance(x, T) + type(x) is T -> isinstance(x, T) + type(x) != T -> not isinstance(x, T) + type(x) is not T -> not isinstance(x, T) + +* Change "while 1:" into "while True:". + +* Change both + + v = list(EXPR) + v.sort() + foo(v) + +and the more general + + v = EXPR + v.sort() + foo(v) + +into + + v = sorted(EXPR) + foo(v) +""" +# Author: Jacques Frechet, Collin Winter + +# Local imports +from .. import fixer_base +from ..fixer_util import Call, Comma, Name, Node, BlankLine, syms + +CMP = "(n='!=' | '==' | 'is' | n=comp_op< 'is' 'not' >)" +TYPE = "power< 'type' trailer< '(' x=any ')' > >" + +class FixIdioms(fixer_base.BaseFix): + explicit = True # The user must ask for this fixer + + PATTERN = r""" + isinstance=comparison< %s %s T=any > + | + isinstance=comparison< T=any %s %s > + | + while_stmt< 'while' while='1' ':' any+ > + | + sorted=any< + any* + simple_stmt< + expr_stmt< id1=any '=' + power< list='list' trailer< '(' (not arglist<any+>) any ')' > > + > + '\n' + > + sort= + simple_stmt< + power< id2=any + trailer< '.' 'sort' > trailer< '(' ')' > + > + '\n' + > + next=any* + > + | + sorted=any< + any* + simple_stmt< expr_stmt< id1=any '=' expr=any > '\n' > + sort= + simple_stmt< + power< id2=any + trailer< '.' 'sort' > trailer< '(' ')' > + > + '\n' + > + next=any* + > + """ % (TYPE, CMP, CMP, TYPE) + + def match(self, node): + r = super(FixIdioms, self).match(node) + # If we've matched one of the sort/sorted subpatterns above, we + # want to reject matches where the initial assignment and the + # subsequent .sort() call involve different identifiers. 
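As an illustrative aside (a sketch, not part of the patch itself): the three rewrites that the fix_idioms docstring above describes can be summarized with placeholder names x, data and use; note the fixer sets explicit = True, so it only runs when explicitly requested.

# Before (Python 2 idioms):
#     if type(x) == int: ...
#     while 1: ...
#     v = list(data)
#     v.sort()
#     use(v)
#
# After fix_idioms:
#     if isinstance(x, int): ...
#     while True: ...
#     v = sorted(data)
#     use(v)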
+ if r and "sorted" in r: + if r["id1"] == r["id2"]: + return r + return None + return r + + def transform(self, node, results): + if "isinstance" in results: + return self.transform_isinstance(node, results) + elif "while" in results: + return self.transform_while(node, results) + elif "sorted" in results: + return self.transform_sort(node, results) + else: + raise RuntimeError("Invalid match") + + def transform_isinstance(self, node, results): + x = results["x"].clone() # The thing inside of type() + T = results["T"].clone() # The type being compared against + x.prefix = "" + T.prefix = " " + test = Call(Name("isinstance"), [x, Comma(), T]) + if "n" in results: + test.prefix = " " + test = Node(syms.not_test, [Name("not"), test]) + test.prefix = node.prefix + return test + + def transform_while(self, node, results): + one = results["while"] + one.replace(Name("True", prefix=one.prefix)) + + def transform_sort(self, node, results): + sort_stmt = results["sort"] + next_stmt = results["next"] + list_call = results.get("list") + simple_expr = results.get("expr") + + if list_call: + list_call.replace(Name("sorted", prefix=list_call.prefix)) + elif simple_expr: + new = simple_expr.clone() + new.prefix = "" + simple_expr.replace(Call(Name("sorted"), [new], + prefix=simple_expr.prefix)) + else: + raise RuntimeError("should not have reached here") + sort_stmt.remove() + + btwn = sort_stmt.prefix + # Keep any prefix lines between the sort_stmt and the list_call and + # shove them right after the sorted() call. + if "\n" in btwn: + if next_stmt: + # The new prefix should be everything from the sort_stmt's + # prefix up to the last newline, then the old prefix after a new + # line. + prefix_lines = (btwn.rpartition("\n")[0], next_stmt[0].prefix) + next_stmt[0].prefix = "\n".join(prefix_lines) + else: + assert list_call.parent + assert list_call.next_sibling is None + # Put a blank line after list_call and set its prefix. + end_line = BlankLine() + list_call.parent.append_child(end_line) + assert list_call.next_sibling is end_line + # The new prefix should be everything up to the first new line + # of sort_stmt's prefix. + end_line.prefix = btwn.rpartition("\n")[0] diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_import.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_import.py new file mode 100644 index 0000000000..734ca29469 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_import.py @@ -0,0 +1,99 @@ +"""Fixer for import statements. +If spam is being imported from the local directory, this import: + from spam import eggs +Becomes: + from .spam import eggs + +And this import: + import spam +Becomes: + from . import spam +""" + +# Local imports +from .. import fixer_base +from os.path import dirname, join, exists, sep +from ..fixer_util import FromImport, syms, token + + +def traverse_imports(names): + """ + Walks over all the names imported in a dotted_as_names node. 
+ """ + pending = [names] + while pending: + node = pending.pop() + if node.type == token.NAME: + yield node.value + elif node.type == syms.dotted_name: + yield "".join([ch.value for ch in node.children]) + elif node.type == syms.dotted_as_name: + pending.append(node.children[0]) + elif node.type == syms.dotted_as_names: + pending.extend(node.children[::-2]) + else: + raise AssertionError("unknown node type") + + +class FixImport(fixer_base.BaseFix): + BM_compatible = True + + PATTERN = """ + import_from< 'from' imp=any 'import' ['('] any [')'] > + | + import_name< 'import' imp=any > + """ + + def start_tree(self, tree, name): + super(FixImport, self).start_tree(tree, name) + self.skip = "absolute_import" in tree.future_features + + def transform(self, node, results): + if self.skip: + return + imp = results['imp'] + + if node.type == syms.import_from: + # Some imps are top-level (eg: 'import ham') + # some are first level (eg: 'import ham.eggs') + # some are third level (eg: 'import ham.eggs as spam') + # Hence, the loop + while not hasattr(imp, 'value'): + imp = imp.children[0] + if self.probably_a_local_import(imp.value): + imp.value = "." + imp.value + imp.changed() + else: + have_local = False + have_absolute = False + for mod_name in traverse_imports(imp): + if self.probably_a_local_import(mod_name): + have_local = True + else: + have_absolute = True + if have_absolute: + if have_local: + # We won't handle both sibling and absolute imports in the + # same statement at the moment. + self.warning(node, "absolute and local imports together") + return + + new = FromImport(".", [imp]) + new.prefix = node.prefix + return new + + def probably_a_local_import(self, imp_name): + if imp_name.startswith("."): + # Relative imports are certainly not local imports. + return False + imp_name = imp_name.split(".", 1)[0] + base_path = dirname(self.filename) + base_path = join(base_path, imp_name) + # If there is no __init__.py next to the file its not in a package + # so can't be a relative import. + if not exists(join(dirname(base_path), "__init__.py")): + return False + for ext in [".py", sep, ".pyc", ".so", ".sl", ".pyd"]: + if exists(base_path + ext): + return True + return False diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_imports.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_imports.py new file mode 100644 index 0000000000..aaf4f2f642 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_imports.py @@ -0,0 +1,145 @@ +"""Fix incompatible imports and module references.""" +# Authors: Collin Winter, Nick Edds + +# Local imports +from .. 
import fixer_base +from ..fixer_util import Name, attr_chain + +MAPPING = {'StringIO': 'io', + 'cStringIO': 'io', + 'cPickle': 'pickle', + '__builtin__' : 'builtins', + 'copy_reg': 'copyreg', + 'Queue': 'queue', + 'SocketServer': 'socketserver', + 'ConfigParser': 'configparser', + 'repr': 'reprlib', + 'FileDialog': 'tkinter.filedialog', + 'tkFileDialog': 'tkinter.filedialog', + 'SimpleDialog': 'tkinter.simpledialog', + 'tkSimpleDialog': 'tkinter.simpledialog', + 'tkColorChooser': 'tkinter.colorchooser', + 'tkCommonDialog': 'tkinter.commondialog', + 'Dialog': 'tkinter.dialog', + 'Tkdnd': 'tkinter.dnd', + 'tkFont': 'tkinter.font', + 'tkMessageBox': 'tkinter.messagebox', + 'ScrolledText': 'tkinter.scrolledtext', + 'Tkconstants': 'tkinter.constants', + 'Tix': 'tkinter.tix', + 'ttk': 'tkinter.ttk', + 'Tkinter': 'tkinter', + 'markupbase': '_markupbase', + '_winreg': 'winreg', + 'thread': '_thread', + 'dummy_thread': '_dummy_thread', + # anydbm and whichdb are handled by fix_imports2 + 'dbhash': 'dbm.bsd', + 'dumbdbm': 'dbm.dumb', + 'dbm': 'dbm.ndbm', + 'gdbm': 'dbm.gnu', + 'xmlrpclib': 'xmlrpc.client', + 'DocXMLRPCServer': 'xmlrpc.server', + 'SimpleXMLRPCServer': 'xmlrpc.server', + 'httplib': 'http.client', + 'htmlentitydefs' : 'html.entities', + 'HTMLParser' : 'html.parser', + 'Cookie': 'http.cookies', + 'cookielib': 'http.cookiejar', + 'BaseHTTPServer': 'http.server', + 'SimpleHTTPServer': 'http.server', + 'CGIHTTPServer': 'http.server', + #'test.test_support': 'test.support', + 'commands': 'subprocess', + 'UserString' : 'collections', + 'UserList' : 'collections', + 'urlparse' : 'urllib.parse', + 'robotparser' : 'urllib.robotparser', +} + + +def alternates(members): + return "(" + "|".join(map(repr, members)) + ")" + + +def build_pattern(mapping=MAPPING): + mod_list = ' | '.join(["module_name='%s'" % key for key in mapping]) + bare_names = alternates(mapping.keys()) + + yield """name_import=import_name< 'import' ((%s) | + multiple_imports=dotted_as_names< any* (%s) any* >) > + """ % (mod_list, mod_list) + yield """import_from< 'from' (%s) 'import' ['('] + ( any | import_as_name< any 'as' any > | + import_as_names< any* >) [')'] > + """ % mod_list + yield """import_name< 'import' (dotted_as_name< (%s) 'as' any > | + multiple_imports=dotted_as_names< + any* dotted_as_name< (%s) 'as' any > any* >) > + """ % (mod_list, mod_list) + + # Find usages of module members in code e.g. thread.foo(bar) + yield "power< bare_with_attr=(%s) trailer<'.' any > any* >" % bare_names + + +class FixImports(fixer_base.BaseFix): + + BM_compatible = True + keep_line_order = True + # This is overridden in fix_imports2. + mapping = MAPPING + + # We want to run this fixer late, so fix_import doesn't try to make stdlib + # renames into relative imports. + run_order = 6 + + def build_pattern(self): + return "|".join(build_pattern(self.mapping)) + + def compile_pattern(self): + # We override this, so MAPPING can be pragmatically altered and the + # changes will be reflected in PATTERN. + self.PATTERN = self.build_pattern() + super(FixImports, self).compile_pattern() + + # Don't match the node if it's within another match. + def match(self, node): + match = super(FixImports, self).match + results = match(node) + if results: + # Module usage could be in the trailer of an attribute lookup, so we + # might have nested matches when "bare_with_attr" is present. 
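For orientation, a minimal sketch of driving the fix_imports fixer above through lib2to3's RefactoringTool entry point; this is illustrative only (module and attribute names come from the MAPPING table), and lib2to3 is deprecated and no longer shipped with Python 3.13+, so it assumes an interpreter that still provides it.

# Illustrative sketch, assuming an interpreter that still ships lib2to3.
from lib2to3.refactor import RefactoringTool

tool = RefactoringTool(["lib2to3.fixes.fix_imports"])
source = "import cPickle\ndata = cPickle.dumps([1, 2])\n"
# The fixer renames both the import and the bare module usage, giving
# roughly: "import pickle" / "data = pickle.dumps([1, 2])"
print(tool.refactor_string(source, "<example>"))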
+ if "bare_with_attr" not in results and \ + any(match(obj) for obj in attr_chain(node, "parent")): + return False + return results + return False + + def start_tree(self, tree, filename): + super(FixImports, self).start_tree(tree, filename) + self.replace = {} + + def transform(self, node, results): + import_mod = results.get("module_name") + if import_mod: + mod_name = import_mod.value + new_name = self.mapping[mod_name] + import_mod.replace(Name(new_name, prefix=import_mod.prefix)) + if "name_import" in results: + # If it's not a "from x import x, y" or "import x as y" import, + # marked its usage to be replaced. + self.replace[mod_name] = new_name + if "multiple_imports" in results: + # This is a nasty hack to fix multiple imports on a line (e.g., + # "import StringIO, urlparse"). The problem is that I can't + # figure out an easy way to make a pattern recognize the keys of + # MAPPING randomly sprinkled in an import statement. + results = self.match(node) + if results: + self.transform(node, results) + else: + # Replace usage of the module. + bare_name = results["bare_with_attr"][0] + new_name = self.replace.get(bare_name.value) + if new_name: + bare_name.replace(Name(new_name, prefix=bare_name.prefix)) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_imports2.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_imports2.py new file mode 100644 index 0000000000..9a33c67b1d --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_imports2.py @@ -0,0 +1,16 @@ +"""Fix incompatible imports and module references that must be fixed after +fix_imports.""" +from . import fix_imports + + +MAPPING = { + 'whichdb': 'dbm', + 'anydbm': 'dbm', + } + + +class FixImports2(fix_imports.FixImports): + + run_order = 7 + + mapping = MAPPING diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_input.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_input.py new file mode 100644 index 0000000000..9cf9a48c47 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_input.py @@ -0,0 +1,26 @@ +"""Fixer that changes input(...) into eval(input(...)).""" +# Author: Andre Roberge + +# Local imports +from .. import fixer_base +from ..fixer_util import Call, Name +from .. import patcomp + + +context = patcomp.compile_pattern("power< 'eval' trailer< '(' any ')' > >") + + +class FixInput(fixer_base.BaseFix): + BM_compatible = True + PATTERN = """ + power< 'input' args=trailer< '(' [any] ')' > > + """ + + def transform(self, node, results): + # If we're already wrapped in an eval() call, we're done. + if context.match(node.parent.parent): + return + + new = node.clone() + new.prefix = "" + return Call(Name("eval"), [new], prefix=node.prefix) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_intern.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_intern.py new file mode 100644 index 0000000000..d752843092 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_intern.py @@ -0,0 +1,39 @@ +# Copyright 2006 Georg Brandl. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer for intern(). + +intern(s) -> sys.intern(s)""" + +# Local imports +from .. 
import fixer_base +from ..fixer_util import ImportAndCall, touch_import + + +class FixIntern(fixer_base.BaseFix): + BM_compatible = True + order = "pre" + + PATTERN = """ + power< 'intern' + trailer< lpar='(' + ( not(arglist | argument<any '=' any>) obj=any + | obj=arglist<(not argument<any '=' any>) any ','> ) + rpar=')' > + after=any* + > + """ + + def transform(self, node, results): + if results: + # I feel like we should be able to express this logic in the + # PATTERN above but I don't know how to do it so... + obj = results['obj'] + if obj: + if (obj.type == self.syms.argument and + obj.children[0].value in {'**', '*'}): + return # Make no change. + names = ('sys', 'intern') + new = ImportAndCall(node, results, names) + touch_import(None, 'sys', node) + return new diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_isinstance.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_isinstance.py new file mode 100644 index 0000000000..bebb1de120 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_isinstance.py @@ -0,0 +1,52 @@ +# Copyright 2008 Armin Ronacher. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer that cleans up a tuple argument to isinstance after the tokens +in it were fixed. This is mainly used to remove double occurrences of +tokens as a leftover of the long -> int / unicode -> str conversion. + +eg. isinstance(x, (int, long)) -> isinstance(x, (int, int)) + -> isinstance(x, int) +""" + +from .. import fixer_base +from ..fixer_util import token + + +class FixIsinstance(fixer_base.BaseFix): + BM_compatible = True + PATTERN = """ + power< + 'isinstance' + trailer< '(' arglist< any ',' atom< '(' + args=testlist_gexp< any+ > + ')' > > ')' > + > + """ + + run_order = 6 + + def transform(self, node, results): + names_inserted = set() + testlist = results["args"] + args = testlist.children + new_args = [] + iterator = enumerate(args) + for idx, arg in iterator: + if arg.type == token.NAME and arg.value in names_inserted: + if idx < len(args) - 1 and args[idx + 1].type == token.COMMA: + next(iterator) + continue + else: + new_args.append(arg) + if arg.type == token.NAME: + names_inserted.add(arg.value) + if new_args and new_args[-1].type == token.COMMA: + del new_args[-1] + if len(new_args) == 1: + atom = testlist.parent + new_args[0].prefix = atom.prefix + atom.replace(new_args[0]) + else: + args[:] = new_args + node.changed() diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_itertools.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_itertools.py new file mode 100644 index 0000000000..8e78d6c689 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_itertools.py @@ -0,0 +1,43 @@ +""" Fixer for itertools.(imap|ifilter|izip) --> (map|filter|zip) and + itertools.ifilterfalse --> itertools.filterfalse (bugs 2360-2363) + + imports from itertools are fixed in fix_itertools_import.py + + If itertools is imported as something else (ie: import itertools as it; + it.izip(spam, eggs)) method calls will not get fixed. + """ + +# Local imports +from .. import fixer_base +from ..fixer_util import Name + +class FixItertools(fixer_base.BaseFix): + BM_compatible = True + it_funcs = "('imap'|'ifilter'|'izip'|'izip_longest'|'ifilterfalse')" + PATTERN = """ + power< it='itertools' + trailer< + dot='.' 
func=%(it_funcs)s > trailer< '(' [any] ')' > > + | + power< func=%(it_funcs)s trailer< '(' [any] ')' > > + """ %(locals()) + + # Needs to be run after fix_(map|zip|filter) + run_order = 6 + + def transform(self, node, results): + prefix = None + func = results['func'][0] + if ('it' in results and + func.value not in ('ifilterfalse', 'izip_longest')): + dot, it = (results['dot'], results['it']) + # Remove the 'itertools' + prefix = it.prefix + it.remove() + # Replace the node which contains ('.', 'function') with the + # function (to be consistent with the second part of the pattern) + dot.remove() + func.parent.replace(func) + + prefix = prefix or func.prefix + func.replace(Name(func.value[1:], prefix=prefix)) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_itertools_imports.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_itertools_imports.py new file mode 100644 index 0000000000..0ddbc7b842 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_itertools_imports.py @@ -0,0 +1,57 @@ +""" Fixer for imports of itertools.(imap|ifilter|izip|ifilterfalse) """ + +# Local imports +from lib2to3 import fixer_base +from lib2to3.fixer_util import BlankLine, syms, token + + +class FixItertoolsImports(fixer_base.BaseFix): + BM_compatible = True + PATTERN = """ + import_from< 'from' 'itertools' 'import' imports=any > + """ %(locals()) + + def transform(self, node, results): + imports = results['imports'] + if imports.type == syms.import_as_name or not imports.children: + children = [imports] + else: + children = imports.children + for child in children[::2]: + if child.type == token.NAME: + member = child.value + name_node = child + elif child.type == token.STAR: + # Just leave the import as is. + return + else: + assert child.type == syms.import_as_name + name_node = child.children[0] + member_name = name_node.value + if member_name in ('imap', 'izip', 'ifilter'): + child.value = None + child.remove() + elif member_name in ('ifilterfalse', 'izip_longest'): + node.changed() + name_node.value = ('filterfalse' if member_name[1] == 'f' + else 'zip_longest') + + # Make sure the import statement is still sane + children = imports.children[:] or [imports] + remove_comma = True + for child in children: + if remove_comma and child.type == token.COMMA: + child.remove() + else: + remove_comma ^= True + + while children and children[-1].type == token.COMMA: + children.pop().remove() + + # If there are no imports left, just get rid of the entire statement + if (not (imports.children or getattr(imports, 'value', None)) or + imports.parent is None): + p = node.prefix + node = BlankLine() + node.prefix = p + return node diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_long.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_long.py new file mode 100644 index 0000000000..f227c9f498 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_long.py @@ -0,0 +1,19 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer that turns 'long' into 'int' everywhere. 
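Taken together, fix_itertools and its companion fix_itertools_imports (both shown above) perform roughly the following rewrites; this is an editorial sketch with placeholder names, not part of the patch.

# Before:
#     from itertools import imap, izip, ifilterfalse
#     pairs = izip(keys, values)
#     rest = itertools.ifilterfalse(pred, items)
#
# After:
#     from itertools import filterfalse
#     pairs = zip(keys, values)
#     rest = itertools.filterfalse(pred, items)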
+""" + +# Local imports +from lib2to3 import fixer_base +from lib2to3.fixer_util import is_probably_builtin + + +class FixLong(fixer_base.BaseFix): + BM_compatible = True + PATTERN = "'long'" + + def transform(self, node, results): + if is_probably_builtin(node): + node.value = "int" + node.changed() diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_map.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_map.py new file mode 100644 index 0000000000..78cf81c6f9 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_map.py @@ -0,0 +1,110 @@ +# Copyright 2007 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer that changes map(F, ...) into list(map(F, ...)) unless there +exists a 'from future_builtins import map' statement in the top-level +namespace. + +As a special case, map(None, X) is changed into list(X). (This is +necessary because the semantics are changed in this case -- the new +map(None, X) is equivalent to [(x,) for x in X].) + +We avoid the transformation (except for the special case mentioned +above) if the map() call is directly contained in iter(<>), list(<>), +tuple(<>), sorted(<>), ...join(<>), or for V in <>:. + +NOTE: This is still not correct if the original code was depending on +map(F, X, Y, ...) to go on until the longest argument is exhausted, +substituting None for missing values -- like zip(), it now stops as +soon as the shortest argument is exhausted. +""" + +# Local imports +from ..pgen2 import token +from .. import fixer_base +from ..fixer_util import Name, ArgList, Call, ListComp, in_special_context +from ..pygram import python_symbols as syms +from ..pytree import Node + + +class FixMap(fixer_base.ConditionalFix): + BM_compatible = True + + PATTERN = """ + map_none=power< + 'map' + trailer< '(' arglist< 'None' ',' arg=any [','] > ')' > + [extra_trailers=trailer*] + > + | + map_lambda=power< + 'map' + trailer< + '(' + arglist< + lambdef< 'lambda' + (fp=NAME | vfpdef< '(' fp=NAME ')'> ) ':' xp=any + > + ',' + it=any + > + ')' + > + [extra_trailers=trailer*] + > + | + power< + 'map' args=trailer< '(' [any] ')' > + [extra_trailers=trailer*] + > + """ + + skip_on = 'future_builtins.map' + + def transform(self, node, results): + if self.should_skip(node): + return + + trailers = [] + if 'extra_trailers' in results: + for t in results['extra_trailers']: + trailers.append(t.clone()) + + if node.parent.type == syms.simple_stmt: + self.warning(node, "You should use a for loop here") + new = node.clone() + new.prefix = "" + new = Call(Name("list"), [new]) + elif "map_lambda" in results: + new = ListComp(results["xp"].clone(), + results["fp"].clone(), + results["it"].clone()) + new = Node(syms.power, [new] + trailers, prefix="") + + else: + if "map_none" in results: + new = results["arg"].clone() + new.prefix = "" + else: + if "args" in results: + args = results["args"] + if args.type == syms.trailer and \ + args.children[1].type == syms.arglist and \ + args.children[1].children[0].type == token.NAME and \ + args.children[1].children[0].value == "None": + self.warning(node, "cannot convert map(None, ...) 
" + "with multiple arguments because map() " + "now truncates to the shortest sequence") + return + + new = Node(syms.power, [Name("map"), args.clone()]) + new.prefix = "" + + if in_special_context(node): + return None + + new = Node(syms.power, [Name("list"), ArgList([new])] + trailers) + new.prefix = "" + + new.prefix = node.prefix + return new diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_metaclass.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_metaclass.py new file mode 100644 index 0000000000..fe547b2228 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_metaclass.py @@ -0,0 +1,228 @@ +"""Fixer for __metaclass__ = X -> (metaclass=X) methods. + + The various forms of classef (inherits nothing, inherits once, inherits + many) don't parse the same in the CST so we look at ALL classes for + a __metaclass__ and if we find one normalize the inherits to all be + an arglist. + + For one-liner classes ('class X: pass') there is no indent/dedent so + we normalize those into having a suite. + + Moving the __metaclass__ into the classdef can also cause the class + body to be empty so there is some special casing for that as well. + + This fixer also tries very hard to keep original indenting and spacing + in all those corner cases. + +""" +# Author: Jack Diederich + +# Local imports +from .. import fixer_base +from ..pygram import token +from ..fixer_util import syms, Node, Leaf + + +def has_metaclass(parent): + """ we have to check the cls_node without changing it. + There are two possibilities: + 1) clsdef => suite => simple_stmt => expr_stmt => Leaf('__meta') + 2) clsdef => simple_stmt => expr_stmt => Leaf('__meta') + """ + for node in parent.children: + if node.type == syms.suite: + return has_metaclass(node) + elif node.type == syms.simple_stmt and node.children: + expr_node = node.children[0] + if expr_node.type == syms.expr_stmt and expr_node.children: + left_side = expr_node.children[0] + if isinstance(left_side, Leaf) and \ + left_side.value == '__metaclass__': + return True + return False + + +def fixup_parse_tree(cls_node): + """ one-line classes don't get a suite in the parse tree so we add + one to normalize the tree + """ + for node in cls_node.children: + if node.type == syms.suite: + # already in the preferred format, do nothing + return + + # !%@#! one-liners have no suite node, we have to fake one up + for i, node in enumerate(cls_node.children): + if node.type == token.COLON: + break + else: + raise ValueError("No class suite and no ':'!") + + # move everything into a suite node + suite = Node(syms.suite, []) + while cls_node.children[i+1:]: + move_node = cls_node.children[i+1] + suite.append_child(move_node.clone()) + move_node.remove() + cls_node.append_child(suite) + node = suite + + +def fixup_simple_stmt(parent, i, stmt_node): + """ if there is a semi-colon all the parts count as part of the same + simple_stmt. 
We just want the __metaclass__ part so we move + everything after the semi-colon into its own simple_stmt node + """ + for semi_ind, node in enumerate(stmt_node.children): + if node.type == token.SEMI: # *sigh* + break + else: + return + + node.remove() # kill the semicolon + new_expr = Node(syms.expr_stmt, []) + new_stmt = Node(syms.simple_stmt, [new_expr]) + while stmt_node.children[semi_ind:]: + move_node = stmt_node.children[semi_ind] + new_expr.append_child(move_node.clone()) + move_node.remove() + parent.insert_child(i, new_stmt) + new_leaf1 = new_stmt.children[0].children[0] + old_leaf1 = stmt_node.children[0].children[0] + new_leaf1.prefix = old_leaf1.prefix + + +def remove_trailing_newline(node): + if node.children and node.children[-1].type == token.NEWLINE: + node.children[-1].remove() + + +def find_metas(cls_node): + # find the suite node (Mmm, sweet nodes) + for node in cls_node.children: + if node.type == syms.suite: + break + else: + raise ValueError("No class suite!") + + # look for simple_stmt[ expr_stmt[ Leaf('__metaclass__') ] ] + for i, simple_node in list(enumerate(node.children)): + if simple_node.type == syms.simple_stmt and simple_node.children: + expr_node = simple_node.children[0] + if expr_node.type == syms.expr_stmt and expr_node.children: + # Check if the expr_node is a simple assignment. + left_node = expr_node.children[0] + if isinstance(left_node, Leaf) and \ + left_node.value == '__metaclass__': + # We found an assignment to __metaclass__. + fixup_simple_stmt(node, i, simple_node) + remove_trailing_newline(simple_node) + yield (node, i, simple_node) + + +def fixup_indent(suite): + """ If an INDENT is followed by a thing with a prefix then nuke the prefix + Otherwise we get in trouble when removing __metaclass__ at suite start + """ + kids = suite.children[::-1] + # find the first indent + while kids: + node = kids.pop() + if node.type == token.INDENT: + break + + # find the first Leaf + while kids: + node = kids.pop() + if isinstance(node, Leaf) and node.type != token.DEDENT: + if node.prefix: + node.prefix = '' + return + else: + kids.extend(node.children[::-1]) + + +class FixMetaclass(fixer_base.BaseFix): + BM_compatible = True + + PATTERN = """ + classdef<any*> + """ + + def transform(self, node, results): + if not has_metaclass(node): + return + + fixup_parse_tree(node) + + # find metaclasses, keep the last one + last_metaclass = None + for suite, i, stmt in find_metas(node): + last_metaclass = stmt + stmt.remove() + + text_type = node.children[0].type # always Leaf(nnn, 'class') + + # figure out what kind of classdef we have + if len(node.children) == 7: + # Node(classdef, ['class', 'name', '(', arglist, ')', ':', suite]) + # 0 1 2 3 4 5 6 + if node.children[3].type == syms.arglist: + arglist = node.children[3] + # Node(classdef, ['class', 'name', '(', 'Parent', ')', ':', suite]) + else: + parent = node.children[3].clone() + arglist = Node(syms.arglist, [parent]) + node.set_child(3, arglist) + elif len(node.children) == 6: + # Node(classdef, ['class', 'name', '(', ')', ':', suite]) + # 0 1 2 3 4 5 + arglist = Node(syms.arglist, []) + node.insert_child(3, arglist) + elif len(node.children) == 4: + # Node(classdef, ['class', 'name', ':', suite]) + # 0 1 2 3 + arglist = Node(syms.arglist, []) + node.insert_child(2, Leaf(token.RPAR, ')')) + node.insert_child(2, arglist) + node.insert_child(2, Leaf(token.LPAR, '(')) + else: + raise ValueError("Unexpected class definition") + + # now stick the metaclass in the arglist + meta_txt = 
last_metaclass.children[0].children[0] + meta_txt.value = 'metaclass' + orig_meta_prefix = meta_txt.prefix + + if arglist.children: + arglist.append_child(Leaf(token.COMMA, ',')) + meta_txt.prefix = ' ' + else: + meta_txt.prefix = '' + + # compact the expression "metaclass = Meta" -> "metaclass=Meta" + expr_stmt = last_metaclass.children[0] + assert expr_stmt.type == syms.expr_stmt + expr_stmt.children[1].prefix = '' + expr_stmt.children[2].prefix = '' + + arglist.append_child(last_metaclass) + + fixup_indent(suite) + + # check for empty suite + if not suite.children: + # one-liner that was just __metaclass_ + suite.remove() + pass_leaf = Leaf(text_type, 'pass') + pass_leaf.prefix = orig_meta_prefix + node.append_child(pass_leaf) + node.append_child(Leaf(token.NEWLINE, '\n')) + + elif len(suite.children) > 1 and \ + (suite.children[-2].type == token.INDENT and + suite.children[-1].type == token.DEDENT): + # there was only one line in the class body and it was __metaclass__ + pass_leaf = Leaf(text_type, 'pass') + suite.insert_child(-1, pass_leaf) + suite.insert_child(-1, Leaf(token.NEWLINE, '\n')) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_methodattrs.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_methodattrs.py new file mode 100644 index 0000000000..7f9004f00e --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_methodattrs.py @@ -0,0 +1,24 @@ +"""Fix bound method attributes (method.im_? -> method.__?__). +""" +# Author: Christian Heimes + +# Local imports +from .. import fixer_base +from ..fixer_util import Name + +MAP = { + "im_func" : "__func__", + "im_self" : "__self__", + "im_class" : "__self__.__class__" + } + +class FixMethodattrs(fixer_base.BaseFix): + BM_compatible = True + PATTERN = """ + power< any+ trailer< '.' attr=('im_func' | 'im_self' | 'im_class') > any* > + """ + + def transform(self, node, results): + attr = results["attr"][0] + new = MAP[attr.value] + attr.replace(Name(new, prefix=attr.prefix)) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_ne.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_ne.py new file mode 100644 index 0000000000..e3ee10f4a6 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_ne.py @@ -0,0 +1,23 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer that turns <> into !=.""" + +# Local imports +from .. import pytree +from ..pgen2 import token +from .. import fixer_base + + +class FixNe(fixer_base.BaseFix): + # This is so simple that we don't need the pattern compiler. + + _accept_type = token.NOTEQUAL + + def match(self, node): + # Override + return node.value == "<>" + + def transform(self, node, results): + new = pytree.Leaf(token.NOTEQUAL, "!=", prefix=node.prefix) + return new diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_next.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_next.py new file mode 100644 index 0000000000..9f6305e1d4 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_next.py @@ -0,0 +1,103 @@ +"""Fixer for it.next() -> next(it), per PEP 3114.""" +# Author: Collin Winter + +# Things that currently aren't covered: +# - listcomp "next" names aren't warned +# - "with" statement targets aren't checked + +# Local imports +from ..pgen2 import token +from ..pygram import python_symbols as syms +from .. 
import fixer_base +from ..fixer_util import Name, Call, find_binding + +bind_warning = "Calls to builtin next() possibly shadowed by global binding" + + +class FixNext(fixer_base.BaseFix): + BM_compatible = True + PATTERN = """ + power< base=any+ trailer< '.' attr='next' > trailer< '(' ')' > > + | + power< head=any+ trailer< '.' attr='next' > not trailer< '(' ')' > > + | + classdef< 'class' any+ ':' + suite< any* + funcdef< 'def' + name='next' + parameters< '(' NAME ')' > any+ > + any* > > + | + global=global_stmt< 'global' any* 'next' any* > + """ + + order = "pre" # Pre-order tree traversal + + def start_tree(self, tree, filename): + super(FixNext, self).start_tree(tree, filename) + + n = find_binding('next', tree) + if n: + self.warning(n, bind_warning) + self.shadowed_next = True + else: + self.shadowed_next = False + + def transform(self, node, results): + assert results + + base = results.get("base") + attr = results.get("attr") + name = results.get("name") + + if base: + if self.shadowed_next: + attr.replace(Name("__next__", prefix=attr.prefix)) + else: + base = [n.clone() for n in base] + base[0].prefix = "" + node.replace(Call(Name("next", prefix=node.prefix), base)) + elif name: + n = Name("__next__", prefix=name.prefix) + name.replace(n) + elif attr: + # We don't do this transformation if we're assigning to "x.next". + # Unfortunately, it doesn't seem possible to do this in PATTERN, + # so it's being done here. + if is_assign_target(node): + head = results["head"] + if "".join([str(n) for n in head]).strip() == '__builtin__': + self.warning(node, bind_warning) + return + attr.replace(Name("__next__")) + elif "global" in results: + self.warning(node, bind_warning) + self.shadowed_next = True + + +### The following functions help test if node is part of an assignment +### target. + +def is_assign_target(node): + assign = find_assign(node) + if assign is None: + return False + + for child in assign.children: + if child.type == token.EQUAL: + return False + elif is_subtree(child, node): + return True + return False + +def find_assign(node): + if node.type == syms.expr_stmt: + return node + if node.type == syms.simple_stmt or node.parent is None: + return None + return find_assign(node.parent) + +def is_subtree(root, node): + if root == node: + return True + return any(is_subtree(c, node) for c in root.children) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_nonzero.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_nonzero.py new file mode 100644 index 0000000000..c2295969a7 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_nonzero.py @@ -0,0 +1,21 @@ +"""Fixer for __nonzero__ -> __bool__ methods.""" +# Author: Collin Winter + +# Local imports +from .. import fixer_base +from ..fixer_util import Name + +class FixNonzero(fixer_base.BaseFix): + BM_compatible = True + PATTERN = """ + classdef< 'class' any+ ':' + suite< any* + funcdef< 'def' name='__nonzero__' + parameters< '(' NAME ')' > any+ > + any* > > + """ + + def transform(self, node, results): + name = results["name"] + new = Name("__bool__", prefix=name.prefix) + name.replace(new) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_numliterals.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_numliterals.py new file mode 100644 index 0000000000..79207d4aa3 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_numliterals.py @@ -0,0 +1,28 @@ +"""Fixer that turns 1L into 1, 0755 into 0o755. +""" +# Copyright 2007 Georg Brandl. +# Licensed to PSF under a Contributor Agreement. 
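As stated in its docstring, fix_numliterals rewrites the two Python 2 literal forms; a compact sketch:

# Before:           After:
#     n = 1L            n = 1
#     mode = 0755       mode = 0o755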
+ +# Local imports +from ..pgen2 import token +from .. import fixer_base +from ..fixer_util import Number + + +class FixNumliterals(fixer_base.BaseFix): + # This is so simple that we don't need the pattern compiler. + + _accept_type = token.NUMBER + + def match(self, node): + # Override + return (node.value.startswith("0") or node.value[-1] in "Ll") + + def transform(self, node, results): + val = node.value + if val[-1] in 'Ll': + val = val[:-1] + elif val.startswith('0') and val.isdigit() and len(set(val)) > 1: + val = "0o" + val[1:] + + return Number(val, prefix=node.prefix) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_operator.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_operator.py new file mode 100644 index 0000000000..d303cd2018 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_operator.py @@ -0,0 +1,97 @@ +"""Fixer for operator functions. + +operator.isCallable(obj) -> callable(obj) +operator.sequenceIncludes(obj) -> operator.contains(obj) +operator.isSequenceType(obj) -> isinstance(obj, collections.abc.Sequence) +operator.isMappingType(obj) -> isinstance(obj, collections.abc.Mapping) +operator.isNumberType(obj) -> isinstance(obj, numbers.Number) +operator.repeat(obj, n) -> operator.mul(obj, n) +operator.irepeat(obj, n) -> operator.imul(obj, n) +""" + +import collections.abc + +# Local imports +from lib2to3 import fixer_base +from lib2to3.fixer_util import Call, Name, String, touch_import + + +def invocation(s): + def dec(f): + f.invocation = s + return f + return dec + + +class FixOperator(fixer_base.BaseFix): + BM_compatible = True + order = "pre" + + methods = """ + method=('isCallable'|'sequenceIncludes' + |'isSequenceType'|'isMappingType'|'isNumberType' + |'repeat'|'irepeat') + """ + obj = "'(' obj=any ')'" + PATTERN = """ + power< module='operator' + trailer< '.' 
%(methods)s > trailer< %(obj)s > > + | + power< %(methods)s trailer< %(obj)s > > + """ % dict(methods=methods, obj=obj) + + def transform(self, node, results): + method = self._check_method(node, results) + if method is not None: + return method(node, results) + + @invocation("operator.contains(%s)") + def _sequenceIncludes(self, node, results): + return self._handle_rename(node, results, "contains") + + @invocation("callable(%s)") + def _isCallable(self, node, results): + obj = results["obj"] + return Call(Name("callable"), [obj.clone()], prefix=node.prefix) + + @invocation("operator.mul(%s)") + def _repeat(self, node, results): + return self._handle_rename(node, results, "mul") + + @invocation("operator.imul(%s)") + def _irepeat(self, node, results): + return self._handle_rename(node, results, "imul") + + @invocation("isinstance(%s, collections.abc.Sequence)") + def _isSequenceType(self, node, results): + return self._handle_type2abc(node, results, "collections.abc", "Sequence") + + @invocation("isinstance(%s, collections.abc.Mapping)") + def _isMappingType(self, node, results): + return self._handle_type2abc(node, results, "collections.abc", "Mapping") + + @invocation("isinstance(%s, numbers.Number)") + def _isNumberType(self, node, results): + return self._handle_type2abc(node, results, "numbers", "Number") + + def _handle_rename(self, node, results, name): + method = results["method"][0] + method.value = name + method.changed() + + def _handle_type2abc(self, node, results, module, abc): + touch_import(None, module, node) + obj = results["obj"] + args = [obj.clone(), String(", " + ".".join([module, abc]))] + return Call(Name("isinstance"), args, prefix=node.prefix) + + def _check_method(self, node, results): + method = getattr(self, "_" + results["method"][0].value) + if isinstance(method, collections.abc.Callable): + if "module" in results: + return method + else: + sub = (str(results["obj"]),) + invocation_str = method.invocation % sub + self.warning(node, "You should use '%s' here." % invocation_str) + return None diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_paren.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_paren.py new file mode 100644 index 0000000000..df3da5f523 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_paren.py @@ -0,0 +1,44 @@ +"""Fixer that adds parentheses where they are required + +This converts ``[x for x in 1, 2]`` to ``[x for x in (1, 2)]``.""" + +# By Taek Joo Kim and Benjamin Peterson + +# Local imports +from .. import fixer_base +from ..fixer_util import LParen, RParen + +# XXX This doesn't support nested for loops like [x for x in 1, 2 for x in 1, 2] +class FixParen(fixer_base.BaseFix): + BM_compatible = True + + PATTERN = """ + atom< ('[' | '(') + (listmaker< any + comp_for< + 'for' NAME 'in' + target=testlist_safe< any (',' any)+ [','] + > + [any] + > + > + | + testlist_gexp< any + comp_for< + 'for' NAME 'in' + target=testlist_safe< any (',' any)+ [','] + > + [any] + > + >) + (']' | ')') > + """ + + def transform(self, node, results): + target = results["target"] + + lparen = LParen() + lparen.prefix = target.prefix + target.prefix = "" # Make it hug the parentheses + target.insert_child(0, lparen) + target.append_child(RParen()) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_print.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_print.py new file mode 100644 index 0000000000..8780322265 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_print.py @@ -0,0 +1,87 @@ +# Copyright 2006 Google, Inc. 
All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer for print. + +Change: + 'print' into 'print()' + 'print ...' into 'print(...)' + 'print ... ,' into 'print(..., end=" ")' + 'print >>x, ...' into 'print(..., file=x)' + +No changes are applied if print_function is imported from __future__ + +""" + +# Local imports +from .. import patcomp +from .. import pytree +from ..pgen2 import token +from .. import fixer_base +from ..fixer_util import Name, Call, Comma, String + + +parend_expr = patcomp.compile_pattern( + """atom< '(' [atom|STRING|NAME] ')' >""" + ) + + +class FixPrint(fixer_base.BaseFix): + + BM_compatible = True + + PATTERN = """ + simple_stmt< any* bare='print' any* > | print_stmt + """ + + def transform(self, node, results): + assert results + + bare_print = results.get("bare") + + if bare_print: + # Special-case print all by itself + bare_print.replace(Call(Name("print"), [], + prefix=bare_print.prefix)) + return + assert node.children[0] == Name("print") + args = node.children[1:] + if len(args) == 1 and parend_expr.match(args[0]): + # We don't want to keep sticking parens around an + # already-parenthesised expression. + return + + sep = end = file = None + if args and args[-1] == Comma(): + args = args[:-1] + end = " " + if args and args[0] == pytree.Leaf(token.RIGHTSHIFT, ">>"): + assert len(args) >= 2 + file = args[1].clone() + args = args[3:] # Strip a possible comma after the file expression + # Now synthesize a print(args, sep=..., end=..., file=...) node. + l_args = [arg.clone() for arg in args] + if l_args: + l_args[0].prefix = "" + if sep is not None or end is not None or file is not None: + if sep is not None: + self.add_kwarg(l_args, "sep", String(repr(sep))) + if end is not None: + self.add_kwarg(l_args, "end", String(repr(end))) + if file is not None: + self.add_kwarg(l_args, "file", file) + n_stmt = Call(Name("print"), l_args) + n_stmt.prefix = node.prefix + return n_stmt + + def add_kwarg(self, l_nodes, s_kwd, n_expr): + # XXX All this prefix-setting may lose comments (though rarely) + n_expr.prefix = "" + n_argument = pytree.Node(self.syms.argument, + (Name(s_kwd), + pytree.Leaf(token.EQUAL, "="), + n_expr)) + if l_nodes: + l_nodes.append(Comma()) + n_argument.prefix = " " + l_nodes.append(n_argument) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_raise.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_raise.py new file mode 100644 index 0000000000..05aa21e74a --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_raise.py @@ -0,0 +1,90 @@ +"""Fixer for 'raise E, V, T' + +raise -> raise +raise E -> raise E +raise E, V -> raise E(V) +raise E, V, T -> raise E(V).with_traceback(T) +raise E, None, T -> raise E.with_traceback(T) + +raise (((E, E'), E''), E'''), V -> raise E(V) +raise "foo", V, T -> warns about string exceptions + + +CAVEATS: +1) "raise E, V" will be incorrectly translated if V is an exception + instance. The correct Python 3 idiom is + + raise E from V + + but since we can't detect instance-hood by syntax alone and since + any client code would have to be changed as well, we don't automate + this. +""" +# Author: Collin Winter + +# Local imports +from .. import pytree +from ..pgen2 import token +from .. 
import fixer_base +from ..fixer_util import Name, Call, Attr, ArgList, is_tuple + +class FixRaise(fixer_base.BaseFix): + + BM_compatible = True + PATTERN = """ + raise_stmt< 'raise' exc=any [',' val=any [',' tb=any]] > + """ + + def transform(self, node, results): + syms = self.syms + + exc = results["exc"].clone() + if exc.type == token.STRING: + msg = "Python 3 does not support string exceptions" + self.cannot_convert(node, msg) + return + + # Python 2 supports + # raise ((((E1, E2), E3), E4), E5), V + # as a synonym for + # raise E1, V + # Since Python 3 will not support this, we recurse down any tuple + # literals, always taking the first element. + if is_tuple(exc): + while is_tuple(exc): + # exc.children[1:-1] is the unparenthesized tuple + # exc.children[1].children[0] is the first element of the tuple + exc = exc.children[1].children[0].clone() + exc.prefix = " " + + if "val" not in results: + # One-argument raise + new = pytree.Node(syms.raise_stmt, [Name("raise"), exc]) + new.prefix = node.prefix + return new + + val = results["val"].clone() + if is_tuple(val): + args = [c.clone() for c in val.children[1:-1]] + else: + val.prefix = "" + args = [val] + + if "tb" in results: + tb = results["tb"].clone() + tb.prefix = "" + + e = exc + # If there's a traceback and None is passed as the value, then don't + # add a call, since the user probably just wants to add a + # traceback. See issue #9661. + if val.type != token.NAME or val.value != "None": + e = Call(exc, args) + with_tb = Attr(e, Name('with_traceback')) + [ArgList([tb])] + new = pytree.Node(syms.simple_stmt, [Name("raise")] + with_tb) + new.prefix = node.prefix + return new + else: + return pytree.Node(syms.raise_stmt, + [Name("raise"), Call(exc, args)], + prefix=node.prefix) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_raw_input.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_raw_input.py new file mode 100644 index 0000000000..a51bb694b9 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_raw_input.py @@ -0,0 +1,17 @@ +"""Fixer that changes raw_input(...) into input(...).""" +# Author: Andre Roberge + +# Local imports +from .. import fixer_base +from ..fixer_util import Name + +class FixRawInput(fixer_base.BaseFix): + + BM_compatible = True + PATTERN = """ + power< name='raw_input' trailer< '(' [any] ')' > any* > + """ + + def transform(self, node, results): + name = results["name"] + name.replace(Name("input", prefix=name.prefix)) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_reduce.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_reduce.py new file mode 100644 index 0000000000..00e5aa1c33 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_reduce.py @@ -0,0 +1,35 @@ +# Copyright 2008 Armin Ronacher. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer for reduce(). + +Makes sure reduce() is imported from the functools module if reduce is +used in that module. 
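A short sketch of what fix_reduce does in practice: it does not rewrite the call itself, it only ensures the functools import is present via touch_import (the variable names below are placeholders).

# Before:
#     total = reduce(lambda a, b: a + b, values)
#
# After fix_reduce (an import line is added; the call is unchanged):
#     from functools import reduce
#     total = reduce(lambda a, b: a + b, values)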
+""" + +from lib2to3 import fixer_base +from lib2to3.fixer_util import touch_import + + + +class FixReduce(fixer_base.BaseFix): + + BM_compatible = True + order = "pre" + + PATTERN = """ + power< 'reduce' + trailer< '(' + arglist< ( + (not(argument<any '=' any>) any ',' + not(argument<any '=' any>) any) | + (not(argument<any '=' any>) any ',' + not(argument<any '=' any>) any ',' + not(argument<any '=' any>) any) + ) > + ')' > + > + """ + + def transform(self, node, results): + touch_import('functools', 'reduce', node) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_reload.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_reload.py new file mode 100644 index 0000000000..b30841131c --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_reload.py @@ -0,0 +1,36 @@ +"""Fixer for reload(). + +reload(s) -> importlib.reload(s)""" + +# Local imports +from .. import fixer_base +from ..fixer_util import ImportAndCall, touch_import + + +class FixReload(fixer_base.BaseFix): + BM_compatible = True + order = "pre" + + PATTERN = """ + power< 'reload' + trailer< lpar='(' + ( not(arglist | argument<any '=' any>) obj=any + | obj=arglist<(not argument<any '=' any>) any ','> ) + rpar=')' > + after=any* + > + """ + + def transform(self, node, results): + if results: + # I feel like we should be able to express this logic in the + # PATTERN above but I don't know how to do it so... + obj = results['obj'] + if obj: + if (obj.type == self.syms.argument and + obj.children[0].value in {'**', '*'}): + return # Make no change. + names = ('importlib', 'reload') + new = ImportAndCall(node, results, names) + touch_import(None, 'importlib', node) + return new diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_renames.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_renames.py new file mode 100644 index 0000000000..c0e3705ab7 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_renames.py @@ -0,0 +1,70 @@ +"""Fix incompatible renames + +Fixes: + * sys.maxint -> sys.maxsize +""" +# Author: Christian Heimes +# based on Collin Winter's fix_import + +# Local imports +from .. import fixer_base +from ..fixer_util import Name, attr_chain + +MAPPING = {"sys": {"maxint" : "maxsize"}, + } +LOOKUP = {} + +def alternates(members): + return "(" + "|".join(map(repr, members)) + ")" + + +def build_pattern(): + #bare = set() + for module, replace in list(MAPPING.items()): + for old_attr, new_attr in list(replace.items()): + LOOKUP[(module, old_attr)] = new_attr + #bare.add(module) + #bare.add(old_attr) + #yield """ + # import_name< 'import' (module=%r + # | dotted_as_names< any* module=%r any* >) > + # """ % (module, module) + yield """ + import_from< 'from' module_name=%r 'import' + ( attr_name=%r | import_as_name< attr_name=%r 'as' any >) > + """ % (module, old_attr, old_attr) + yield """ + power< module_name=%r trailer< '.' 
attr_name=%r > any* > + """ % (module, old_attr) + #yield """bare_name=%s""" % alternates(bare) + + +class FixRenames(fixer_base.BaseFix): + BM_compatible = True + PATTERN = "|".join(build_pattern()) + + order = "pre" # Pre-order tree traversal + + # Don't match the node if it's within another match + def match(self, node): + match = super(FixRenames, self).match + results = match(node) + if results: + if any(match(obj) for obj in attr_chain(node, "parent")): + return False + return results + return False + + #def start_tree(self, tree, filename): + # super(FixRenames, self).start_tree(tree, filename) + # self.replace = {} + + def transform(self, node, results): + mod_name = results.get("module_name") + attr_name = results.get("attr_name") + #bare_name = results.get("bare_name") + #import_mod = results.get("module") + + if mod_name and attr_name: + new_attr = LOOKUP[(mod_name.value, attr_name.value)] + attr_name.replace(Name(new_attr, prefix=attr_name.prefix)) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_repr.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_repr.py new file mode 100644 index 0000000000..1150bb8b9d --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_repr.py @@ -0,0 +1,23 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer that transforms `xyzzy` into repr(xyzzy).""" + +# Local imports +from .. import fixer_base +from ..fixer_util import Call, Name, parenthesize + + +class FixRepr(fixer_base.BaseFix): + + BM_compatible = True + PATTERN = """ + atom < '`' expr=any '`' > + """ + + def transform(self, node, results): + expr = results["expr"].clone() + + if expr.type == self.syms.testlist1: + expr = parenthesize(expr) + return Call(Name("repr"), [expr], prefix=node.prefix) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_set_literal.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_set_literal.py new file mode 100644 index 0000000000..762550cf73 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_set_literal.py @@ -0,0 +1,53 @@ +""" +Optional fixer to transform set() calls to set literals. +""" + +# Author: Benjamin Peterson + +from lib2to3 import fixer_base, pytree +from lib2to3.fixer_util import token, syms + + + +class FixSetLiteral(fixer_base.BaseFix): + + BM_compatible = True + explicit = True + + PATTERN = """power< 'set' trailer< '(' + (atom=atom< '[' (items=listmaker< any ((',' any)* [',']) > + | + single=any) ']' > + | + atom< '(' items=testlist_gexp< any ((',' any)* [',']) > ')' > + ) + ')' > > + """ + + def transform(self, node, results): + single = results.get("single") + if single: + # Make a fake listmaker + fake = pytree.Node(syms.listmaker, [single.clone()]) + single.replace(fake) + items = fake + else: + items = results["items"] + + # Build the contents of the literal + literal = [pytree.Leaf(token.LBRACE, "{")] + literal.extend(n.clone() for n in items.children) + literal.append(pytree.Leaf(token.RBRACE, "}")) + # Set the prefix of the right brace to that of the ')' or ']' + literal[-1].prefix = items.next_sibling.prefix + maker = pytree.Node(syms.dictsetmaker, literal) + maker.prefix = node.prefix + + # If the original was a one tuple, we need to remove the extra comma. + if len(maker.children) == 4: + n = maker.children[2] + n.remove() + maker.children[-1].prefix = n.prefix + + # Finally, replace the set call with our shiny new literal. 
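Judging from the pattern above, the optional fix_set_literal fixer rewrites calls of roughly the following shape; an illustrative sketch, and since the fixer is explicit it must be requested by name.

# Before:
#     s = set([1, 2, 3])
#     t = set((x, y))
#
# After fix_set_literal:
#     s = {1, 2, 3}
#     t = {x, y}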
+ return maker diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_standarderror.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_standarderror.py new file mode 100644 index 0000000000..dc742167e6 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_standarderror.py @@ -0,0 +1,18 @@ +# Copyright 2007 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer for StandardError -> Exception.""" + +# Local imports +from .. import fixer_base +from ..fixer_util import Name + + +class FixStandarderror(fixer_base.BaseFix): + BM_compatible = True + PATTERN = """ + 'StandardError' + """ + + def transform(self, node, results): + return Name("Exception", prefix=node.prefix) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_sys_exc.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_sys_exc.py new file mode 100644 index 0000000000..f603969037 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_sys_exc.py @@ -0,0 +1,30 @@ +"""Fixer for sys.exc_{type, value, traceback} + +sys.exc_type -> sys.exc_info()[0] +sys.exc_value -> sys.exc_info()[1] +sys.exc_traceback -> sys.exc_info()[2] +""" + +# By Jeff Balogh and Benjamin Peterson + +# Local imports +from .. import fixer_base +from ..fixer_util import Attr, Call, Name, Number, Subscript, Node, syms + +class FixSysExc(fixer_base.BaseFix): + # This order matches the ordering of sys.exc_info(). + exc_info = ["exc_type", "exc_value", "exc_traceback"] + BM_compatible = True + PATTERN = """ + power< 'sys' trailer< dot='.' attribute=(%s) > > + """ % '|'.join("'%s'" % e for e in exc_info) + + def transform(self, node, results): + sys_attr = results["attribute"][0] + index = Number(self.exc_info.index(sys_attr.value)) + + call = Call(Name("exc_info"), prefix=sys_attr.prefix) + attr = Attr(Name("sys"), call) + attr[1].children[0].prefix = results["dot"].prefix + attr.append(Subscript(index)) + return Node(syms.power, attr, prefix=node.prefix) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_throw.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_throw.py new file mode 100644 index 0000000000..aac29169b4 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_throw.py @@ -0,0 +1,56 @@ +"""Fixer for generator.throw(E, V, T). + +g.throw(E) -> g.throw(E) +g.throw(E, V) -> g.throw(E(V)) +g.throw(E, V, T) -> g.throw(E(V).with_traceback(T)) + +g.throw("foo"[, V[, T]]) will warn about string exceptions.""" +# Author: Collin Winter + +# Local imports +from .. import pytree +from ..pgen2 import token +from .. import fixer_base +from ..fixer_util import Name, Call, ArgList, Attr, is_tuple + +class FixThrow(fixer_base.BaseFix): + BM_compatible = True + PATTERN = """ + power< any trailer< '.' 'throw' > + trailer< '(' args=arglist< exc=any ',' val=any [',' tb=any] > ')' > + > + | + power< any trailer< '.' 
'throw' > trailer< '(' exc=any ')' > > + """ + + def transform(self, node, results): + syms = self.syms + + exc = results["exc"].clone() + if exc.type is token.STRING: + self.cannot_convert(node, "Python 3 does not support string exceptions") + return + + # Leave "g.throw(E)" alone + val = results.get("val") + if val is None: + return + + val = val.clone() + if is_tuple(val): + args = [c.clone() for c in val.children[1:-1]] + else: + val.prefix = "" + args = [val] + + throw_args = results["args"] + + if "tb" in results: + tb = results["tb"].clone() + tb.prefix = "" + + e = Call(exc, args) + with_tb = Attr(e, Name('with_traceback')) + [ArgList([tb])] + throw_args.replace(pytree.Node(syms.power, with_tb)) + else: + throw_args.replace(Call(exc, args)) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_tuple_params.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_tuple_params.py new file mode 100644 index 0000000000..cad755ffdb --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_tuple_params.py @@ -0,0 +1,175 @@ +"""Fixer for function definitions with tuple parameters. + +def func(((a, b), c), d): + ... + + -> + +def func(x, d): + ((a, b), c) = x + ... + +It will also support lambdas: + + lambda (x, y): x + y -> lambda t: t[0] + t[1] + + # The parens are a syntax error in Python 3 + lambda (x): x + y -> lambda x: x + y +""" +# Author: Collin Winter + +# Local imports +from .. import pytree +from ..pgen2 import token +from .. import fixer_base +from ..fixer_util import Assign, Name, Newline, Number, Subscript, syms + +def is_docstring(stmt): + return isinstance(stmt, pytree.Node) and \ + stmt.children[0].type == token.STRING + +class FixTupleParams(fixer_base.BaseFix): + run_order = 4 #use a lower order since lambda is part of other + #patterns + BM_compatible = True + + PATTERN = """ + funcdef< 'def' any parameters< '(' args=any ')' > + ['->' any] ':' suite=any+ > + | + lambda= + lambdef< 'lambda' args=vfpdef< '(' inner=any ')' > + ':' body=any + > + """ + + def transform(self, node, results): + if "lambda" in results: + return self.transform_lambda(node, results) + + new_lines = [] + suite = results["suite"] + args = results["args"] + # This crap is so "def foo(...): x = 5; y = 7" is handled correctly. + # TODO(cwinter): suite-cleanup + if suite[0].children[1].type == token.INDENT: + start = 2 + indent = suite[0].children[1].value + end = Newline() + else: + start = 0 + indent = "; " + end = pytree.Leaf(token.INDENT, "") + + # We need access to self for new_name(), and making this a method + # doesn't feel right. Closing over self and new_lines makes the + # code below cleaner. + def handle_tuple(tuple_arg, add_prefix=False): + n = Name(self.new_name()) + arg = tuple_arg.clone() + arg.prefix = "" + stmt = Assign(arg, n.clone()) + if add_prefix: + n.prefix = " " + tuple_arg.replace(n) + new_lines.append(pytree.Node(syms.simple_stmt, + [stmt, end.clone()])) + + if args.type == syms.tfpdef: + handle_tuple(args) + elif args.type == syms.typedargslist: + for i, arg in enumerate(args.children): + if arg.type == syms.tfpdef: + # Without add_prefix, the emitted code is correct, + # just ugly. + handle_tuple(arg, add_prefix=(i > 0)) + + if not new_lines: + return + + # This isn't strictly necessary, but it plays nicely with other fixers. 
+ # TODO(cwinter) get rid of this when children becomes a smart list + for line in new_lines: + line.parent = suite[0] + + # TODO(cwinter) suite-cleanup + after = start + if start == 0: + new_lines[0].prefix = " " + elif is_docstring(suite[0].children[start]): + new_lines[0].prefix = indent + after = start + 1 + + for line in new_lines: + line.parent = suite[0] + suite[0].children[after:after] = new_lines + for i in range(after+1, after+len(new_lines)+1): + suite[0].children[i].prefix = indent + suite[0].changed() + + def transform_lambda(self, node, results): + args = results["args"] + body = results["body"] + inner = simplify_args(results["inner"]) + + # Replace lambda ((((x)))): x with lambda x: x + if inner.type == token.NAME: + inner = inner.clone() + inner.prefix = " " + args.replace(inner) + return + + params = find_params(args) + to_index = map_to_index(params) + tup_name = self.new_name(tuple_name(params)) + + new_param = Name(tup_name, prefix=" ") + args.replace(new_param.clone()) + for n in body.post_order(): + if n.type == token.NAME and n.value in to_index: + subscripts = [c.clone() for c in to_index[n.value]] + new = pytree.Node(syms.power, + [new_param.clone()] + subscripts) + new.prefix = n.prefix + n.replace(new) + + +### Helper functions for transform_lambda() + +def simplify_args(node): + if node.type in (syms.vfplist, token.NAME): + return node + elif node.type == syms.vfpdef: + # These look like vfpdef< '(' x ')' > where x is NAME + # or another vfpdef instance (leading to recursion). + while node.type == syms.vfpdef: + node = node.children[1] + return node + raise RuntimeError("Received unexpected node %s" % node) + +def find_params(node): + if node.type == syms.vfpdef: + return find_params(node.children[1]) + elif node.type == token.NAME: + return node.value + return [find_params(c) for c in node.children if c.type != token.COMMA] + +def map_to_index(param_list, prefix=[], d=None): + if d is None: + d = {} + for i, obj in enumerate(param_list): + trailer = [Subscript(Number(str(i)))] + if isinstance(obj, list): + map_to_index(obj, trailer, d=d) + else: + d[obj] = prefix + trailer + return d + +def tuple_name(param_list): + l = [] + for obj in param_list: + if isinstance(obj, list): + l.append(tuple_name(obj)) + else: + l.append(obj) + return "_".join(l) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_types.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_types.py new file mode 100644 index 0000000000..67bf51f2f5 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_types.py @@ -0,0 +1,61 @@ +# Copyright 2007 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer for removing uses of the types module. + +These work for only the known names in the types module. The forms above +can include types. or not. ie, It is assumed the module is imported either as: + + import types + from types import ... # either * or specific types + +The import statements are not modified. + +There should be another fixer that handles at least the following constants: + + type([]) -> list + type(()) -> tuple + type('') -> str + +""" + +# Local imports +from .. 
import fixer_base +from ..fixer_util import Name + +_TYPE_MAPPING = { + 'BooleanType' : 'bool', + 'BufferType' : 'memoryview', + 'ClassType' : 'type', + 'ComplexType' : 'complex', + 'DictType': 'dict', + 'DictionaryType' : 'dict', + 'EllipsisType' : 'type(Ellipsis)', + #'FileType' : 'io.IOBase', + 'FloatType': 'float', + 'IntType': 'int', + 'ListType': 'list', + 'LongType': 'int', + 'ObjectType' : 'object', + 'NoneType': 'type(None)', + 'NotImplementedType' : 'type(NotImplemented)', + 'SliceType' : 'slice', + 'StringType': 'bytes', # XXX ? + 'StringTypes' : '(str,)', # XXX ? + 'TupleType': 'tuple', + 'TypeType' : 'type', + 'UnicodeType': 'str', + 'XRangeType' : 'range', + } + +_pats = ["power< 'types' trailer< '.' name='%s' > >" % t for t in _TYPE_MAPPING] + +class FixTypes(fixer_base.BaseFix): + BM_compatible = True + PATTERN = '|'.join(_pats) + + def transform(self, node, results): + new_value = _TYPE_MAPPING.get(results["name"].value) + if new_value: + return Name(new_value, prefix=node.prefix) + return None diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_unicode.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_unicode.py new file mode 100644 index 0000000000..c7982c2b97 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_unicode.py @@ -0,0 +1,42 @@ +r"""Fixer for unicode. + +* Changes unicode to str and unichr to chr. + +* If "...\u..." is not unicode literal change it into "...\\u...". + +* Change u"..." into "...". + +""" + +from ..pgen2 import token +from .. import fixer_base + +_mapping = {"unichr" : "chr", "unicode" : "str"} + +class FixUnicode(fixer_base.BaseFix): + BM_compatible = True + PATTERN = "STRING | 'unicode' | 'unichr'" + + def start_tree(self, tree, filename): + super(FixUnicode, self).start_tree(tree, filename) + self.unicode_literals = 'unicode_literals' in tree.future_features + + def transform(self, node, results): + if node.type == token.NAME: + new = node.clone() + new.value = _mapping[node.value] + return new + elif node.type == token.STRING: + val = node.value + if not self.unicode_literals and val[0] in '\'"' and '\\' in val: + val = r'\\'.join([ + v.replace('\\u', r'\\u').replace('\\U', r'\\U') + for v in val.split(r'\\') + ]) + if val[0] in 'uU': + val = val[1:] + if val == node.value: + return node + new = node.clone() + new.value = val + return new diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_urllib.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_urllib.py new file mode 100644 index 0000000000..ab892bc524 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_urllib.py @@ -0,0 +1,196 @@ +"""Fix changes imports of urllib which are now incompatible. + This is rather similar to fix_imports, but because of the more + complex nature of the fixing for urllib, it has its own fixer. 
+""" +# Author: Nick Edds + +# Local imports +from lib2to3.fixes.fix_imports import alternates, FixImports +from lib2to3.fixer_util import (Name, Comma, FromImport, Newline, + find_indentation, Node, syms) + +MAPPING = {"urllib": [ + ("urllib.request", + ["URLopener", "FancyURLopener", "urlretrieve", + "_urlopener", "urlopen", "urlcleanup", + "pathname2url", "url2pathname", "getproxies"]), + ("urllib.parse", + ["quote", "quote_plus", "unquote", "unquote_plus", + "urlencode", "splitattr", "splithost", "splitnport", + "splitpasswd", "splitport", "splitquery", "splittag", + "splittype", "splituser", "splitvalue", ]), + ("urllib.error", + ["ContentTooShortError"])], + "urllib2" : [ + ("urllib.request", + ["urlopen", "install_opener", "build_opener", + "Request", "OpenerDirector", "BaseHandler", + "HTTPDefaultErrorHandler", "HTTPRedirectHandler", + "HTTPCookieProcessor", "ProxyHandler", + "HTTPPasswordMgr", + "HTTPPasswordMgrWithDefaultRealm", + "AbstractBasicAuthHandler", + "HTTPBasicAuthHandler", "ProxyBasicAuthHandler", + "AbstractDigestAuthHandler", + "HTTPDigestAuthHandler", "ProxyDigestAuthHandler", + "HTTPHandler", "HTTPSHandler", "FileHandler", + "FTPHandler", "CacheFTPHandler", + "UnknownHandler"]), + ("urllib.error", + ["URLError", "HTTPError"]), + ] +} + +# Duplicate the url parsing functions for urllib2. +MAPPING["urllib2"].append(MAPPING["urllib"][1]) + + +def build_pattern(): + bare = set() + for old_module, changes in MAPPING.items(): + for change in changes: + new_module, members = change + members = alternates(members) + yield """import_name< 'import' (module=%r + | dotted_as_names< any* module=%r any* >) > + """ % (old_module, old_module) + yield """import_from< 'from' mod_member=%r 'import' + ( member=%s | import_as_name< member=%s 'as' any > | + import_as_names< members=any* >) > + """ % (old_module, members, members) + yield """import_from< 'from' module_star=%r 'import' star='*' > + """ % old_module + yield """import_name< 'import' + dotted_as_name< module_as=%r 'as' any > > + """ % old_module + # bare_with_attr has a special significance for FixImports.match(). + yield """power< bare_with_attr=%r trailer< '.' member=%s > any* > + """ % (old_module, members) + + +class FixUrllib(FixImports): + + def build_pattern(self): + return "|".join(build_pattern()) + + def transform_import(self, node, results): + """Transform for the basic import case. Replaces the old + import name with a comma separated list of its + replacements. + """ + import_mod = results.get("module") + pref = import_mod.prefix + + names = [] + + # create a Node list of the replacement modules + for name in MAPPING[import_mod.value][:-1]: + names.extend([Name(name[0], prefix=pref), Comma()]) + names.append(Name(MAPPING[import_mod.value][-1][0], prefix=pref)) + import_mod.replace(names) + + def transform_member(self, node, results): + """Transform for imports of specific module elements. Replaces + the module to be imported from with the appropriate new + module. 
+ """ + mod_member = results.get("mod_member") + pref = mod_member.prefix + member = results.get("member") + + # Simple case with only a single member being imported + if member: + # this may be a list of length one, or just a node + if isinstance(member, list): + member = member[0] + new_name = None + for change in MAPPING[mod_member.value]: + if member.value in change[1]: + new_name = change[0] + break + if new_name: + mod_member.replace(Name(new_name, prefix=pref)) + else: + self.cannot_convert(node, "This is an invalid module element") + + # Multiple members being imported + else: + # a dictionary for replacements, order matters + modules = [] + mod_dict = {} + members = results["members"] + for member in members: + # we only care about the actual members + if member.type == syms.import_as_name: + as_name = member.children[2].value + member_name = member.children[0].value + else: + member_name = member.value + as_name = None + if member_name != ",": + for change in MAPPING[mod_member.value]: + if member_name in change[1]: + if change[0] not in mod_dict: + modules.append(change[0]) + mod_dict.setdefault(change[0], []).append(member) + + new_nodes = [] + indentation = find_indentation(node) + first = True + def handle_name(name, prefix): + if name.type == syms.import_as_name: + kids = [Name(name.children[0].value, prefix=prefix), + name.children[1].clone(), + name.children[2].clone()] + return [Node(syms.import_as_name, kids)] + return [Name(name.value, prefix=prefix)] + for module in modules: + elts = mod_dict[module] + names = [] + for elt in elts[:-1]: + names.extend(handle_name(elt, pref)) + names.append(Comma()) + names.extend(handle_name(elts[-1], pref)) + new = FromImport(module, names) + if not first or node.parent.prefix.endswith(indentation): + new.prefix = indentation + new_nodes.append(new) + first = False + if new_nodes: + nodes = [] + for new_node in new_nodes[:-1]: + nodes.extend([new_node, Newline()]) + nodes.append(new_nodes[-1]) + node.replace(nodes) + else: + self.cannot_convert(node, "All module elements are invalid") + + def transform_dot(self, node, results): + """Transform for calls to module members in code.""" + module_dot = results.get("bare_with_attr") + member = results.get("member") + new_name = None + if isinstance(member, list): + member = member[0] + for change in MAPPING[module_dot.value]: + if member.value in change[1]: + new_name = change[0] + break + if new_name: + module_dot.replace(Name(new_name, + prefix=module_dot.prefix)) + else: + self.cannot_convert(node, "This is an invalid module element") + + def transform(self, node, results): + if results.get("module"): + self.transform_import(node, results) + elif results.get("mod_member"): + self.transform_member(node, results) + elif results.get("bare_with_attr"): + self.transform_dot(node, results) + # Renaming and star imports are not supported for these modules. + elif results.get("module_star"): + self.cannot_convert(node, "Cannot handle star imports.") + elif results.get("module_as"): + self.cannot_convert(node, "This module is now multiple modules") diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_ws_comma.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_ws_comma.py new file mode 100644 index 0000000000..a54a376c47 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_ws_comma.py @@ -0,0 +1,39 @@ +"""Fixer that changes 'a ,b' into 'a, b'. + +This also changes '{a :b}' into '{a: b}', but does not touch other +uses of colons. It does not touch other uses of whitespace. 
+ +""" + +from .. import pytree +from ..pgen2 import token +from .. import fixer_base + +class FixWsComma(fixer_base.BaseFix): + + explicit = True # The user must ask for this fixers + + PATTERN = """ + any<(not(',') any)+ ',' ((not(',') any)+ ',')* [not(',') any]> + """ + + COMMA = pytree.Leaf(token.COMMA, ",") + COLON = pytree.Leaf(token.COLON, ":") + SEPS = (COMMA, COLON) + + def transform(self, node, results): + new = node.clone() + comma = False + for child in new.children: + if child in self.SEPS: + prefix = child.prefix + if prefix.isspace() and "\n" not in prefix: + child.prefix = "" + comma = True + else: + if comma: + prefix = child.prefix + if not prefix: + child.prefix = " " + comma = False + return new diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_xrange.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_xrange.py new file mode 100644 index 0000000000..1e491e166a --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_xrange.py @@ -0,0 +1,73 @@ +# Copyright 2007 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer that changes xrange(...) into range(...).""" + +# Local imports +from .. import fixer_base +from ..fixer_util import Name, Call, consuming_calls +from .. import patcomp + + +class FixXrange(fixer_base.BaseFix): + BM_compatible = True + PATTERN = """ + power< + (name='range'|name='xrange') trailer< '(' args=any ')' > + rest=any* > + """ + + def start_tree(self, tree, filename): + super(FixXrange, self).start_tree(tree, filename) + self.transformed_xranges = set() + + def finish_tree(self, tree, filename): + self.transformed_xranges = None + + def transform(self, node, results): + name = results["name"] + if name.value == "xrange": + return self.transform_xrange(node, results) + elif name.value == "range": + return self.transform_range(node, results) + else: + raise ValueError(repr(name)) + + def transform_xrange(self, node, results): + name = results["name"] + name.replace(Name("range", prefix=name.prefix)) + # This prevents the new range call from being wrapped in a list later. + self.transformed_xranges.add(id(node)) + + def transform_range(self, node, results): + if (id(node) not in self.transformed_xranges and + not self.in_special_context(node)): + range_call = Call(Name("range"), [results["args"].clone()]) + # Encase the range call in list(). + list_call = Call(Name("list"), [range_call], + prefix=node.prefix) + # Put things that were after the range() call after the list call. + for n in results["rest"]: + list_call.append_child(n) + return list_call + + P1 = "power< func=NAME trailer< '(' node=any ')' > any* >" + p1 = patcomp.compile_pattern(P1) + + P2 = """for_stmt< 'for' any 'in' node=any ':' any* > + | comp_for< 'for' any 'in' node=any any* > + | comparison< any 'in' node=any any*> + """ + p2 = patcomp.compile_pattern(P2) + + def in_special_context(self, node): + if node.parent is None: + return False + results = {} + if (node.parent.parent is not None and + self.p1.match(node.parent.parent, results) and + results["node"] is node): + # list(d.keys()) -> list(d.keys()), etc. + return results["func"].value in consuming_calls + # for ... in d.iterkeys() -> for ... in d.keys(), etc. 
+ return self.p2.match(node.parent, results) and results["node"] is node diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_xreadlines.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_xreadlines.py new file mode 100644 index 0000000000..3e3f71ab04 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_xreadlines.py @@ -0,0 +1,25 @@ +"""Fix "for x in f.xreadlines()" -> "for x in f". + +This fixer will also convert g(f.xreadlines) into g(f.__iter__).""" +# Author: Collin Winter + +# Local imports +from .. import fixer_base +from ..fixer_util import Name + + +class FixXreadlines(fixer_base.BaseFix): + BM_compatible = True + PATTERN = """ + power< call=any+ trailer< '.' 'xreadlines' > trailer< '(' ')' > > + | + power< any+ trailer< '.' no_call='xreadlines' > > + """ + + def transform(self, node, results): + no_call = results.get("no_call") + + if no_call: + no_call.replace(Name("__iter__", prefix=no_call.prefix)) + else: + node.replace([x.clone() for x in results["call"]]) diff --git a/contrib/tools/python3/Lib/lib2to3/fixes/fix_zip.py b/contrib/tools/python3/Lib/lib2to3/fixes/fix_zip.py new file mode 100644 index 0000000000..52c28df6aa --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/fixes/fix_zip.py @@ -0,0 +1,46 @@ +""" +Fixer that changes zip(seq0, seq1, ...) into list(zip(seq0, seq1, ...) +unless there exists a 'from future_builtins import zip' statement in the +top-level namespace. + +We avoid the transformation if the zip() call is directly contained in +iter(<>), list(<>), tuple(<>), sorted(<>), ...join(<>), or for V in <>:. +""" + +# Local imports +from .. import fixer_base +from ..pytree import Node +from ..pygram import python_symbols as syms +from ..fixer_util import Name, ArgList, in_special_context + + +class FixZip(fixer_base.ConditionalFix): + + BM_compatible = True + PATTERN = """ + power< 'zip' args=trailer< '(' [any] ')' > [trailers=trailer*] + > + """ + + skip_on = "future_builtins.zip" + + def transform(self, node, results): + if self.should_skip(node): + return + + if in_special_context(node): + return None + + args = results['args'].clone() + args.prefix = "" + + trailers = [] + if 'trailers' in results: + trailers = [n.clone() for n in results['trailers']] + for n in trailers: + n.prefix = "" + + new = Node(syms.power, [Name("zip"), args], prefix="") + new = Node(syms.power, [Name("list"), ArgList([new])] + trailers) + new.prefix = node.prefix + return new diff --git a/contrib/tools/python3/Lib/lib2to3/main.py b/contrib/tools/python3/Lib/lib2to3/main.py new file mode 100644 index 0000000000..f2849fd6be --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/main.py @@ -0,0 +1,273 @@ +""" +Main program for 2to3. +""" + +from __future__ import with_statement, print_function + +import sys +import os +import difflib +import logging +import shutil +import optparse + +from . import refactor + + +def diff_texts(a, b, filename): + """Return a unified diff of two strings.""" + a = a.splitlines() + b = b.splitlines() + return difflib.unified_diff(a, b, filename, filename, + "(original)", "(refactored)", + lineterm="") + + +class StdoutRefactoringTool(refactor.MultiprocessRefactoringTool): + """ + A refactoring tool that can avoid overwriting its input files. + Prints output to stdout. + + Output files can optionally be written to a different directory and or + have an extra file suffix appended to their name for use in situations + where you do not want to replace the input files. 
+ """ + + def __init__(self, fixers, options, explicit, nobackups, show_diffs, + input_base_dir='', output_dir='', append_suffix=''): + """ + Args: + fixers: A list of fixers to import. + options: A dict with RefactoringTool configuration. + explicit: A list of fixers to run even if they are explicit. + nobackups: If true no backup '.bak' files will be created for those + files that are being refactored. + show_diffs: Should diffs of the refactoring be printed to stdout? + input_base_dir: The base directory for all input files. This class + will strip this path prefix off of filenames before substituting + it with output_dir. Only meaningful if output_dir is supplied. + All files processed by refactor() must start with this path. + output_dir: If supplied, all converted files will be written into + this directory tree instead of input_base_dir. + append_suffix: If supplied, all files output by this tool will have + this appended to their filename. Useful for changing .py to + .py3 for example by passing append_suffix='3'. + """ + self.nobackups = nobackups + self.show_diffs = show_diffs + if input_base_dir and not input_base_dir.endswith(os.sep): + input_base_dir += os.sep + self._input_base_dir = input_base_dir + self._output_dir = output_dir + self._append_suffix = append_suffix + super(StdoutRefactoringTool, self).__init__(fixers, options, explicit) + + def log_error(self, msg, *args, **kwargs): + self.errors.append((msg, args, kwargs)) + self.logger.error(msg, *args, **kwargs) + + def write_file(self, new_text, filename, old_text, encoding): + orig_filename = filename + if self._output_dir: + if filename.startswith(self._input_base_dir): + filename = os.path.join(self._output_dir, + filename[len(self._input_base_dir):]) + else: + raise ValueError('filename %s does not start with the ' + 'input_base_dir %s' % ( + filename, self._input_base_dir)) + if self._append_suffix: + filename += self._append_suffix + if orig_filename != filename: + output_dir = os.path.dirname(filename) + if not os.path.isdir(output_dir) and output_dir: + os.makedirs(output_dir) + self.log_message('Writing converted %s to %s.', orig_filename, + filename) + if not self.nobackups: + # Make backup + backup = filename + ".bak" + if os.path.lexists(backup): + try: + os.remove(backup) + except OSError: + self.log_message("Can't remove backup %s", backup) + try: + os.rename(filename, backup) + except OSError: + self.log_message("Can't rename %s to %s", filename, backup) + # Actually write the new file + write = super(StdoutRefactoringTool, self).write_file + write(new_text, filename, old_text, encoding) + if not self.nobackups: + shutil.copymode(backup, filename) + if orig_filename != filename: + # Preserve the file mode in the new output directory. + shutil.copymode(orig_filename, filename) + + def print_output(self, old, new, filename, equal): + if equal: + self.log_message("No changes to %s", filename) + else: + self.log_message("Refactored %s", filename) + if self.show_diffs: + diff_lines = diff_texts(old, new, filename) + try: + if self.output_lock is not None: + with self.output_lock: + for line in diff_lines: + print(line) + sys.stdout.flush() + else: + for line in diff_lines: + print(line) + except UnicodeEncodeError: + warn("couldn't encode %s's diff for your terminal" % + (filename,)) + return + +def warn(msg): + print("WARNING: %s" % (msg,), file=sys.stderr) + + +def main(fixer_pkg, args=None): + """Main program. + + Args: + fixer_pkg: the name of a package where the fixers are located. 
+ args: optional; a list of command line arguments. If omitted, + sys.argv[1:] is used. + + Returns a suggested exit status (0, 1, 2). + """ + # Set up option parser + parser = optparse.OptionParser(usage="2to3 [options] file|dir ...") + parser.add_option("-d", "--doctests_only", action="store_true", + help="Fix up doctests only") + parser.add_option("-f", "--fix", action="append", default=[], + help="Each FIX specifies a transformation; default: all") + parser.add_option("-j", "--processes", action="store", default=1, + type="int", help="Run 2to3 concurrently") + parser.add_option("-x", "--nofix", action="append", default=[], + help="Prevent a transformation from being run") + parser.add_option("-l", "--list-fixes", action="store_true", + help="List available transformations") + parser.add_option("-p", "--print-function", action="store_true", + help="Modify the grammar so that print() is a function") + parser.add_option("-e", "--exec-function", action="store_true", + help="Modify the grammar so that exec() is a function") + parser.add_option("-v", "--verbose", action="store_true", + help="More verbose logging") + parser.add_option("--no-diffs", action="store_true", + help="Don't show diffs of the refactoring") + parser.add_option("-w", "--write", action="store_true", + help="Write back modified files") + parser.add_option("-n", "--nobackups", action="store_true", default=False, + help="Don't write backups for modified files") + parser.add_option("-o", "--output-dir", action="store", type="str", + default="", help="Put output files in this directory " + "instead of overwriting the input files. Requires -n.") + parser.add_option("-W", "--write-unchanged-files", action="store_true", + help="Also write files even if no changes were required" + " (useful with --output-dir); implies -w.") + parser.add_option("--add-suffix", action="store", type="str", default="", + help="Append this string to all output filenames." + " Requires -n if non-empty. " + "ex: --add-suffix='3' will generate .py3 files.") + + # Parse command line arguments + refactor_stdin = False + flags = {} + options, args = parser.parse_args(args) + if options.write_unchanged_files: + flags["write_unchanged_files"] = True + if not options.write: + warn("--write-unchanged-files/-W implies -w.") + options.write = True + # If we allowed these, the original files would be renamed to backup names + # but not replaced. 
+ if options.output_dir and not options.nobackups: + parser.error("Can't use --output-dir/-o without -n.") + if options.add_suffix and not options.nobackups: + parser.error("Can't use --add-suffix without -n.") + + if not options.write and options.no_diffs: + warn("not writing files and not printing diffs; that's not very useful") + if not options.write and options.nobackups: + parser.error("Can't use -n without -w") + if options.list_fixes: + print("Available transformations for the -f/--fix option:") + for fixname in refactor.get_all_fix_names(fixer_pkg): + print(fixname) + if not args: + return 0 + if not args: + print("At least one file or directory argument required.", file=sys.stderr) + print("Use --help to show usage.", file=sys.stderr) + return 2 + if "-" in args: + refactor_stdin = True + if options.write: + print("Can't write to stdin.", file=sys.stderr) + return 2 + if options.print_function: + flags["print_function"] = True + + if options.exec_function: + flags["exec_function"] = True + + # Set up logging handler + level = logging.DEBUG if options.verbose else logging.INFO + logging.basicConfig(format='%(name)s: %(message)s', level=level) + logger = logging.getLogger('lib2to3.main') + + # Initialize the refactoring tool + avail_fixes = set(refactor.get_fixers_from_package(fixer_pkg)) + unwanted_fixes = set(fixer_pkg + ".fix_" + fix for fix in options.nofix) + explicit = set() + if options.fix: + all_present = False + for fix in options.fix: + if fix == "all": + all_present = True + else: + explicit.add(fixer_pkg + ".fix_" + fix) + requested = avail_fixes.union(explicit) if all_present else explicit + else: + requested = avail_fixes.union(explicit) + fixer_names = requested.difference(unwanted_fixes) + input_base_dir = os.path.commonprefix(args) + if (input_base_dir and not input_base_dir.endswith(os.sep) + and not os.path.isdir(input_base_dir)): + # One or more similar names were passed, their directory is the base. + # os.path.commonprefix() is ignorant of path elements, this corrects + # for that weird API. + input_base_dir = os.path.dirname(input_base_dir) + if options.output_dir: + input_base_dir = input_base_dir.rstrip(os.sep) + logger.info('Output in %r will mirror the input directory %r layout.', + options.output_dir, input_base_dir) + rt = StdoutRefactoringTool( + sorted(fixer_names), flags, sorted(explicit), + options.nobackups, not options.no_diffs, + input_base_dir=input_base_dir, + output_dir=options.output_dir, + append_suffix=options.add_suffix) + + # Refactor all files and directories passed as arguments + if not rt.errors: + if refactor_stdin: + rt.refactor_stdin() + else: + try: + rt.refactor(args, options.write, options.doctests_only, + options.processes) + except refactor.MultiprocessingUnsupported: + assert options.processes > 1 + print("Sorry, -j isn't supported on this platform.", + file=sys.stderr) + return 1 + rt.summarize() + + # Return error status (0 if rt.errors is zero) + return int(bool(rt.errors)) diff --git a/contrib/tools/python3/Lib/lib2to3/patcomp.py b/contrib/tools/python3/Lib/lib2to3/patcomp.py new file mode 100644 index 0000000000..f57f4954b2 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/patcomp.py @@ -0,0 +1,204 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Pattern compiler. + +The grammar is taken from PatternGrammar.txt. + +The compiler compiles a pattern to a pytree.*Pattern instance. 
+""" + +__author__ = "Guido van Rossum <guido@python.org>" + +# Python imports +import io + +# Fairly local imports +from .pgen2 import driver, literals, token, tokenize, parse, grammar + +# Really local imports +from . import pytree +from . import pygram + + +class PatternSyntaxError(Exception): + pass + + +def tokenize_wrapper(input): + """Tokenizes a string suppressing significant whitespace.""" + skip = {token.NEWLINE, token.INDENT, token.DEDENT} + tokens = tokenize.generate_tokens(io.StringIO(input).readline) + for quintuple in tokens: + type, value, start, end, line_text = quintuple + if type not in skip: + yield quintuple + + +class PatternCompiler(object): + + def __init__(self, grammar_file=None): + """Initializer. + + Takes an optional alternative filename for the pattern grammar. + """ + if grammar_file is None: + self.grammar = pygram.pattern_grammar + self.syms = pygram.pattern_symbols + else: + self.grammar = driver.load_grammar(grammar_file) + self.syms = pygram.Symbols(self.grammar) + self.pygrammar = pygram.python_grammar + self.pysyms = pygram.python_symbols + self.driver = driver.Driver(self.grammar, convert=pattern_convert) + + def compile_pattern(self, input, debug=False, with_tree=False): + """Compiles a pattern string to a nested pytree.*Pattern object.""" + tokens = tokenize_wrapper(input) + try: + root = self.driver.parse_tokens(tokens, debug=debug) + except parse.ParseError as e: + raise PatternSyntaxError(str(e)) from None + if with_tree: + return self.compile_node(root), root + else: + return self.compile_node(root) + + def compile_node(self, node): + """Compiles a node, recursively. + + This is one big switch on the node type. + """ + # XXX Optimize certain Wildcard-containing-Wildcard patterns + # that can be merged + if node.type == self.syms.Matcher: + node = node.children[0] # Avoid unneeded recursion + + if node.type == self.syms.Alternatives: + # Skip the odd children since they are just '|' tokens + alts = [self.compile_node(ch) for ch in node.children[::2]] + if len(alts) == 1: + return alts[0] + p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1) + return p.optimize() + + if node.type == self.syms.Alternative: + units = [self.compile_node(ch) for ch in node.children] + if len(units) == 1: + return units[0] + p = pytree.WildcardPattern([units], min=1, max=1) + return p.optimize() + + if node.type == self.syms.NegatedUnit: + pattern = self.compile_basic(node.children[1:]) + p = pytree.NegatedPattern(pattern) + return p.optimize() + + assert node.type == self.syms.Unit + + name = None + nodes = node.children + if len(nodes) >= 3 and nodes[1].type == token.EQUAL: + name = nodes[0].value + nodes = nodes[2:] + repeat = None + if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater: + repeat = nodes[-1] + nodes = nodes[:-1] + + # Now we've reduced it to: STRING | NAME [Details] | (...) | [...] 
+ pattern = self.compile_basic(nodes, repeat) + + if repeat is not None: + assert repeat.type == self.syms.Repeater + children = repeat.children + child = children[0] + if child.type == token.STAR: + min = 0 + max = pytree.HUGE + elif child.type == token.PLUS: + min = 1 + max = pytree.HUGE + elif child.type == token.LBRACE: + assert children[-1].type == token.RBRACE + assert len(children) in (3, 5) + min = max = self.get_int(children[1]) + if len(children) == 5: + max = self.get_int(children[3]) + else: + assert False + if min != 1 or max != 1: + pattern = pattern.optimize() + pattern = pytree.WildcardPattern([[pattern]], min=min, max=max) + + if name is not None: + pattern.name = name + return pattern.optimize() + + def compile_basic(self, nodes, repeat=None): + # Compile STRING | NAME [Details] | (...) | [...] + assert len(nodes) >= 1 + node = nodes[0] + if node.type == token.STRING: + value = str(literals.evalString(node.value)) + return pytree.LeafPattern(_type_of_literal(value), value) + elif node.type == token.NAME: + value = node.value + if value.isupper(): + if value not in TOKEN_MAP: + raise PatternSyntaxError("Invalid token: %r" % value) + if nodes[1:]: + raise PatternSyntaxError("Can't have details for token") + return pytree.LeafPattern(TOKEN_MAP[value]) + else: + if value == "any": + type = None + elif not value.startswith("_"): + type = getattr(self.pysyms, value, None) + if type is None: + raise PatternSyntaxError("Invalid symbol: %r" % value) + if nodes[1:]: # Details present + content = [self.compile_node(nodes[1].children[1])] + else: + content = None + return pytree.NodePattern(type, content) + elif node.value == "(": + return self.compile_node(nodes[1]) + elif node.value == "[": + assert repeat is None + subpattern = self.compile_node(nodes[1]) + return pytree.WildcardPattern([[subpattern]], min=0, max=1) + assert False, node + + def get_int(self, node): + assert node.type == token.NUMBER + return int(node.value) + + +# Map named tokens to the type value for a LeafPattern +TOKEN_MAP = {"NAME": token.NAME, + "STRING": token.STRING, + "NUMBER": token.NUMBER, + "TOKEN": None} + + +def _type_of_literal(value): + if value[0].isalpha(): + return token.NAME + elif value in grammar.opmap: + return grammar.opmap[value] + else: + return None + + +def pattern_convert(grammar, raw_node_info): + """Converts raw node information to a Node or Leaf instance.""" + type, value, context, children = raw_node_info + if children or type in grammar.number2symbol: + return pytree.Node(type, children, context=context) + else: + return pytree.Leaf(type, value, context=context) + + +def compile_pattern(pattern): + return PatternCompiler().compile_pattern(pattern) diff --git a/contrib/tools/python3/Lib/lib2to3/pgen2/__init__.py b/contrib/tools/python3/Lib/lib2to3/pgen2/__init__.py new file mode 100644 index 0000000000..af39048452 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/pgen2/__init__.py @@ -0,0 +1,4 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""The pgen2 package.""" diff --git a/contrib/tools/python3/Lib/lib2to3/pgen2/conv.py b/contrib/tools/python3/Lib/lib2to3/pgen2/conv.py new file mode 100644 index 0000000000..ed0cac532e --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/pgen2/conv.py @@ -0,0 +1,257 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Convert graminit.[ch] spit out by pgen to Python code. 
+ +Pgen is the Python parser generator. It is useful to quickly create a +parser from a grammar file in Python's grammar notation. But I don't +want my parsers to be written in C (yet), so I'm translating the +parsing tables to Python data structures and writing a Python parse +engine. + +Note that the token numbers are constants determined by the standard +Python tokenizer. The standard token module defines these numbers and +their names (the names are not used much). The token numbers are +hardcoded into the Python tokenizer and into pgen. A Python +implementation of the Python tokenizer is also available, in the +standard tokenize module. + +On the other hand, symbol numbers (representing the grammar's +non-terminals) are assigned by pgen based on the actual grammar +input. + +Note: this module is pretty much obsolete; the pgen module generates +equivalent grammar tables directly from the Grammar.txt input file +without having to invoke the Python pgen C program. + +""" + +# Python imports +import re + +# Local imports +from pgen2 import grammar, token + + +class Converter(grammar.Grammar): + """Grammar subclass that reads classic pgen output files. + + The run() method reads the tables as produced by the pgen parser + generator, typically contained in two C files, graminit.h and + graminit.c. The other methods are for internal use only. + + See the base class for more documentation. + + """ + + def run(self, graminit_h, graminit_c): + """Load the grammar tables from the text files written by pgen.""" + self.parse_graminit_h(graminit_h) + self.parse_graminit_c(graminit_c) + self.finish_off() + + def parse_graminit_h(self, filename): + """Parse the .h file written by pgen. (Internal) + + This file is a sequence of #define statements defining the + nonterminals of the grammar as numbers. We build two tables + mapping the numbers to names and back. + + """ + try: + f = open(filename) + except OSError as err: + print("Can't open %s: %s" % (filename, err)) + return False + self.symbol2number = {} + self.number2symbol = {} + lineno = 0 + for line in f: + lineno += 1 + mo = re.match(r"^#define\s+(\w+)\s+(\d+)$", line) + if not mo and line.strip(): + print("%s(%s): can't parse %s" % (filename, lineno, + line.strip())) + else: + symbol, number = mo.groups() + number = int(number) + assert symbol not in self.symbol2number + assert number not in self.number2symbol + self.symbol2number[symbol] = number + self.number2symbol[number] = symbol + return True + + def parse_graminit_c(self, filename): + """Parse the .c file written by pgen. (Internal) + + The file looks as follows. The first two lines are always this: + + #include "pgenheaders.h" + #include "grammar.h" + + After that come four blocks: + + 1) one or more state definitions + 2) a table defining dfas + 3) a table defining labels + 4) a struct defining the grammar + + A state definition has the following form: + - one or more arc arrays, each of the form: + static arc arcs_<n>_<m>[<k>] = { + {<i>, <j>}, + ... + }; + - followed by a state array, of the form: + static state states_<s>[<t>] = { + {<k>, arcs_<n>_<m>}, + ... + }; + + """ + try: + f = open(filename) + except OSError as err: + print("Can't open %s: %s" % (filename, err)) + return False + # The code below essentially uses f's iterator-ness! 
+ lineno = 0 + + # Expect the two #include lines + lineno, line = lineno+1, next(f) + assert line == '#include "pgenheaders.h"\n', (lineno, line) + lineno, line = lineno+1, next(f) + assert line == '#include "grammar.h"\n', (lineno, line) + + # Parse the state definitions + lineno, line = lineno+1, next(f) + allarcs = {} + states = [] + while line.startswith("static arc "): + while line.startswith("static arc "): + mo = re.match(r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$", + line) + assert mo, (lineno, line) + n, m, k = list(map(int, mo.groups())) + arcs = [] + for _ in range(k): + lineno, line = lineno+1, next(f) + mo = re.match(r"\s+{(\d+), (\d+)},$", line) + assert mo, (lineno, line) + i, j = list(map(int, mo.groups())) + arcs.append((i, j)) + lineno, line = lineno+1, next(f) + assert line == "};\n", (lineno, line) + allarcs[(n, m)] = arcs + lineno, line = lineno+1, next(f) + mo = re.match(r"static state states_(\d+)\[(\d+)\] = {$", line) + assert mo, (lineno, line) + s, t = list(map(int, mo.groups())) + assert s == len(states), (lineno, line) + state = [] + for _ in range(t): + lineno, line = lineno+1, next(f) + mo = re.match(r"\s+{(\d+), arcs_(\d+)_(\d+)},$", line) + assert mo, (lineno, line) + k, n, m = list(map(int, mo.groups())) + arcs = allarcs[n, m] + assert k == len(arcs), (lineno, line) + state.append(arcs) + states.append(state) + lineno, line = lineno+1, next(f) + assert line == "};\n", (lineno, line) + lineno, line = lineno+1, next(f) + self.states = states + + # Parse the dfas + dfas = {} + mo = re.match(r"static dfa dfas\[(\d+)\] = {$", line) + assert mo, (lineno, line) + ndfas = int(mo.group(1)) + for i in range(ndfas): + lineno, line = lineno+1, next(f) + mo = re.match(r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$', + line) + assert mo, (lineno, line) + symbol = mo.group(2) + number, x, y, z = list(map(int, mo.group(1, 3, 4, 5))) + assert self.symbol2number[symbol] == number, (lineno, line) + assert self.number2symbol[number] == symbol, (lineno, line) + assert x == 0, (lineno, line) + state = states[z] + assert y == len(state), (lineno, line) + lineno, line = lineno+1, next(f) + mo = re.match(r'\s+("(?:\\\d\d\d)*")},$', line) + assert mo, (lineno, line) + first = {} + rawbitset = eval(mo.group(1)) + for i, c in enumerate(rawbitset): + byte = ord(c) + for j in range(8): + if byte & (1<<j): + first[i*8 + j] = 1 + dfas[number] = (state, first) + lineno, line = lineno+1, next(f) + assert line == "};\n", (lineno, line) + self.dfas = dfas + + # Parse the labels + labels = [] + lineno, line = lineno+1, next(f) + mo = re.match(r"static label labels\[(\d+)\] = {$", line) + assert mo, (lineno, line) + nlabels = int(mo.group(1)) + for i in range(nlabels): + lineno, line = lineno+1, next(f) + mo = re.match(r'\s+{(\d+), (0|"\w+")},$', line) + assert mo, (lineno, line) + x, y = mo.groups() + x = int(x) + if y == "0": + y = None + else: + y = eval(y) + labels.append((x, y)) + lineno, line = lineno+1, next(f) + assert line == "};\n", (lineno, line) + self.labels = labels + + # Parse the grammar struct + lineno, line = lineno+1, next(f) + assert line == "grammar _PyParser_Grammar = {\n", (lineno, line) + lineno, line = lineno+1, next(f) + mo = re.match(r"\s+(\d+),$", line) + assert mo, (lineno, line) + ndfas = int(mo.group(1)) + assert ndfas == len(self.dfas) + lineno, line = lineno+1, next(f) + assert line == "\tdfas,\n", (lineno, line) + lineno, line = lineno+1, next(f) + mo = re.match(r"\s+{(\d+), labels},$", line) + assert mo, (lineno, line) + nlabels = int(mo.group(1)) + 
assert nlabels == len(self.labels), (lineno, line) + lineno, line = lineno+1, next(f) + mo = re.match(r"\s+(\d+)$", line) + assert mo, (lineno, line) + start = int(mo.group(1)) + assert start in self.number2symbol, (lineno, line) + self.start = start + lineno, line = lineno+1, next(f) + assert line == "};\n", (lineno, line) + try: + lineno, line = lineno+1, next(f) + except StopIteration: + pass + else: + assert 0, (lineno, line) + + def finish_off(self): + """Create additional useful structures. (Internal).""" + self.keywords = {} # map from keyword strings to arc labels + self.tokens = {} # map from numeric token values to arc labels + for ilabel, (type, value) in enumerate(self.labels): + if type == token.NAME and value is not None: + self.keywords[value] = ilabel + elif value is None: + self.tokens[type] = ilabel diff --git a/contrib/tools/python3/Lib/lib2to3/pgen2/driver.py b/contrib/tools/python3/Lib/lib2to3/pgen2/driver.py new file mode 100644 index 0000000000..6471635a31 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/pgen2/driver.py @@ -0,0 +1,177 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +# Modifications: +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Parser driver. + +This provides a high-level interface to parse a file into a syntax tree. + +""" + +__author__ = "Guido van Rossum <guido@python.org>" + +__all__ = ["Driver", "load_grammar"] + +# Python imports +import io +import os +import logging +import pkgutil +import sys + +# Pgen imports +from . import grammar, parse, token, tokenize, pgen + + +class Driver(object): + + def __init__(self, grammar, convert=None, logger=None): + self.grammar = grammar + if logger is None: + logger = logging.getLogger() + self.logger = logger + self.convert = convert + + def parse_tokens(self, tokens, debug=False): + """Parse a series of tokens and return the syntax tree.""" + # XXX Move the prefix computation into a wrapper around tokenize. + p = parse.Parser(self.grammar, self.convert) + p.setup() + lineno = 1 + column = 0 + type = value = start = end = line_text = None + prefix = "" + for quintuple in tokens: + type, value, start, end, line_text = quintuple + if start != (lineno, column): + assert (lineno, column) <= start, ((lineno, column), start) + s_lineno, s_column = start + if lineno < s_lineno: + prefix += "\n" * (s_lineno - lineno) + lineno = s_lineno + column = 0 + if column < s_column: + prefix += line_text[column:s_column] + column = s_column + if type in (tokenize.COMMENT, tokenize.NL): + prefix += value + lineno, column = end + if value.endswith("\n"): + lineno += 1 + column = 0 + continue + if type == token.OP: + type = grammar.opmap[value] + if debug: + self.logger.debug("%s %r (prefix=%r)", + token.tok_name[type], value, prefix) + if p.addtoken(type, value, (prefix, start)): + if debug: + self.logger.debug("Stop.") + break + prefix = "" + lineno, column = end + if value.endswith("\n"): + lineno += 1 + column = 0 + else: + # We never broke out -- EOF is too soon (how can this happen???) 
+ raise parse.ParseError("incomplete input", + type, value, (prefix, start)) + return p.rootnode + + def parse_stream_raw(self, stream, debug=False): + """Parse a stream and return the syntax tree.""" + tokens = tokenize.generate_tokens(stream.readline) + return self.parse_tokens(tokens, debug) + + def parse_stream(self, stream, debug=False): + """Parse a stream and return the syntax tree.""" + return self.parse_stream_raw(stream, debug) + + def parse_file(self, filename, encoding=None, debug=False): + """Parse a file and return the syntax tree.""" + with io.open(filename, "r", encoding=encoding) as stream: + return self.parse_stream(stream, debug) + + def parse_string(self, text, debug=False): + """Parse a string and return the syntax tree.""" + tokens = tokenize.generate_tokens(io.StringIO(text).readline) + return self.parse_tokens(tokens, debug) + + +def _generate_pickle_name(gt): + head, tail = os.path.splitext(gt) + if tail == ".txt": + tail = "" + return head + tail + ".".join(map(str, sys.version_info)) + ".pickle" + + +def load_grammar(gt="Grammar.txt", gp=None, + save=True, force=False, logger=None): + """Load the grammar (maybe from a pickle).""" + if logger is None: + logger = logging.getLogger() + gp = _generate_pickle_name(gt) if gp is None else gp + if force or not _newer(gp, gt): + logger.info("Generating grammar tables from %s", gt) + g = pgen.generate_grammar(gt) + if save: + logger.info("Writing grammar tables to %s", gp) + try: + g.dump(gp) + except OSError as e: + logger.info("Writing failed: %s", e) + else: + g = grammar.Grammar() + g.load(gp) + return g + + +def _newer(a, b): + """Inquire whether file a was written since file b.""" + if not os.path.exists(a): + return False + if not os.path.exists(b): + return True + return os.path.getmtime(a) >= os.path.getmtime(b) + + +def load_packaged_grammar(package, grammar_source): + """Normally, loads a pickled grammar by doing + pkgutil.get_data(package, pickled_grammar) + where *pickled_grammar* is computed from *grammar_source* by adding the + Python version and using a ``.pickle`` extension. + + However, if *grammar_source* is an extant file, load_grammar(grammar_source) + is called instead. This facilitates using a packaged grammar file when needed + but preserves load_grammar's automatic regeneration behavior when possible. + + """ + if os.path.isfile(grammar_source): + return load_grammar(grammar_source) + pickled_name = _generate_pickle_name(os.path.basename(grammar_source)) + data = pkgutil.get_data(package, pickled_name) + g = grammar.Grammar() + g.loads(data) + return g + + +def main(*args): + """Main program, when run as a script: produce grammar pickle files. + + Calls load_grammar for each argument, a path to a grammar text file. + """ + if not args: + args = sys.argv[1:] + logging.basicConfig(level=logging.INFO, stream=sys.stdout, + format='%(message)s') + for gt in args: + load_grammar(gt, save=True, force=True) + return True + +if __name__ == "__main__": + sys.exit(int(not main())) diff --git a/contrib/tools/python3/Lib/lib2to3/pgen2/grammar.py b/contrib/tools/python3/Lib/lib2to3/pgen2/grammar.py new file mode 100644 index 0000000000..5d550aeb65 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/pgen2/grammar.py @@ -0,0 +1,189 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""This module defines the data structures used to represent a grammar. 
+ +These are a bit arcane because they are derived from the data +structures used by Python's 'pgen' parser generator. + +There's also a table here mapping operators to their names in the +token module; the Python tokenize module reports all operators as the +fallback token code OP, but the parser needs the actual token code. + +""" + +# Python imports +import pickle + +# Local imports +from . import token + + +class Grammar(object): + """Pgen parsing tables conversion class. + + Once initialized, this class supplies the grammar tables for the + parsing engine implemented by parse.py. The parsing engine + accesses the instance variables directly. The class here does not + provide initialization of the tables; several subclasses exist to + do this (see the conv and pgen modules). + + The load() method reads the tables from a pickle file, which is + much faster than the other ways offered by subclasses. The pickle + file is written by calling dump() (after loading the grammar + tables using a subclass). The report() method prints a readable + representation of the tables to stdout, for debugging. + + The instance variables are as follows: + + symbol2number -- a dict mapping symbol names to numbers. Symbol + numbers are always 256 or higher, to distinguish + them from token numbers, which are between 0 and + 255 (inclusive). + + number2symbol -- a dict mapping numbers to symbol names; + these two are each other's inverse. + + states -- a list of DFAs, where each DFA is a list of + states, each state is a list of arcs, and each + arc is a (i, j) pair where i is a label and j is + a state number. The DFA number is the index into + this list. (This name is slightly confusing.) + Final states are represented by a special arc of + the form (0, j) where j is its own state number. + + dfas -- a dict mapping symbol numbers to (DFA, first) + pairs, where DFA is an item from the states list + above, and first is a set of tokens that can + begin this grammar rule (represented by a dict + whose values are always 1). + + labels -- a list of (x, y) pairs where x is either a token + number or a symbol number, and y is either None + or a string; the strings are keywords. The label + number is the index in this list; label numbers + are used to mark state transitions (arcs) in the + DFAs. + + start -- the number of the grammar's start symbol. + + keywords -- a dict mapping keyword strings to arc labels. + + tokens -- a dict mapping token numbers to arc labels. + + """ + + def __init__(self): + self.symbol2number = {} + self.number2symbol = {} + self.states = [] + self.dfas = {} + self.labels = [(0, "EMPTY")] + self.keywords = {} + self.tokens = {} + self.symbol2label = {} + self.start = 256 + + def dump(self, filename): + """Dump the grammar tables to a pickle file.""" + with open(filename, "wb") as f: + pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL) + + def load(self, filename): + """Load the grammar tables from a pickle file.""" + with open(filename, "rb") as f: + d = pickle.load(f) + self.__dict__.update(d) + + def loads(self, pkl): + """Load the grammar tables from a pickle bytes object.""" + self.__dict__.update(pickle.loads(pkl)) + + def copy(self): + """ + Copy the grammar. 
+ """ + new = self.__class__() + for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords", + "tokens", "symbol2label"): + setattr(new, dict_attr, getattr(self, dict_attr).copy()) + new.labels = self.labels[:] + new.states = self.states[:] + new.start = self.start + return new + + def report(self): + """Dump the grammar tables to standard output, for debugging.""" + from pprint import pprint + print("s2n") + pprint(self.symbol2number) + print("n2s") + pprint(self.number2symbol) + print("states") + pprint(self.states) + print("dfas") + pprint(self.dfas) + print("labels") + pprint(self.labels) + print("start", self.start) + + +# Map from operator to number (since tokenize doesn't do this) + +opmap_raw = """ +( LPAR +) RPAR +[ LSQB +] RSQB +: COLON +, COMMA +; SEMI ++ PLUS +- MINUS +* STAR +/ SLASH +| VBAR +& AMPER +< LESS +> GREATER += EQUAL +. DOT +% PERCENT +` BACKQUOTE +{ LBRACE +} RBRACE +@ AT +@= ATEQUAL +== EQEQUAL +!= NOTEQUAL +<> NOTEQUAL +<= LESSEQUAL +>= GREATEREQUAL +~ TILDE +^ CIRCUMFLEX +<< LEFTSHIFT +>> RIGHTSHIFT +** DOUBLESTAR ++= PLUSEQUAL +-= MINEQUAL +*= STAREQUAL +/= SLASHEQUAL +%= PERCENTEQUAL +&= AMPEREQUAL +|= VBAREQUAL +^= CIRCUMFLEXEQUAL +<<= LEFTSHIFTEQUAL +>>= RIGHTSHIFTEQUAL +**= DOUBLESTAREQUAL +// DOUBLESLASH +//= DOUBLESLASHEQUAL +-> RARROW +:= COLONEQUAL +""" + +opmap = {} +for line in opmap_raw.splitlines(): + if line: + op, name = line.split() + opmap[op] = getattr(token, name) +del line, op, name diff --git a/contrib/tools/python3/Lib/lib2to3/pgen2/literals.py b/contrib/tools/python3/Lib/lib2to3/pgen2/literals.py new file mode 100644 index 0000000000..b9b63e6e55 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/pgen2/literals.py @@ -0,0 +1,60 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Safely evaluate Python string literals without using eval().""" + +import re + +simple_escapes = {"a": "\a", + "b": "\b", + "f": "\f", + "n": "\n", + "r": "\r", + "t": "\t", + "v": "\v", + "'": "'", + '"': '"', + "\\": "\\"} + +def escape(m): + all, tail = m.group(0, 1) + assert all.startswith("\\") + esc = simple_escapes.get(tail) + if esc is not None: + return esc + if tail.startswith("x"): + hexes = tail[1:] + if len(hexes) < 2: + raise ValueError("invalid hex string escape ('\\%s')" % tail) + try: + i = int(hexes, 16) + except ValueError: + raise ValueError("invalid hex string escape ('\\%s')" % tail) from None + else: + try: + i = int(tail, 8) + except ValueError: + raise ValueError("invalid octal string escape ('\\%s')" % tail) from None + return chr(i) + +def evalString(s): + assert s.startswith("'") or s.startswith('"'), repr(s[:1]) + q = s[0] + if s[:3] == q*3: + q = q*3 + assert s.endswith(q), repr(s[-len(q):]) + assert len(s) >= 2*len(q) + s = s[len(q):-len(q)] + return re.sub(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3})", escape, s) + +def test(): + for i in range(256): + c = chr(i) + s = repr(c) + e = evalString(s) + if e != c: + print(i, c, s, e) + + +if __name__ == "__main__": + test() diff --git a/contrib/tools/python3/Lib/lib2to3/pgen2/parse.py b/contrib/tools/python3/Lib/lib2to3/pgen2/parse.py new file mode 100644 index 0000000000..cf3fcf7e99 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/pgen2/parse.py @@ -0,0 +1,204 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Parser engine for the grammar tables generated by pgen. + +The grammar table must be loaded first. 
+ +See Parser/parser.c in the Python distribution for additional info on +how this parsing engine works. + +""" + +# Local imports +from . import token + +class ParseError(Exception): + """Exception to signal the parser is stuck.""" + + def __init__(self, msg, type, value, context): + Exception.__init__(self, "%s: type=%r, value=%r, context=%r" % + (msg, type, value, context)) + self.msg = msg + self.type = type + self.value = value + self.context = context + + def __reduce__(self): + return type(self), (self.msg, self.type, self.value, self.context) + +class Parser(object): + """Parser engine. + + The proper usage sequence is: + + p = Parser(grammar, [converter]) # create instance + p.setup([start]) # prepare for parsing + <for each input token>: + if p.addtoken(...): # parse a token; may raise ParseError + break + root = p.rootnode # root of abstract syntax tree + + A Parser instance may be reused by calling setup() repeatedly. + + A Parser instance contains state pertaining to the current token + sequence, and should not be used concurrently by different threads + to parse separate token sequences. + + See driver.py for how to get input tokens by tokenizing a file or + string. + + Parsing is complete when addtoken() returns True; the root of the + abstract syntax tree can then be retrieved from the rootnode + instance variable. When a syntax error occurs, addtoken() raises + the ParseError exception. There is no error recovery; the parser + cannot be used after a syntax error was reported (but it can be + reinitialized by calling setup()). + + """ + + def __init__(self, grammar, convert=None): + """Constructor. + + The grammar argument is a grammar.Grammar instance; see the + grammar module for more information. + + The parser is not ready yet for parsing; you must call the + setup() method to get it started. + + The optional convert argument is a function mapping concrete + syntax tree nodes to abstract syntax tree nodes. If not + given, no conversion is done and the syntax tree produced is + the concrete syntax tree. If given, it must be a function of + two arguments, the first being the grammar (a grammar.Grammar + instance), and the second being the concrete syntax tree node + to be converted. The syntax tree is converted from the bottom + up. + + A concrete syntax tree node is a (type, value, context, nodes) + tuple, where type is the node type (a token or symbol number), + value is None for symbols and a string for tokens, context is + None or an opaque value used for error reporting (typically a + (lineno, offset) pair), and nodes is a list of children for + symbols, and None for tokens. + + An abstract syntax tree node may be anything; this is entirely + up to the converter function. + + """ + self.grammar = grammar + self.convert = convert or (lambda grammar, node: node) + + def setup(self, start=None): + """Prepare for parsing. + + This *must* be called before starting to parse. + + The optional argument is an alternative start symbol; it + defaults to the grammar's start symbol. + + You can use a Parser instance to parse any number of programs; + each time you call setup() the parser is reset to an initial + state determined by the (implicit or explicit) start symbol. + + """ + if start is None: + start = self.grammar.start + # Each stack entry is a tuple: (dfa, state, node). + # A node is a tuple: (type, value, context, children), + # where children is a list of nodes or None, and context may be None. 
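+        # The start symbol's DFA becomes the single initial entry on the
+        # parsing stack; addtoken() then pushes and pops entries as grammar
+        # rules are entered and completed.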
+ newnode = (start, None, None, []) + stackentry = (self.grammar.dfas[start], 0, newnode) + self.stack = [stackentry] + self.rootnode = None + self.used_names = set() # Aliased to self.rootnode.used_names in pop() + + def addtoken(self, type, value, context): + """Add a token; return True iff this is the end of the program.""" + # Map from token to label + ilabel = self.classify(type, value, context) + # Loop until the token is shifted; may raise exceptions + while True: + dfa, state, node = self.stack[-1] + states, first = dfa + arcs = states[state] + # Look for a state with this label + for i, newstate in arcs: + t, v = self.grammar.labels[i] + if ilabel == i: + # Look it up in the list of labels + assert t < 256 + # Shift a token; we're done with it + self.shift(type, value, newstate, context) + # Pop while we are in an accept-only state + state = newstate + while states[state] == [(0, state)]: + self.pop() + if not self.stack: + # Done parsing! + return True + dfa, state, node = self.stack[-1] + states, first = dfa + # Done with this token + return False + elif t >= 256: + # See if it's a symbol and if we're in its first set + itsdfa = self.grammar.dfas[t] + itsstates, itsfirst = itsdfa + if ilabel in itsfirst: + # Push a symbol + self.push(t, self.grammar.dfas[t], newstate, context) + break # To continue the outer while loop + else: + if (0, state) in arcs: + # An accepting state, pop it and try something else + self.pop() + if not self.stack: + # Done parsing, but another token is input + raise ParseError("too much input", + type, value, context) + else: + # No success finding a transition + raise ParseError("bad input", type, value, context) + + def classify(self, type, value, context): + """Turn a token into a label. (Internal)""" + if type == token.NAME: + # Keep a listing of all used names + self.used_names.add(value) + # Check for reserved words + ilabel = self.grammar.keywords.get(value) + if ilabel is not None: + return ilabel + ilabel = self.grammar.tokens.get(type) + if ilabel is None: + raise ParseError("bad token", type, value, context) + return ilabel + + def shift(self, type, value, newstate, context): + """Shift a token. (Internal)""" + dfa, state, node = self.stack[-1] + newnode = (type, value, context, None) + newnode = self.convert(self.grammar, newnode) + if newnode is not None: + node[-1].append(newnode) + self.stack[-1] = (dfa, newstate, node) + + def push(self, type, newdfa, newstate, context): + """Push a nonterminal. (Internal)""" + dfa, state, node = self.stack[-1] + newnode = (type, None, context, []) + self.stack[-1] = (dfa, newstate, node) + self.stack.append((newdfa, 0, newnode)) + + def pop(self): + """Pop a nonterminal. (Internal)""" + popdfa, popstate, popnode = self.stack.pop() + newnode = self.convert(self.grammar, popnode) + if newnode is not None: + if self.stack: + dfa, state, node = self.stack[-1] + node[-1].append(newnode) + else: + self.rootnode = newnode + self.rootnode.used_names = self.used_names diff --git a/contrib/tools/python3/Lib/lib2to3/pgen2/pgen.py b/contrib/tools/python3/Lib/lib2to3/pgen2/pgen.py new file mode 100644 index 0000000000..7abd5cef1c --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/pgen2/pgen.py @@ -0,0 +1,386 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +# Pgen imports +from . 
import grammar, token, tokenize + +class PgenGrammar(grammar.Grammar): + pass + +class ParserGenerator(object): + + def __init__(self, filename, stream=None): + close_stream = None + if stream is None: + stream = open(filename, encoding="utf-8") + close_stream = stream.close + self.filename = filename + self.stream = stream + self.generator = tokenize.generate_tokens(stream.readline) + self.gettoken() # Initialize lookahead + self.dfas, self.startsymbol = self.parse() + if close_stream is not None: + close_stream() + self.first = {} # map from symbol name to set of tokens + self.addfirstsets() + + def make_grammar(self): + c = PgenGrammar() + names = list(self.dfas.keys()) + names.sort() + names.remove(self.startsymbol) + names.insert(0, self.startsymbol) + for name in names: + i = 256 + len(c.symbol2number) + c.symbol2number[name] = i + c.number2symbol[i] = name + for name in names: + dfa = self.dfas[name] + states = [] + for state in dfa: + arcs = [] + for label, next in sorted(state.arcs.items()): + arcs.append((self.make_label(c, label), dfa.index(next))) + if state.isfinal: + arcs.append((0, dfa.index(state))) + states.append(arcs) + c.states.append(states) + c.dfas[c.symbol2number[name]] = (states, self.make_first(c, name)) + c.start = c.symbol2number[self.startsymbol] + return c + + def make_first(self, c, name): + rawfirst = self.first[name] + first = {} + for label in sorted(rawfirst): + ilabel = self.make_label(c, label) + ##assert ilabel not in first # XXX failed on <> ... != + first[ilabel] = 1 + return first + + def make_label(self, c, label): + # XXX Maybe this should be a method on a subclass of converter? + ilabel = len(c.labels) + if label[0].isalpha(): + # Either a symbol name or a named token + if label in c.symbol2number: + # A symbol name (a non-terminal) + if label in c.symbol2label: + return c.symbol2label[label] + else: + c.labels.append((c.symbol2number[label], None)) + c.symbol2label[label] = ilabel + return ilabel + else: + # A named token (NAME, NUMBER, STRING) + itoken = getattr(token, label, None) + assert isinstance(itoken, int), label + assert itoken in token.tok_name, label + if itoken in c.tokens: + return c.tokens[itoken] + else: + c.labels.append((itoken, None)) + c.tokens[itoken] = ilabel + return ilabel + else: + # Either a keyword or an operator + assert label[0] in ('"', "'"), label + value = eval(label) + if value[0].isalpha(): + # A keyword + if value in c.keywords: + return c.keywords[value] + else: + c.labels.append((token.NAME, value)) + c.keywords[value] = ilabel + return ilabel + else: + # An operator (any non-numeric token) + itoken = grammar.opmap[value] # Fails if unknown token + if itoken in c.tokens: + return c.tokens[itoken] + else: + c.labels.append((itoken, None)) + c.tokens[itoken] = ilabel + return ilabel + + def addfirstsets(self): + names = list(self.dfas.keys()) + names.sort() + for name in names: + if name not in self.first: + self.calcfirst(name) + #print name, self.first[name].keys() + + def calcfirst(self, name): + dfa = self.dfas[name] + self.first[name] = None # dummy to detect left recursion + state = dfa[0] + totalset = {} + overlapcheck = {} + for label, next in state.arcs.items(): + if label in self.dfas: + if label in self.first: + fset = self.first[label] + if fset is None: + raise ValueError("recursion for rule %r" % name) + else: + self.calcfirst(label) + fset = self.first[label] + totalset.update(fset) + overlapcheck[label] = fset + else: + totalset[label] = 1 + overlapcheck[label] = {label: 1} + inverse = {} + for 
label, itsfirst in overlapcheck.items(): + for symbol in itsfirst: + if symbol in inverse: + raise ValueError("rule %s is ambiguous; %s is in the" + " first sets of %s as well as %s" % + (name, symbol, label, inverse[symbol])) + inverse[symbol] = label + self.first[name] = totalset + + def parse(self): + dfas = {} + startsymbol = None + # MSTART: (NEWLINE | RULE)* ENDMARKER + while self.type != token.ENDMARKER: + while self.type == token.NEWLINE: + self.gettoken() + # RULE: NAME ':' RHS NEWLINE + name = self.expect(token.NAME) + self.expect(token.OP, ":") + a, z = self.parse_rhs() + self.expect(token.NEWLINE) + #self.dump_nfa(name, a, z) + dfa = self.make_dfa(a, z) + #self.dump_dfa(name, dfa) + oldlen = len(dfa) + self.simplify_dfa(dfa) + newlen = len(dfa) + dfas[name] = dfa + #print name, oldlen, newlen + if startsymbol is None: + startsymbol = name + return dfas, startsymbol + + def make_dfa(self, start, finish): + # To turn an NFA into a DFA, we define the states of the DFA + # to correspond to *sets* of states of the NFA. Then do some + # state reduction. Let's represent sets as dicts with 1 for + # values. + assert isinstance(start, NFAState) + assert isinstance(finish, NFAState) + def closure(state): + base = {} + addclosure(state, base) + return base + def addclosure(state, base): + assert isinstance(state, NFAState) + if state in base: + return + base[state] = 1 + for label, next in state.arcs: + if label is None: + addclosure(next, base) + states = [DFAState(closure(start), finish)] + for state in states: # NB states grows while we're iterating + arcs = {} + for nfastate in state.nfaset: + for label, next in nfastate.arcs: + if label is not None: + addclosure(next, arcs.setdefault(label, {})) + for label, nfaset in sorted(arcs.items()): + for st in states: + if st.nfaset == nfaset: + break + else: + st = DFAState(nfaset, finish) + states.append(st) + state.addarc(st, label) + return states # List of DFAState instances; first one is start + + def dump_nfa(self, name, start, finish): + print("Dump of NFA for", name) + todo = [start] + for i, state in enumerate(todo): + print(" State", i, state is finish and "(final)" or "") + for label, next in state.arcs: + if next in todo: + j = todo.index(next) + else: + j = len(todo) + todo.append(next) + if label is None: + print(" -> %d" % j) + else: + print(" %s -> %d" % (label, j)) + + def dump_dfa(self, name, dfa): + print("Dump of DFA for", name) + for i, state in enumerate(dfa): + print(" State", i, state.isfinal and "(final)" or "") + for label, next in sorted(state.arcs.items()): + print(" %s -> %d" % (label, dfa.index(next))) + + def simplify_dfa(self, dfa): + # This is not theoretically optimal, but works well enough. + # Algorithm: repeatedly look for two states that have the same + # set of arcs (same labels pointing to the same nodes) and + # unify them, until things stop changing. 
+ + # dfa is a list of DFAState instances + changes = True + while changes: + changes = False + for i, state_i in enumerate(dfa): + for j in range(i+1, len(dfa)): + state_j = dfa[j] + if state_i == state_j: + #print " unify", i, j + del dfa[j] + for state in dfa: + state.unifystate(state_j, state_i) + changes = True + break + + def parse_rhs(self): + # RHS: ALT ('|' ALT)* + a, z = self.parse_alt() + if self.value != "|": + return a, z + else: + aa = NFAState() + zz = NFAState() + aa.addarc(a) + z.addarc(zz) + while self.value == "|": + self.gettoken() + a, z = self.parse_alt() + aa.addarc(a) + z.addarc(zz) + return aa, zz + + def parse_alt(self): + # ALT: ITEM+ + a, b = self.parse_item() + while (self.value in ("(", "[") or + self.type in (token.NAME, token.STRING)): + c, d = self.parse_item() + b.addarc(c) + b = d + return a, b + + def parse_item(self): + # ITEM: '[' RHS ']' | ATOM ['+' | '*'] + if self.value == "[": + self.gettoken() + a, z = self.parse_rhs() + self.expect(token.OP, "]") + a.addarc(z) + return a, z + else: + a, z = self.parse_atom() + value = self.value + if value not in ("+", "*"): + return a, z + self.gettoken() + z.addarc(a) + if value == "+": + return a, z + else: + return a, a + + def parse_atom(self): + # ATOM: '(' RHS ')' | NAME | STRING + if self.value == "(": + self.gettoken() + a, z = self.parse_rhs() + self.expect(token.OP, ")") + return a, z + elif self.type in (token.NAME, token.STRING): + a = NFAState() + z = NFAState() + a.addarc(z, self.value) + self.gettoken() + return a, z + else: + self.raise_error("expected (...) or NAME or STRING, got %s/%s", + self.type, self.value) + + def expect(self, type, value=None): + if self.type != type or (value is not None and self.value != value): + self.raise_error("expected %s/%s, got %s/%s", + type, value, self.type, self.value) + value = self.value + self.gettoken() + return value + + def gettoken(self): + tup = next(self.generator) + while tup[0] in (tokenize.COMMENT, tokenize.NL): + tup = next(self.generator) + self.type, self.value, self.begin, self.end, self.line = tup + #print token.tok_name[self.type], repr(self.value) + + def raise_error(self, msg, *args): + if args: + try: + msg = msg % args + except: + msg = " ".join([msg] + list(map(str, args))) + raise SyntaxError(msg, (self.filename, self.end[0], + self.end[1], self.line)) + +class NFAState(object): + + def __init__(self): + self.arcs = [] # list of (label, NFAState) pairs + + def addarc(self, next, label=None): + assert label is None or isinstance(label, str) + assert isinstance(next, NFAState) + self.arcs.append((label, next)) + +class DFAState(object): + + def __init__(self, nfaset, final): + assert isinstance(nfaset, dict) + assert isinstance(next(iter(nfaset)), NFAState) + assert isinstance(final, NFAState) + self.nfaset = nfaset + self.isfinal = final in nfaset + self.arcs = {} # map from label to DFAState + + def addarc(self, next, label): + assert isinstance(label, str) + assert label not in self.arcs + assert isinstance(next, DFAState) + self.arcs[label] = next + + def unifystate(self, old, new): + for label, next in self.arcs.items(): + if next is old: + self.arcs[label] = new + + def __eq__(self, other): + # Equality test -- ignore the nfaset instance variable + assert isinstance(other, DFAState) + if self.isfinal != other.isfinal: + return False + # Can't just return self.arcs == other.arcs, because that + # would invoke this method recursively, with cycles... 
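+        # Instead, compare arc labels directly and require each label to lead
+        # to the identical successor state object (identity, not equality).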
+ if len(self.arcs) != len(other.arcs): + return False + for label, next in self.arcs.items(): + if next is not other.arcs.get(label): + return False + return True + + __hash__ = None # For Py3 compatibility. + +def generate_grammar(filename="Grammar.txt"): + p = ParserGenerator(filename) + return p.make_grammar() diff --git a/contrib/tools/python3/Lib/lib2to3/pgen2/token.py b/contrib/tools/python3/Lib/lib2to3/pgen2/token.py new file mode 100755 index 0000000000..2a55138e48 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/pgen2/token.py @@ -0,0 +1,86 @@ +#! /usr/bin/env python3 + +"""Token constants (from "token.h").""" + +# Taken from Python (r53757) and modified to include some tokens +# originally monkeypatched in by pgen2.tokenize + +#--start constants-- +ENDMARKER = 0 +NAME = 1 +NUMBER = 2 +STRING = 3 +NEWLINE = 4 +INDENT = 5 +DEDENT = 6 +LPAR = 7 +RPAR = 8 +LSQB = 9 +RSQB = 10 +COLON = 11 +COMMA = 12 +SEMI = 13 +PLUS = 14 +MINUS = 15 +STAR = 16 +SLASH = 17 +VBAR = 18 +AMPER = 19 +LESS = 20 +GREATER = 21 +EQUAL = 22 +DOT = 23 +PERCENT = 24 +BACKQUOTE = 25 +LBRACE = 26 +RBRACE = 27 +EQEQUAL = 28 +NOTEQUAL = 29 +LESSEQUAL = 30 +GREATEREQUAL = 31 +TILDE = 32 +CIRCUMFLEX = 33 +LEFTSHIFT = 34 +RIGHTSHIFT = 35 +DOUBLESTAR = 36 +PLUSEQUAL = 37 +MINEQUAL = 38 +STAREQUAL = 39 +SLASHEQUAL = 40 +PERCENTEQUAL = 41 +AMPEREQUAL = 42 +VBAREQUAL = 43 +CIRCUMFLEXEQUAL = 44 +LEFTSHIFTEQUAL = 45 +RIGHTSHIFTEQUAL = 46 +DOUBLESTAREQUAL = 47 +DOUBLESLASH = 48 +DOUBLESLASHEQUAL = 49 +AT = 50 +ATEQUAL = 51 +OP = 52 +COMMENT = 53 +NL = 54 +RARROW = 55 +AWAIT = 56 +ASYNC = 57 +ERRORTOKEN = 58 +COLONEQUAL = 59 +N_TOKENS = 60 +NT_OFFSET = 256 +#--end constants-- + +tok_name = {} +for _name, _value in list(globals().items()): + if isinstance(_value, int): + tok_name[_value] = _name + + +def ISTERMINAL(x): + return x < NT_OFFSET + +def ISNONTERMINAL(x): + return x >= NT_OFFSET + +def ISEOF(x): + return x == ENDMARKER diff --git a/contrib/tools/python3/Lib/lib2to3/pgen2/tokenize.py b/contrib/tools/python3/Lib/lib2to3/pgen2/tokenize.py new file mode 100644 index 0000000000..099dfa7798 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/pgen2/tokenize.py @@ -0,0 +1,564 @@ +# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation. +# All rights reserved. + +"""Tokenization help for Python programs. + +generate_tokens(readline) is a generator that breaks a stream of +text into Python tokens. It accepts a readline-like method which is called +repeatedly to get the next line of input (or "" for EOF). It generates +5-tuples with these members: + + the token type (see token.py) + the token (a string) + the starting (row, column) indices of the token (a 2-tuple of ints) + the ending (row, column) indices of the token (a 2-tuple of ints) + the original line (string) + +It is designed to match the working of the Python tokenizer exactly, except +that it produces COMMENT tokens for comments and gives type OP for all +operators + +Older entry points + tokenize_loop(readline, tokeneater) + tokenize(readline, tokeneater=printtoken) +are the same, except instead of generating tokens, tokeneater is a callback +function to which the 5 fields described above are passed as 5 arguments, +each time a new token is found.""" + +__author__ = 'Ka-Ping Yee <ping@lfw.org>' +__credits__ = \ + 'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro' + +import string, re +from codecs import BOM_UTF8, lookup +from lib2to3.pgen2.token import * + +from . 
import token +__all__ = [x for x in dir(token) if x[0] != '_'] + ["tokenize", + "generate_tokens", "untokenize"] +del token + +try: + bytes +except NameError: + # Support bytes type in Python <= 2.5, so 2to3 turns itself into + # valid Python 3 code. + bytes = str + +def group(*choices): return '(' + '|'.join(choices) + ')' +def any(*choices): return group(*choices) + '*' +def maybe(*choices): return group(*choices) + '?' +def _combinations(*l): + return set( + x + y for x in l for y in l + ("",) if x.casefold() != y.casefold() + ) + +Whitespace = r'[ \f\t]*' +Comment = r'#[^\r\n]*' +Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment) +Name = r'\w+' + +Binnumber = r'0[bB]_?[01]+(?:_[01]+)*' +Hexnumber = r'0[xX]_?[\da-fA-F]+(?:_[\da-fA-F]+)*[lL]?' +Octnumber = r'0[oO]?_?[0-7]+(?:_[0-7]+)*[lL]?' +Decnumber = group(r'[1-9]\d*(?:_\d+)*[lL]?', '0[lL]?') +Intnumber = group(Binnumber, Hexnumber, Octnumber, Decnumber) +Exponent = r'[eE][-+]?\d+(?:_\d+)*' +Pointfloat = group(r'\d+(?:_\d+)*\.(?:\d+(?:_\d+)*)?', r'\.\d+(?:_\d+)*') + maybe(Exponent) +Expfloat = r'\d+(?:_\d+)*' + Exponent +Floatnumber = group(Pointfloat, Expfloat) +Imagnumber = group(r'\d+(?:_\d+)*[jJ]', Floatnumber + r'[jJ]') +Number = group(Imagnumber, Floatnumber, Intnumber) + +# Tail end of ' string. +Single = r"[^'\\]*(?:\\.[^'\\]*)*'" +# Tail end of " string. +Double = r'[^"\\]*(?:\\.[^"\\]*)*"' +# Tail end of ''' string. +Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''" +# Tail end of """ string. +Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""' +_litprefix = r"(?:[uUrRbBfF]|[rR][fFbB]|[fFbBuU][rR])?" +Triple = group(_litprefix + "'''", _litprefix + '"""') +# Single-line ' or " string. +String = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'", + _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"') + +# Because of leftmost-then-longest match semantics, be sure to put the +# longest operators first (e.g., if = came before ==, == would get +# recognized as two instances of =). +Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=", + r"//=?", r"->", + r"[+\-*/%&@|^=<>]=?", + r"~") + +Bracket = '[][(){}]' +Special = group(r'\r?\n', r':=', r'[:;.,`@]') +Funny = group(Operator, Bracket, Special) + +PlainToken = group(Number, Funny, String, Name) +Token = Ignore + PlainToken + +# First (or only) line of ' or " string. 
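+# ContStr matches up to either the closing quote or a trailing backslash
+# line continuation, in which case the string is finished on a later line.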
+ContStr = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" + + group("'", r'\\\r?\n'), + _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' + + group('"', r'\\\r?\n')) +PseudoExtras = group(r'\\\r?\n', Comment, Triple) +PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name) + +tokenprog, pseudoprog, single3prog, double3prog = map( + re.compile, (Token, PseudoToken, Single3, Double3)) + +_strprefixes = ( + _combinations('r', 'R', 'f', 'F') | + _combinations('r', 'R', 'b', 'B') | + {'u', 'U', 'ur', 'uR', 'Ur', 'UR'} +) + +endprogs = {"'": re.compile(Single), '"': re.compile(Double), + "'''": single3prog, '"""': double3prog, + **{f"{prefix}'''": single3prog for prefix in _strprefixes}, + **{f'{prefix}"""': double3prog for prefix in _strprefixes}, + **{prefix: None for prefix in _strprefixes}} + +triple_quoted = ( + {"'''", '"""'} | + {f"{prefix}'''" for prefix in _strprefixes} | + {f'{prefix}"""' for prefix in _strprefixes} +) +single_quoted = ( + {"'", '"'} | + {f"{prefix}'" for prefix in _strprefixes} | + {f'{prefix}"' for prefix in _strprefixes} +) + +tabsize = 8 + +class TokenError(Exception): pass + +class StopTokenizing(Exception): pass + +def printtoken(type, token, xxx_todo_changeme, xxx_todo_changeme1, line): # for testing + (srow, scol) = xxx_todo_changeme + (erow, ecol) = xxx_todo_changeme1 + print("%d,%d-%d,%d:\t%s\t%s" % \ + (srow, scol, erow, ecol, tok_name[type], repr(token))) + +def tokenize(readline, tokeneater=printtoken): + """ + The tokenize() function accepts two parameters: one representing the + input stream, and one providing an output mechanism for tokenize(). + + The first parameter, readline, must be a callable object which provides + the same interface as the readline() method of built-in file objects. + Each call to the function should return one line of input as a string. + + The second parameter, tokeneater, must also be a callable object. It is + called once for each token, with five arguments, corresponding to the + tuples generated by generate_tokens(). 
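+
+    Illustrative example (tokenizing an in-memory string):
+
+        import io
+        tokenize(io.StringIO("1 + 2").readline)   # prints one line per token
+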
+ """ + try: + tokenize_loop(readline, tokeneater) + except StopTokenizing: + pass + +# backwards compatible interface +def tokenize_loop(readline, tokeneater): + for token_info in generate_tokens(readline): + tokeneater(*token_info) + +class Untokenizer: + + def __init__(self): + self.tokens = [] + self.prev_row = 1 + self.prev_col = 0 + + def add_whitespace(self, start): + row, col = start + assert row <= self.prev_row + col_offset = col - self.prev_col + if col_offset: + self.tokens.append(" " * col_offset) + + def untokenize(self, iterable): + for t in iterable: + if len(t) == 2: + self.compat(t, iterable) + break + tok_type, token, start, end, line = t + self.add_whitespace(start) + self.tokens.append(token) + self.prev_row, self.prev_col = end + if tok_type in (NEWLINE, NL): + self.prev_row += 1 + self.prev_col = 0 + return "".join(self.tokens) + + def compat(self, token, iterable): + startline = False + indents = [] + toks_append = self.tokens.append + toknum, tokval = token + if toknum in (NAME, NUMBER): + tokval += ' ' + if toknum in (NEWLINE, NL): + startline = True + for tok in iterable: + toknum, tokval = tok[:2] + + if toknum in (NAME, NUMBER, ASYNC, AWAIT): + tokval += ' ' + + if toknum == INDENT: + indents.append(tokval) + continue + elif toknum == DEDENT: + indents.pop() + continue + elif toknum in (NEWLINE, NL): + startline = True + elif startline and indents: + toks_append(indents[-1]) + startline = False + toks_append(tokval) + +cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) +blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) + +def _get_normal_name(orig_enc): + """Imitates get_normal_name in tokenizer.c.""" + # Only care about the first 12 characters. + enc = orig_enc[:12].lower().replace("_", "-") + if enc == "utf-8" or enc.startswith("utf-8-"): + return "utf-8" + if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \ + enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")): + return "iso-8859-1" + return orig_enc + +def detect_encoding(readline): + """ + The detect_encoding() function is used to detect the encoding that should + be used to decode a Python source file. It requires one argument, readline, + in the same way as the tokenize() generator. + + It will call readline a maximum of twice, and return the encoding used + (as a string) and a list of any lines (left as bytes) it has read + in. + + It detects the encoding from the presence of a utf-8 bom or an encoding + cookie as specified in pep-0263. If both a bom and a cookie are present, but + disagree, a SyntaxError will be raised. If the encoding cookie is an invalid + charset, raise a SyntaxError. Note that if a utf-8 bom is found, + 'utf-8-sig' is returned. + + If no encoding is specified, then the default of 'utf-8' will be returned. 
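+
+    Illustrative example (reading from an in-memory byte stream):
+
+        import io
+        buf = io.BytesIO(b"# -*- coding: latin-1 -*-")
+        detect_encoding(buf.readline)   # ('iso-8859-1', [b'# -*- coding: latin-1 -*-'])
+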
+ """ + bom_found = False + encoding = None + default = 'utf-8' + def read_or_stop(): + try: + return readline() + except StopIteration: + return bytes() + + def find_cookie(line): + try: + line_string = line.decode('ascii') + except UnicodeDecodeError: + return None + match = cookie_re.match(line_string) + if not match: + return None + encoding = _get_normal_name(match.group(1)) + try: + codec = lookup(encoding) + except LookupError: + # This behaviour mimics the Python interpreter + raise SyntaxError("unknown encoding: " + encoding) + + if bom_found: + if codec.name != 'utf-8': + # This behaviour mimics the Python interpreter + raise SyntaxError('encoding problem: utf-8') + encoding += '-sig' + return encoding + + first = read_or_stop() + if first.startswith(BOM_UTF8): + bom_found = True + first = first[3:] + default = 'utf-8-sig' + if not first: + return default, [] + + encoding = find_cookie(first) + if encoding: + return encoding, [first] + if not blank_re.match(first): + return default, [first] + + second = read_or_stop() + if not second: + return default, [first] + + encoding = find_cookie(second) + if encoding: + return encoding, [first, second] + + return default, [first, second] + +def untokenize(iterable): + """Transform tokens back into Python source code. + + Each element returned by the iterable must be a token sequence + with at least two elements, a token number and token value. If + only two tokens are passed, the resulting output is poor. + + Round-trip invariant for full input: + Untokenized source will match input source exactly + + Round-trip invariant for limited input: + # Output text will tokenize the back to the input + t1 = [tok[:2] for tok in generate_tokens(f.readline)] + newcode = untokenize(t1) + readline = iter(newcode.splitlines(1)).next + t2 = [tok[:2] for tokin generate_tokens(readline)] + assert t1 == t2 + """ + ut = Untokenizer() + return ut.untokenize(iterable) + +def generate_tokens(readline): + """ + The generate_tokens() generator requires one argument, readline, which + must be a callable object which provides the same interface as the + readline() method of built-in file objects. Each call to the function + should return one line of input as a string. Alternately, readline + can be a callable function terminating with StopIteration: + readline = open(myfile).next # Example of alternate readline + + The generator produces 5-tuples with these members: the token type; the + token string; a 2-tuple (srow, scol) of ints specifying the row and + column where the token begins in the source; a 2-tuple (erow, ecol) of + ints specifying the row and column where the token ends in the source; + and the line on which the token was found. The line passed is the + physical line. 
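+
+    Illustrative example:
+
+        import io
+        for tok in generate_tokens(io.StringIO("1 + 2").readline):
+            print(tok[0], repr(tok[1]))
+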
+ """ + lnum = parenlev = continued = 0 + contstr, needcont = '', 0 + contline = None + indents = [0] + + # 'stashed' and 'async_*' are used for async/await parsing + stashed = None + async_def = False + async_def_indent = 0 + async_def_nl = False + + while 1: # loop over lines in stream + try: + line = readline() + except StopIteration: + line = '' + lnum = lnum + 1 + pos, max = 0, len(line) + + if contstr: # continued string + if not line: + raise TokenError("EOF in multi-line string", strstart) + endmatch = endprog.match(line) + if endmatch: + pos = end = endmatch.end(0) + yield (STRING, contstr + line[:end], + strstart, (lnum, end), contline + line) + contstr, needcont = '', 0 + contline = None + elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n': + yield (ERRORTOKEN, contstr + line, + strstart, (lnum, len(line)), contline) + contstr = '' + contline = None + continue + else: + contstr = contstr + line + contline = contline + line + continue + + elif parenlev == 0 and not continued: # new statement + if not line: break + column = 0 + while pos < max: # measure leading whitespace + if line[pos] == ' ': column = column + 1 + elif line[pos] == '\t': column = (column//tabsize + 1)*tabsize + elif line[pos] == '\f': column = 0 + else: break + pos = pos + 1 + if pos == max: break + + if stashed: + yield stashed + stashed = None + + if line[pos] in '#\r\n': # skip comments or blank lines + if line[pos] == '#': + comment_token = line[pos:].rstrip('\r\n') + nl_pos = pos + len(comment_token) + yield (COMMENT, comment_token, + (lnum, pos), (lnum, pos + len(comment_token)), line) + yield (NL, line[nl_pos:], + (lnum, nl_pos), (lnum, len(line)), line) + else: + yield ((NL, COMMENT)[line[pos] == '#'], line[pos:], + (lnum, pos), (lnum, len(line)), line) + continue + + if column > indents[-1]: # count indents or dedents + indents.append(column) + yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line) + while column < indents[-1]: + if column not in indents: + raise IndentationError( + "unindent does not match any outer indentation level", + ("<tokenize>", lnum, pos, line)) + indents = indents[:-1] + + if async_def and async_def_indent >= indents[-1]: + async_def = False + async_def_nl = False + async_def_indent = 0 + + yield (DEDENT, '', (lnum, pos), (lnum, pos), line) + + if async_def and async_def_nl and async_def_indent >= indents[-1]: + async_def = False + async_def_nl = False + async_def_indent = 0 + + else: # continued statement + if not line: + raise TokenError("EOF in multi-line statement", (lnum, 0)) + continued = 0 + + while pos < max: + pseudomatch = pseudoprog.match(line, pos) + if pseudomatch: # scan for tokens + start, end = pseudomatch.span(1) + spos, epos, pos = (lnum, start), (lnum, end), end + token, initial = line[start:end], line[start] + + if initial in string.digits or \ + (initial == '.' 
and token != '.'): # ordinary number + yield (NUMBER, token, spos, epos, line) + elif initial in '\r\n': + newline = NEWLINE + if parenlev > 0: + newline = NL + elif async_def: + async_def_nl = True + if stashed: + yield stashed + stashed = None + yield (newline, token, spos, epos, line) + + elif initial == '#': + assert not token.endswith("\n") + if stashed: + yield stashed + stashed = None + yield (COMMENT, token, spos, epos, line) + elif token in triple_quoted: + endprog = endprogs[token] + endmatch = endprog.match(line, pos) + if endmatch: # all on one line + pos = endmatch.end(0) + token = line[start:pos] + if stashed: + yield stashed + stashed = None + yield (STRING, token, spos, (lnum, pos), line) + else: + strstart = (lnum, start) # multiple lines + contstr = line[start:] + contline = line + break + elif initial in single_quoted or \ + token[:2] in single_quoted or \ + token[:3] in single_quoted: + if token[-1] == '\n': # continued string + strstart = (lnum, start) + endprog = (endprogs[initial] or endprogs[token[1]] or + endprogs[token[2]]) + contstr, needcont = line[start:], 1 + contline = line + break + else: # ordinary string + if stashed: + yield stashed + stashed = None + yield (STRING, token, spos, epos, line) + elif initial.isidentifier(): # ordinary name + if token in ('async', 'await'): + if async_def: + yield (ASYNC if token == 'async' else AWAIT, + token, spos, epos, line) + continue + + tok = (NAME, token, spos, epos, line) + if token == 'async' and not stashed: + stashed = tok + continue + + if token in ('def', 'for'): + if (stashed + and stashed[0] == NAME + and stashed[1] == 'async'): + + if token == 'def': + async_def = True + async_def_indent = indents[-1] + + yield (ASYNC, stashed[1], + stashed[2], stashed[3], + stashed[4]) + stashed = None + + if stashed: + yield stashed + stashed = None + + yield tok + elif initial == '\\': # continued stmt + # This yield is new; needed for better idempotency: + if stashed: + yield stashed + stashed = None + yield (NL, token, spos, (lnum, pos), line) + continued = 1 + else: + if initial in '([{': parenlev = parenlev + 1 + elif initial in ')]}': parenlev = parenlev - 1 + if stashed: + yield stashed + stashed = None + yield (OP, token, spos, epos, line) + else: + yield (ERRORTOKEN, line[pos], + (lnum, pos), (lnum, pos+1), line) + pos = pos + 1 + + if stashed: + yield stashed + stashed = None + + for indent in indents[1:]: # pop remaining indent levels + yield (DEDENT, '', (lnum, 0), (lnum, 0), '') + yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '') + +if __name__ == '__main__': # testing + import sys + if len(sys.argv) > 1: tokenize(open(sys.argv[1]).readline) + else: tokenize(sys.stdin.readline) diff --git a/contrib/tools/python3/Lib/lib2to3/pygram.py b/contrib/tools/python3/Lib/lib2to3/pygram.py new file mode 100644 index 0000000000..24d9db9217 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/pygram.py @@ -0,0 +1,43 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Export the Python grammar and symbols.""" + +# Python imports +import os + +# Local imports +from .pgen2 import token +from .pgen2 import driver +from . import pytree + +# The grammar file +_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), "Grammar.txt") +_PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), + "PatternGrammar.txt") + + +class Symbols(object): + + def __init__(self, grammar): + """Initializer. 
+ + Creates an attribute for each grammar symbol (nonterminal), + whose value is the symbol's type (an int >= 256). + """ + for name, symbol in grammar.symbol2number.items(): + setattr(self, name, symbol) + + +python_grammar = driver.load_packaged_grammar("lib2to3", _GRAMMAR_FILE) + +python_symbols = Symbols(python_grammar) + +python_grammar_no_print_statement = python_grammar.copy() +del python_grammar_no_print_statement.keywords["print"] + +python_grammar_no_print_and_exec_statement = python_grammar_no_print_statement.copy() +del python_grammar_no_print_and_exec_statement.keywords["exec"] + +pattern_grammar = driver.load_packaged_grammar("lib2to3", _PATTERN_GRAMMAR_FILE) +pattern_symbols = Symbols(pattern_grammar) diff --git a/contrib/tools/python3/Lib/lib2to3/pytree.py b/contrib/tools/python3/Lib/lib2to3/pytree.py new file mode 100644 index 0000000000..729023df02 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/pytree.py @@ -0,0 +1,853 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +""" +Python parse tree definitions. + +This is a very concrete parse tree; we need to keep every token and +even the comments and whitespace between tokens. + +There's also a pattern matching implementation here. +""" + +__author__ = "Guido van Rossum <guido@python.org>" + +import sys +from io import StringIO + +HUGE = 0x7FFFFFFF # maximum repeat count, default max + +_type_reprs = {} +def type_repr(type_num): + global _type_reprs + if not _type_reprs: + from .pygram import python_symbols + # printing tokens is possible but not as useful + # from .pgen2 import token // token.__dict__.items(): + for name, val in python_symbols.__dict__.items(): + if type(val) == int: _type_reprs[val] = name + return _type_reprs.setdefault(type_num, type_num) + +class Base(object): + + """ + Abstract base class for Node and Leaf. + + This provides some default functionality and boilerplate using the + template pattern. + + A node may be a subnode of at most one parent. + """ + + # Default values for instance variables + type = None # int: token number (< 256) or symbol number (>= 256) + parent = None # Parent node pointer, or None + children = () # Tuple of subnodes + was_changed = False + was_checked = False + + def __new__(cls, *args, **kwds): + """Constructor that prevents Base from being instantiated.""" + assert cls is not Base, "Cannot instantiate Base" + return object.__new__(cls) + + def __eq__(self, other): + """ + Compare two nodes for equality. + + This calls the method _eq(). + """ + if self.__class__ is not other.__class__: + return NotImplemented + return self._eq(other) + + __hash__ = None # For Py3 compatibility. + + def _eq(self, other): + """ + Compare two nodes for equality. + + This is called by __eq__ and __ne__. It is only called if the two nodes + have the same type. This must be implemented by the concrete subclass. + Nodes should be considered equal if they have the same structure, + ignoring the prefix string and other context information. + """ + raise NotImplementedError + + def clone(self): + """ + Return a cloned (deep) copy of self. + + This must be implemented by the concrete subclass. + """ + raise NotImplementedError + + def post_order(self): + """ + Return a post-order iterator for the tree. + + This must be implemented by the concrete subclass. + """ + raise NotImplementedError + + def pre_order(self): + """ + Return a pre-order iterator for the tree. + + This must be implemented by the concrete subclass. 
+ """ + raise NotImplementedError + + def replace(self, new): + """Replace this node with a new one in the parent.""" + assert self.parent is not None, str(self) + assert new is not None + if not isinstance(new, list): + new = [new] + l_children = [] + found = False + for ch in self.parent.children: + if ch is self: + assert not found, (self.parent.children, self, new) + if new is not None: + l_children.extend(new) + found = True + else: + l_children.append(ch) + assert found, (self.children, self, new) + self.parent.changed() + self.parent.children = l_children + for x in new: + x.parent = self.parent + self.parent = None + + def get_lineno(self): + """Return the line number which generated the invocant node.""" + node = self + while not isinstance(node, Leaf): + if not node.children: + return + node = node.children[0] + return node.lineno + + def changed(self): + if self.parent: + self.parent.changed() + self.was_changed = True + + def remove(self): + """ + Remove the node from the tree. Returns the position of the node in its + parent's children before it was removed. + """ + if self.parent: + for i, node in enumerate(self.parent.children): + if node is self: + self.parent.changed() + del self.parent.children[i] + self.parent = None + return i + + @property + def next_sibling(self): + """ + The node immediately following the invocant in their parent's children + list. If the invocant does not have a next sibling, it is None + """ + if self.parent is None: + return None + + # Can't use index(); we need to test by identity + for i, child in enumerate(self.parent.children): + if child is self: + try: + return self.parent.children[i+1] + except IndexError: + return None + + @property + def prev_sibling(self): + """ + The node immediately preceding the invocant in their parent's children + list. If the invocant does not have a previous sibling, it is None. + """ + if self.parent is None: + return None + + # Can't use index(); we need to test by identity + for i, child in enumerate(self.parent.children): + if child is self: + if i == 0: + return None + return self.parent.children[i-1] + + def leaves(self): + for child in self.children: + yield from child.leaves() + + def depth(self): + if self.parent is None: + return 0 + return 1 + self.parent.depth() + + def get_suffix(self): + """ + Return the string immediately following the invocant node. This is + effectively equivalent to node.next_sibling.prefix + """ + next_sib = self.next_sibling + if next_sib is None: + return "" + return next_sib.prefix + + if sys.version_info < (3, 0): + def __str__(self): + return str(self).encode("ascii") + +class Node(Base): + + """Concrete implementation for interior nodes.""" + + def __init__(self,type, children, + context=None, + prefix=None, + fixers_applied=None): + """ + Initializer. + + Takes a type constant (a symbol number >= 256), a sequence of + child nodes, and an optional context keyword argument. + + As a side effect, the parent pointers of the children are updated. 
+ """ + assert type >= 256, type + self.type = type + self.children = list(children) + for ch in self.children: + assert ch.parent is None, repr(ch) + ch.parent = self + if prefix is not None: + self.prefix = prefix + if fixers_applied: + self.fixers_applied = fixers_applied[:] + else: + self.fixers_applied = None + + def __repr__(self): + """Return a canonical string representation.""" + return "%s(%s, %r)" % (self.__class__.__name__, + type_repr(self.type), + self.children) + + def __unicode__(self): + """ + Return a pretty string representation. + + This reproduces the input source exactly. + """ + return "".join(map(str, self.children)) + + if sys.version_info > (3, 0): + __str__ = __unicode__ + + def _eq(self, other): + """Compare two nodes for equality.""" + return (self.type, self.children) == (other.type, other.children) + + def clone(self): + """Return a cloned (deep) copy of self.""" + return Node(self.type, [ch.clone() for ch in self.children], + fixers_applied=self.fixers_applied) + + def post_order(self): + """Return a post-order iterator for the tree.""" + for child in self.children: + yield from child.post_order() + yield self + + def pre_order(self): + """Return a pre-order iterator for the tree.""" + yield self + for child in self.children: + yield from child.pre_order() + + @property + def prefix(self): + """ + The whitespace and comments preceding this node in the input. + """ + if not self.children: + return "" + return self.children[0].prefix + + @prefix.setter + def prefix(self, prefix): + if self.children: + self.children[0].prefix = prefix + + def set_child(self, i, child): + """ + Equivalent to 'node.children[i] = child'. This method also sets the + child's parent attribute appropriately. + """ + child.parent = self + self.children[i].parent = None + self.children[i] = child + self.changed() + + def insert_child(self, i, child): + """ + Equivalent to 'node.children.insert(i, child)'. This method also sets + the child's parent attribute appropriately. + """ + child.parent = self + self.children.insert(i, child) + self.changed() + + def append_child(self, child): + """ + Equivalent to 'node.children.append(child)'. This method also sets the + child's parent attribute appropriately. + """ + child.parent = self + self.children.append(child) + self.changed() + + +class Leaf(Base): + + """Concrete implementation for leaf nodes.""" + + # Default values for instance variables + _prefix = "" # Whitespace and comments preceding this token in the input + lineno = 0 # Line where this token starts in the input + column = 0 # Column where this token tarts in the input + + def __init__(self, type, value, + context=None, + prefix=None, + fixers_applied=[]): + """ + Initializer. + + Takes a type constant (a token number < 256), a string value, and an + optional context keyword argument. + """ + assert 0 <= type < 256, type + if context is not None: + self._prefix, (self.lineno, self.column) = context + self.type = type + self.value = value + if prefix is not None: + self._prefix = prefix + self.fixers_applied = fixers_applied[:] + + def __repr__(self): + """Return a canonical string representation.""" + return "%s(%r, %r)" % (self.__class__.__name__, + self.type, + self.value) + + def __unicode__(self): + """ + Return a pretty string representation. + + This reproduces the input source exactly. 
+ """ + return self.prefix + str(self.value) + + if sys.version_info > (3, 0): + __str__ = __unicode__ + + def _eq(self, other): + """Compare two nodes for equality.""" + return (self.type, self.value) == (other.type, other.value) + + def clone(self): + """Return a cloned (deep) copy of self.""" + return Leaf(self.type, self.value, + (self.prefix, (self.lineno, self.column)), + fixers_applied=self.fixers_applied) + + def leaves(self): + yield self + + def post_order(self): + """Return a post-order iterator for the tree.""" + yield self + + def pre_order(self): + """Return a pre-order iterator for the tree.""" + yield self + + @property + def prefix(self): + """ + The whitespace and comments preceding this token in the input. + """ + return self._prefix + + @prefix.setter + def prefix(self, prefix): + self.changed() + self._prefix = prefix + +def convert(gr, raw_node): + """ + Convert raw node information to a Node or Leaf instance. + + This is passed to the parser driver which calls it whenever a reduction of a + grammar rule produces a new complete node, so that the tree is build + strictly bottom-up. + """ + type, value, context, children = raw_node + if children or type in gr.number2symbol: + # If there's exactly one child, return that child instead of + # creating a new node. + if len(children) == 1: + return children[0] + return Node(type, children, context=context) + else: + return Leaf(type, value, context=context) + + +class BasePattern(object): + + """ + A pattern is a tree matching pattern. + + It looks for a specific node type (token or symbol), and + optionally for a specific content. + + This is an abstract base class. There are three concrete + subclasses: + + - LeafPattern matches a single leaf node; + - NodePattern matches a single node (usually non-leaf); + - WildcardPattern matches a sequence of nodes of variable length. + """ + + # Defaults for instance variables + type = None # Node type (token if < 256, symbol if >= 256) + content = None # Optional content matching pattern + name = None # Optional name used to store match in results dict + + def __new__(cls, *args, **kwds): + """Constructor that prevents BasePattern from being instantiated.""" + assert cls is not BasePattern, "Cannot instantiate BasePattern" + return object.__new__(cls) + + def __repr__(self): + args = [type_repr(self.type), self.content, self.name] + while args and args[-1] is None: + del args[-1] + return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args))) + + def optimize(self): + """ + A subclass can define this as a hook for optimizations. + + Returns either self or another node with the same effect. + """ + return self + + def match(self, node, results=None): + """ + Does this pattern exactly match a node? + + Returns True if it matches, False if not. + + If results is not None, it must be a dict which will be + updated with the nodes matching named subpatterns. + + Default implementation for non-wildcard patterns. + """ + if self.type is not None and node.type != self.type: + return False + if self.content is not None: + r = None + if results is not None: + r = {} + if not self._submatch(node, r): + return False + if r: + results.update(r) + if results is not None and self.name: + results[self.name] = node + return True + + def match_seq(self, nodes, results=None): + """ + Does this pattern exactly match a sequence of nodes? + + Default implementation for non-wildcard patterns. 
+ """ + if len(nodes) != 1: + return False + return self.match(nodes[0], results) + + def generate_matches(self, nodes): + """ + Generator yielding all matches for this pattern. + + Default implementation for non-wildcard patterns. + """ + r = {} + if nodes and self.match(nodes[0], r): + yield 1, r + + +class LeafPattern(BasePattern): + + def __init__(self, type=None, content=None, name=None): + """ + Initializer. Takes optional type, content, and name. + + The type, if given must be a token type (< 256). If not given, + this matches any *leaf* node; the content may still be required. + + The content, if given, must be a string. + + If a name is given, the matching node is stored in the results + dict under that key. + """ + if type is not None: + assert 0 <= type < 256, type + if content is not None: + assert isinstance(content, str), repr(content) + self.type = type + self.content = content + self.name = name + + def match(self, node, results=None): + """Override match() to insist on a leaf node.""" + if not isinstance(node, Leaf): + return False + return BasePattern.match(self, node, results) + + def _submatch(self, node, results=None): + """ + Match the pattern's content to the node's children. + + This assumes the node type matches and self.content is not None. + + Returns True if it matches, False if not. + + If results is not None, it must be a dict which will be + updated with the nodes matching named subpatterns. + + When returning False, the results dict may still be updated. + """ + return self.content == node.value + + +class NodePattern(BasePattern): + + wildcards = False + + def __init__(self, type=None, content=None, name=None): + """ + Initializer. Takes optional type, content, and name. + + The type, if given, must be a symbol type (>= 256). If the + type is None this matches *any* single node (leaf or not), + except if content is not None, in which it only matches + non-leaf nodes that also match the content pattern. + + The content, if not None, must be a sequence of Patterns that + must match the node's children exactly. If the content is + given, the type must not be None. + + If a name is given, the matching node is stored in the results + dict under that key. + """ + if type is not None: + assert type >= 256, type + if content is not None: + assert not isinstance(content, str), repr(content) + content = list(content) + for i, item in enumerate(content): + assert isinstance(item, BasePattern), (i, item) + if isinstance(item, WildcardPattern): + self.wildcards = True + self.type = type + self.content = content + self.name = name + + def _submatch(self, node, results=None): + """ + Match the pattern's content to the node's children. + + This assumes the node type matches and self.content is not None. + + Returns True if it matches, False if not. + + If results is not None, it must be a dict which will be + updated with the nodes matching named subpatterns. + + When returning False, the results dict may still be updated. + """ + if self.wildcards: + for c, r in generate_matches(self.content, node.children): + if c == len(node.children): + if results is not None: + results.update(r) + return True + return False + if len(self.content) != len(node.children): + return False + for subpattern, child in zip(self.content, node.children): + if not subpattern.match(child, results): + return False + return True + + +class WildcardPattern(BasePattern): + + """ + A wildcard pattern can match zero or more nodes. 
+ + This has all the flexibility needed to implement patterns like: + + .* .+ .? .{m,n} + (a b c | d e | f) + (...)* (...)+ (...)? (...){m,n} + + except it always uses non-greedy matching. + """ + + def __init__(self, content=None, min=0, max=HUGE, name=None): + """ + Initializer. + + Args: + content: optional sequence of subsequences of patterns; + if absent, matches one node; + if present, each subsequence is an alternative [*] + min: optional minimum number of times to match, default 0 + max: optional maximum number of times to match, default HUGE + name: optional name assigned to this match + + [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is + equivalent to (a b c | d e | f g h); if content is None, + this is equivalent to '.' in regular expression terms. + The min and max parameters work as follows: + min=0, max=maxint: .* + min=1, max=maxint: .+ + min=0, max=1: .? + min=1, max=1: . + If content is not None, replace the dot with the parenthesized + list of alternatives, e.g. (a b c | d e | f g h)* + """ + assert 0 <= min <= max <= HUGE, (min, max) + if content is not None: + content = tuple(map(tuple, content)) # Protect against alterations + # Check sanity of alternatives + assert len(content), repr(content) # Can't have zero alternatives + for alt in content: + assert len(alt), repr(alt) # Can have empty alternatives + self.content = content + self.min = min + self.max = max + self.name = name + + def optimize(self): + """Optimize certain stacked wildcard patterns.""" + subpattern = None + if (self.content is not None and + len(self.content) == 1 and len(self.content[0]) == 1): + subpattern = self.content[0][0] + if self.min == 1 and self.max == 1: + if self.content is None: + return NodePattern(name=self.name) + if subpattern is not None and self.name == subpattern.name: + return subpattern.optimize() + if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and + subpattern.min <= 1 and self.name == subpattern.name): + return WildcardPattern(subpattern.content, + self.min*subpattern.min, + self.max*subpattern.max, + subpattern.name) + return self + + def match(self, node, results=None): + """Does this pattern exactly match a node?""" + return self.match_seq([node], results) + + def match_seq(self, nodes, results=None): + """Does this pattern exactly match a sequence of nodes?""" + for c, r in self.generate_matches(nodes): + if c == len(nodes): + if results is not None: + results.update(r) + if self.name: + results[self.name] = list(nodes) + return True + return False + + def generate_matches(self, nodes): + """ + Generator yielding matches for a sequence of nodes. + + Args: + nodes: sequence of nodes + + Yields: + (count, results) tuples where: + count: the match comprises nodes[:count]; + results: dict containing named submatches. + """ + if self.content is None: + # Shortcut for special case (see __init__.__doc__) + for count in range(self.min, 1 + min(len(nodes), self.max)): + r = {} + if self.name: + r[self.name] = nodes[:count] + yield count, r + elif self.name == "bare_name": + yield self._bare_name_matches(nodes) + else: + # The reason for this is that hitting the recursion limit usually + # results in some ugly messages about how RuntimeErrors are being + # ignored. We only have to do this on CPython, though, because other + # implementations don't have this nasty bug in the first place. 
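+            # hasattr(sys, "getrefcount") is a cheap way to detect CPython,
+            # the only implementation where the stderr silencing is needed.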
+ if hasattr(sys, "getrefcount"): + save_stderr = sys.stderr + sys.stderr = StringIO() + try: + for count, r in self._recursive_matches(nodes, 0): + if self.name: + r[self.name] = nodes[:count] + yield count, r + except RuntimeError: + # Fall back to the iterative pattern matching scheme if the + # recursive scheme hits the recursion limit (RecursionError). + for count, r in self._iterative_matches(nodes): + if self.name: + r[self.name] = nodes[:count] + yield count, r + finally: + if hasattr(sys, "getrefcount"): + sys.stderr = save_stderr + + def _iterative_matches(self, nodes): + """Helper to iteratively yield the matches.""" + nodelen = len(nodes) + if 0 >= self.min: + yield 0, {} + + results = [] + # generate matches that use just one alt from self.content + for alt in self.content: + for c, r in generate_matches(alt, nodes): + yield c, r + results.append((c, r)) + + # for each match, iterate down the nodes + while results: + new_results = [] + for c0, r0 in results: + # stop if the entire set of nodes has been matched + if c0 < nodelen and c0 <= self.max: + for alt in self.content: + for c1, r1 in generate_matches(alt, nodes[c0:]): + if c1 > 0: + r = {} + r.update(r0) + r.update(r1) + yield c0 + c1, r + new_results.append((c0 + c1, r)) + results = new_results + + def _bare_name_matches(self, nodes): + """Special optimized matcher for bare_name.""" + count = 0 + r = {} + done = False + max = len(nodes) + while not done and count < max: + done = True + for leaf in self.content: + if leaf[0].match(nodes[count], r): + count += 1 + done = False + break + r[self.name] = nodes[:count] + return count, r + + def _recursive_matches(self, nodes, count): + """Helper to recursively yield the matches.""" + assert self.content is not None + if count >= self.min: + yield 0, {} + if count < self.max: + for alt in self.content: + for c0, r0 in generate_matches(alt, nodes): + for c1, r1 in self._recursive_matches(nodes[c0:], count+1): + r = {} + r.update(r0) + r.update(r1) + yield c0 + c1, r + + +class NegatedPattern(BasePattern): + + def __init__(self, content=None): + """ + Initializer. + + The argument is either a pattern or None. If it is None, this + only matches an empty sequence (effectively '$' in regex + lingo). If it is not None, this matches whenever the argument + pattern doesn't have any matches. + """ + if content is not None: + assert isinstance(content, BasePattern), repr(content) + self.content = content + + def match(self, node): + # We never match a node in its entirety + return False + + def match_seq(self, nodes): + # We only match an empty sequence of nodes in its entirety + return len(nodes) == 0 + + def generate_matches(self, nodes): + if self.content is None: + # Return a match if there is an empty sequence + if len(nodes) == 0: + yield 0, {} + else: + # Return a match if the argument pattern has no matches + for c, r in self.content.generate_matches(nodes): + return + yield 0, {} + + +def generate_matches(patterns, nodes): + """ + Generator yielding matches for a sequence of patterns and nodes. + + Args: + patterns: a sequence of patterns + nodes: a sequence of nodes + + Yields: + (count, results) tuples where: + count: the entire sequence of patterns matches nodes[:count]; + results: dict containing named submatches. 
+ """ + if not patterns: + yield 0, {} + else: + p, rest = patterns[0], patterns[1:] + for c0, r0 in p.generate_matches(nodes): + if not rest: + yield c0, r0 + else: + for c1, r1 in generate_matches(rest, nodes[c0:]): + r = {} + r.update(r0) + r.update(r1) + yield c0 + c1, r diff --git a/contrib/tools/python3/Lib/lib2to3/refactor.py b/contrib/tools/python3/Lib/lib2to3/refactor.py new file mode 100644 index 0000000000..3a5aafffc6 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/refactor.py @@ -0,0 +1,732 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Refactoring framework. + +Used as a main program, this can refactor any number of files and/or +recursively descend down directories. Imported as a module, this +provides infrastructure to write your own refactoring tool. +""" + +__author__ = "Guido van Rossum <guido@python.org>" + + +# Python imports +import io +import os +import pkgutil +import sys +import logging +import operator +import collections +from itertools import chain + +# Local imports +from .pgen2 import driver, tokenize, token +from .fixer_util import find_root +from . import pytree, pygram +from . import btm_matcher as bm + + +def get_all_fix_names(fixer_pkg, remove_prefix=True): + """Return a sorted list of all available fix names in the given package.""" + pkg = __import__(fixer_pkg, [], [], ["*"]) + fix_names = [] + for finder, name, ispkg in pkgutil.iter_modules(pkg.__path__): + if name.startswith("fix_"): + if remove_prefix: + name = name[4:] + fix_names.append(name) + return fix_names + + +class _EveryNode(Exception): + pass + + +def _get_head_types(pat): + """ Accepts a pytree Pattern Node and returns a set + of the pattern types which will match first. """ + + if isinstance(pat, (pytree.NodePattern, pytree.LeafPattern)): + # NodePatters must either have no type and no content + # or a type and content -- so they don't get any farther + # Always return leafs + if pat.type is None: + raise _EveryNode + return {pat.type} + + if isinstance(pat, pytree.NegatedPattern): + if pat.content: + return _get_head_types(pat.content) + raise _EveryNode # Negated Patterns don't have a type + + if isinstance(pat, pytree.WildcardPattern): + # Recurse on each node in content + r = set() + for p in pat.content: + for x in p: + r.update(_get_head_types(x)) + return r + + raise Exception("Oh no! I don't understand pattern %s" %(pat)) + + +def _get_headnode_dict(fixer_list): + """ Accepts a list of fixers and returns a dictionary + of head node type --> fixer list. """ + head_nodes = collections.defaultdict(list) + every = [] + for fixer in fixer_list: + if fixer.pattern: + try: + heads = _get_head_types(fixer.pattern) + except _EveryNode: + every.append(fixer) + else: + for node_type in heads: + head_nodes[node_type].append(fixer) + else: + if fixer._accept_type is not None: + head_nodes[fixer._accept_type].append(fixer) + else: + every.append(fixer) + for node_type in chain(pygram.python_grammar.symbol2number.values(), + pygram.python_grammar.tokens): + head_nodes[node_type].extend(every) + return dict(head_nodes) + + +def get_fixers_from_package(pkg_name): + """ + Return the fully qualified names for fixers in the package pkg_name. + """ + return [pkg_name + "." 
+ fix_name + for fix_name in get_all_fix_names(pkg_name, False)] + +def _identity(obj): + return obj + + +def _detect_future_features(source): + have_docstring = False + gen = tokenize.generate_tokens(io.StringIO(source).readline) + def advance(): + tok = next(gen) + return tok[0], tok[1] + ignore = frozenset({token.NEWLINE, tokenize.NL, token.COMMENT}) + features = set() + try: + while True: + tp, value = advance() + if tp in ignore: + continue + elif tp == token.STRING: + if have_docstring: + break + have_docstring = True + elif tp == token.NAME and value == "from": + tp, value = advance() + if tp != token.NAME or value != "__future__": + break + tp, value = advance() + if tp != token.NAME or value != "import": + break + tp, value = advance() + if tp == token.OP and value == "(": + tp, value = advance() + while tp == token.NAME: + features.add(value) + tp, value = advance() + if tp != token.OP or value != ",": + break + tp, value = advance() + else: + break + except StopIteration: + pass + return frozenset(features) + + +class FixerError(Exception): + """A fixer could not be loaded.""" + + +class RefactoringTool(object): + + _default_options = {"print_function" : False, + "exec_function": False, + "write_unchanged_files" : False} + + CLASS_PREFIX = "Fix" # The prefix for fixer classes + FILE_PREFIX = "fix_" # The prefix for modules with a fixer within + + def __init__(self, fixer_names, options=None, explicit=None): + """Initializer. + + Args: + fixer_names: a list of fixers to import + options: a dict with configuration. + explicit: a list of fixers to run even if they are explicit. + """ + self.fixers = fixer_names + self.explicit = explicit or [] + self.options = self._default_options.copy() + if options is not None: + self.options.update(options) + self.grammar = pygram.python_grammar.copy() + + if self.options['print_function']: + del self.grammar.keywords["print"] + elif self.options['exec_function']: + del self.grammar.keywords["exec"] + + # When this is True, the refactor*() methods will call write_file() for + # files processed even if they were not changed during refactoring. If + # and only if the refactor method's write parameter was True. + self.write_unchanged_files = self.options.get("write_unchanged_files") + self.errors = [] + self.logger = logging.getLogger("RefactoringTool") + self.fixer_log = [] + self.wrote = False + self.driver = driver.Driver(self.grammar, + convert=pytree.convert, + logger=self.logger) + self.pre_order, self.post_order = self.get_fixers() + + + self.files = [] # List of files that were or should be modified + + self.BM = bm.BottomMatcher() + self.bmi_pre_order = [] # Bottom Matcher incompatible fixers + self.bmi_post_order = [] + + for fixer in chain(self.post_order, self.pre_order): + if fixer.BM_compatible: + self.BM.add_fixer(fixer) + # remove fixers that will be handled by the bottom-up + # matcher + elif fixer in self.pre_order: + self.bmi_pre_order.append(fixer) + elif fixer in self.post_order: + self.bmi_post_order.append(fixer) + + self.bmi_pre_order_heads = _get_headnode_dict(self.bmi_pre_order) + self.bmi_post_order_heads = _get_headnode_dict(self.bmi_post_order) + + + + def get_fixers(self): + """Inspects the options to load the requested patterns and handlers. + + Returns: + (pre_order, post_order), where pre_order is the list of fixers that + want a pre-order AST traversal, and post_order is the list that want + post-order traversal. 
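As a quick illustration of the __future__ scan implemented by _detect_future_features above (a sketch only; the helper is private to the module and the sample source string is made up):

from lib2to3 import refactor

src = '"""module docstring"""\nfrom __future__ import print_function, division\n'
print(refactor._detect_future_features(src))
# expected: frozenset({'print_function', 'division'})  (set ordering may vary)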
+ """ + pre_order_fixers = [] + post_order_fixers = [] + for fix_mod_path in self.fixers: + mod = __import__(fix_mod_path, {}, {}, ["*"]) + fix_name = fix_mod_path.rsplit(".", 1)[-1] + if fix_name.startswith(self.FILE_PREFIX): + fix_name = fix_name[len(self.FILE_PREFIX):] + parts = fix_name.split("_") + class_name = self.CLASS_PREFIX + "".join([p.title() for p in parts]) + try: + fix_class = getattr(mod, class_name) + except AttributeError: + raise FixerError("Can't find %s.%s" % (fix_name, class_name)) from None + fixer = fix_class(self.options, self.fixer_log) + if fixer.explicit and self.explicit is not True and \ + fix_mod_path not in self.explicit: + self.log_message("Skipping optional fixer: %s", fix_name) + continue + + self.log_debug("Adding transformation: %s", fix_name) + if fixer.order == "pre": + pre_order_fixers.append(fixer) + elif fixer.order == "post": + post_order_fixers.append(fixer) + else: + raise FixerError("Illegal fixer order: %r" % fixer.order) + + key_func = operator.attrgetter("run_order") + pre_order_fixers.sort(key=key_func) + post_order_fixers.sort(key=key_func) + return (pre_order_fixers, post_order_fixers) + + def log_error(self, msg, *args, **kwds): + """Called when an error occurs.""" + raise + + def log_message(self, msg, *args): + """Hook to log a message.""" + if args: + msg = msg % args + self.logger.info(msg) + + def log_debug(self, msg, *args): + if args: + msg = msg % args + self.logger.debug(msg) + + def print_output(self, old_text, new_text, filename, equal): + """Called with the old version, new version, and filename of a + refactored file.""" + pass + + def refactor(self, items, write=False, doctests_only=False): + """Refactor a list of files and directories.""" + + for dir_or_file in items: + if os.path.isdir(dir_or_file): + self.refactor_dir(dir_or_file, write, doctests_only) + else: + self.refactor_file(dir_or_file, write, doctests_only) + + def refactor_dir(self, dir_name, write=False, doctests_only=False): + """Descends down a directory and refactor every Python file found. + + Python files are assumed to have a .py extension. + + Files and subdirectories starting with '.' are skipped. + """ + py_ext = os.extsep + "py" + for dirpath, dirnames, filenames in os.walk(dir_name): + self.log_debug("Descending into %s", dirpath) + dirnames.sort() + filenames.sort() + for name in filenames: + if (not name.startswith(".") and + os.path.splitext(name)[1] == py_ext): + fullname = os.path.join(dirpath, name) + self.refactor_file(fullname, write, doctests_only) + # Modify dirnames in-place to remove subdirs with leading dots + dirnames[:] = [dn for dn in dirnames if not dn.startswith(".")] + + def _read_python_source(self, filename): + """ + Do our best to decode a Python source file correctly. + """ + try: + f = open(filename, "rb") + except OSError as err: + self.log_error("Can't open %s: %s", filename, err) + return None, None + try: + encoding = tokenize.detect_encoding(f.readline)[0] + finally: + f.close() + with io.open(filename, "r", encoding=encoding, newline='') as f: + return f.read(), encoding + + def refactor_file(self, filename, write=False, doctests_only=False): + """Refactors a file.""" + input, encoding = self._read_python_source(filename) + if input is None: + # Reading the file failed. 
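get_fixers relies on a simple naming convention: a module named fix_<something> (FILE_PREFIX) is expected to define a class Fix<Something> (CLASS_PREFIX). A standalone sketch of that mapping, using the real fixer module fix_has_key as the example:

fix_mod_path = "lib2to3.fixes.fix_has_key"
fix_name = fix_mod_path.rsplit(".", 1)[-1]               # "fix_has_key"
if fix_name.startswith("fix_"):                          # FILE_PREFIX
    fix_name = fix_name[len("fix_"):]                    # "has_key"
parts = fix_name.split("_")
class_name = "Fix" + "".join(p.title() for p in parts)   # CLASS_PREFIX
print(class_name)                                        # prints: FixHasKey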
+ return + input += "\n" # Silence certain parse errors + if doctests_only: + self.log_debug("Refactoring doctests in %s", filename) + output = self.refactor_docstring(input, filename) + if self.write_unchanged_files or output != input: + self.processed_file(output, filename, input, write, encoding) + else: + self.log_debug("No doctest changes in %s", filename) + else: + tree = self.refactor_string(input, filename) + if self.write_unchanged_files or (tree and tree.was_changed): + # The [:-1] is to take off the \n we added earlier + self.processed_file(str(tree)[:-1], filename, + write=write, encoding=encoding) + else: + self.log_debug("No changes in %s", filename) + + def refactor_string(self, data, name): + """Refactor a given input string. + + Args: + data: a string holding the code to be refactored. + name: a human-readable name for use in error/log messages. + + Returns: + An AST corresponding to the refactored input stream; None if + there were errors during the parse. + """ + features = _detect_future_features(data) + if "print_function" in features: + self.driver.grammar = pygram.python_grammar_no_print_statement + try: + tree = self.driver.parse_string(data) + except Exception as err: + self.log_error("Can't parse %s: %s: %s", + name, err.__class__.__name__, err) + return + finally: + self.driver.grammar = self.grammar + tree.future_features = features + self.log_debug("Refactoring %s", name) + self.refactor_tree(tree, name) + return tree + + def refactor_stdin(self, doctests_only=False): + input = sys.stdin.read() + if doctests_only: + self.log_debug("Refactoring doctests in stdin") + output = self.refactor_docstring(input, "<stdin>") + if self.write_unchanged_files or output != input: + self.processed_file(output, "<stdin>", input) + else: + self.log_debug("No doctest changes in stdin") + else: + tree = self.refactor_string(input, "<stdin>") + if self.write_unchanged_files or (tree and tree.was_changed): + self.processed_file(str(tree), "<stdin>", input) + else: + self.log_debug("No changes in stdin") + + def refactor_tree(self, tree, name): + """Refactors a parse tree (modifying the tree in place). + + For compatible patterns the bottom matcher module is + used. Otherwise the tree is traversed node-to-node for + matches. + + Args: + tree: a pytree.Node instance representing the root of the tree + to be refactored. + name: a human-readable name for this tree. + + Returns: + True if the tree was modified, False otherwise. 
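A minimal end-to-end sketch of refactor_string (assuming the deprecated lib2to3 package and its bundled fixers are importable; the input snippet is made up):

from lib2to3.refactor import RefactoringTool, get_fixers_from_package

rt = RefactoringTool(get_fixers_from_package("lib2to3.fixes"))
tree = rt.refactor_string("print 'hello'\n", "<example>")
print(str(tree), end="")      # expected: print('hello')
print(tree.was_changed)       # expected: True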
+ """ + + for fixer in chain(self.pre_order, self.post_order): + fixer.start_tree(tree, name) + + #use traditional matching for the incompatible fixers + self.traverse_by(self.bmi_pre_order_heads, tree.pre_order()) + self.traverse_by(self.bmi_post_order_heads, tree.post_order()) + + # obtain a set of candidate nodes + match_set = self.BM.run(tree.leaves()) + + while any(match_set.values()): + for fixer in self.BM.fixers: + if fixer in match_set and match_set[fixer]: + #sort by depth; apply fixers from bottom(of the AST) to top + match_set[fixer].sort(key=pytree.Base.depth, reverse=True) + + if fixer.keep_line_order: + #some fixers(eg fix_imports) must be applied + #with the original file's line order + match_set[fixer].sort(key=pytree.Base.get_lineno) + + for node in list(match_set[fixer]): + if node in match_set[fixer]: + match_set[fixer].remove(node) + + try: + find_root(node) + except ValueError: + # this node has been cut off from a + # previous transformation ; skip + continue + + if node.fixers_applied and fixer in node.fixers_applied: + # do not apply the same fixer again + continue + + results = fixer.match(node) + + if results: + new = fixer.transform(node, results) + if new is not None: + node.replace(new) + #new.fixers_applied.append(fixer) + for node in new.post_order(): + # do not apply the fixer again to + # this or any subnode + if not node.fixers_applied: + node.fixers_applied = [] + node.fixers_applied.append(fixer) + + # update the original match set for + # the added code + new_matches = self.BM.run(new.leaves()) + for fxr in new_matches: + if not fxr in match_set: + match_set[fxr]=[] + + match_set[fxr].extend(new_matches[fxr]) + + for fixer in chain(self.pre_order, self.post_order): + fixer.finish_tree(tree, name) + return tree.was_changed + + def traverse_by(self, fixers, traversal): + """Traverse an AST, applying a set of fixers to each node. + + This is a helper method for refactor_tree(). + + Args: + fixers: a list of fixer instances. + traversal: a generator that yields AST nodes. + + Returns: + None + """ + if not fixers: + return + for node in traversal: + for fixer in fixers[node.type]: + results = fixer.match(node) + if results: + new = fixer.transform(node, results) + if new is not None: + node.replace(new) + node = new + + def processed_file(self, new_text, filename, old_text=None, write=False, + encoding=None): + """ + Called when a file has been refactored and there may be changes. + """ + self.files.append(filename) + if old_text is None: + old_text = self._read_python_source(filename)[0] + if old_text is None: + return + equal = old_text == new_text + self.print_output(old_text, new_text, filename, equal) + if equal: + self.log_debug("No changes to %s", filename) + if not self.write_unchanged_files: + return + if write: + self.write_file(new_text, filename, old_text, encoding) + else: + self.log_debug("Not writing changes to %s", filename) + + def write_file(self, new_text, filename, old_text, encoding=None): + """Writes a string to a file. + + It first shows a unified diff between the old text and the new text, and + then rewrites the file; the latter is only done if the write option is + set. 
+ """ + try: + fp = io.open(filename, "w", encoding=encoding, newline='') + except OSError as err: + self.log_error("Can't create %s: %s", filename, err) + return + + with fp: + try: + fp.write(new_text) + except OSError as err: + self.log_error("Can't write %s: %s", filename, err) + self.log_debug("Wrote changes to %s", filename) + self.wrote = True + + PS1 = ">>> " + PS2 = "... " + + def refactor_docstring(self, input, filename): + """Refactors a docstring, looking for doctests. + + This returns a modified version of the input string. It looks + for doctests, which start with a ">>>" prompt, and may be + continued with "..." prompts, as long as the "..." is indented + the same as the ">>>". + + (Unfortunately we can't use the doctest module's parser, + since, like most parsers, it is not geared towards preserving + the original source.) + """ + result = [] + block = None + block_lineno = None + indent = None + lineno = 0 + for line in input.splitlines(keepends=True): + lineno += 1 + if line.lstrip().startswith(self.PS1): + if block is not None: + result.extend(self.refactor_doctest(block, block_lineno, + indent, filename)) + block_lineno = lineno + block = [line] + i = line.find(self.PS1) + indent = line[:i] + elif (indent is not None and + (line.startswith(indent + self.PS2) or + line == indent + self.PS2.rstrip() + "\n")): + block.append(line) + else: + if block is not None: + result.extend(self.refactor_doctest(block, block_lineno, + indent, filename)) + block = None + indent = None + result.append(line) + if block is not None: + result.extend(self.refactor_doctest(block, block_lineno, + indent, filename)) + return "".join(result) + + def refactor_doctest(self, block, lineno, indent, filename): + """Refactors one doctest. + + A doctest is given as a block of lines, the first of which starts + with ">>>" (possibly indented), while the remaining lines start + with "..." (identically indented). + + """ + try: + tree = self.parse_block(block, lineno, indent) + except Exception as err: + if self.logger.isEnabledFor(logging.DEBUG): + for line in block: + self.log_debug("Source: %s", line.rstrip("\n")) + self.log_error("Can't parse docstring in %s line %s: %s: %s", + filename, lineno, err.__class__.__name__, err) + return block + if self.refactor_tree(tree, filename): + new = str(tree).splitlines(keepends=True) + # Undo the adjustment of the line numbers in wrap_toks() below. + clipped, new = new[:lineno-1], new[lineno-1:] + assert clipped == ["\n"] * (lineno-1), clipped + if not new[-1].endswith("\n"): + new[-1] += "\n" + block = [indent + self.PS1 + new.pop(0)] + if new: + block += [indent + self.PS2 + line for line in new] + return block + + def summarize(self): + if self.wrote: + were = "were" + else: + were = "need to be" + if not self.files: + self.log_message("No files %s modified.", were) + else: + self.log_message("Files that %s modified:", were) + for file in self.files: + self.log_message(file) + if self.fixer_log: + self.log_message("Warnings/messages while refactoring:") + for message in self.fixer_log: + self.log_message(message) + if self.errors: + if len(self.errors) == 1: + self.log_message("There was 1 error:") + else: + self.log_message("There were %d errors:", len(self.errors)) + for msg, args, kwds in self.errors: + self.log_message(msg, *args, **kwds) + + def parse_block(self, block, lineno, indent): + """Parses a block into a tree. + + This is necessary to get correct line number / offset information + in the parser diagnostics and embedded into the parse tree. 
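The doctest handling described above can be exercised directly through refactor_docstring. A sketch (the docstring text is made up; assumes lib2to3 and its fixers are importable):

from lib2to3.refactor import RefactoringTool, get_fixers_from_package

rt = RefactoringTool(get_fixers_from_package("lib2to3.fixes"))
doc = "Summary.\n    >>> print 'x'\n    x\n"
print(rt.refactor_docstring(doc, "<docstring>"), end="")
# expected output (only the ">>>" prompt line is rewritten):
#   Summary.
#       >>> print('x')
#       x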
+ """ + tree = self.driver.parse_tokens(self.wrap_toks(block, lineno, indent)) + tree.future_features = frozenset() + return tree + + def wrap_toks(self, block, lineno, indent): + """Wraps a tokenize stream to systematically modify start/end.""" + tokens = tokenize.generate_tokens(self.gen_lines(block, indent).__next__) + for type, value, (line0, col0), (line1, col1), line_text in tokens: + line0 += lineno - 1 + line1 += lineno - 1 + # Don't bother updating the columns; this is too complicated + # since line_text would also have to be updated and it would + # still break for tokens spanning lines. Let the user guess + # that the column numbers for doctests are relative to the + # end of the prompt string (PS1 or PS2). + yield type, value, (line0, col0), (line1, col1), line_text + + + def gen_lines(self, block, indent): + """Generates lines as expected by tokenize from a list of lines. + + This strips the first len(indent + self.PS1) characters off each line. + """ + prefix1 = indent + self.PS1 + prefix2 = indent + self.PS2 + prefix = prefix1 + for line in block: + if line.startswith(prefix): + yield line[len(prefix):] + elif line == prefix.rstrip() + "\n": + yield "\n" + else: + raise AssertionError("line=%r, prefix=%r" % (line, prefix)) + prefix = prefix2 + while True: + yield "" + + +class MultiprocessingUnsupported(Exception): + pass + + +class MultiprocessRefactoringTool(RefactoringTool): + + def __init__(self, *args, **kwargs): + super(MultiprocessRefactoringTool, self).__init__(*args, **kwargs) + self.queue = None + self.output_lock = None + + def refactor(self, items, write=False, doctests_only=False, + num_processes=1): + if num_processes == 1: + return super(MultiprocessRefactoringTool, self).refactor( + items, write, doctests_only) + try: + import multiprocessing + except ImportError: + raise MultiprocessingUnsupported + if self.queue is not None: + raise RuntimeError("already doing multiple processes") + self.queue = multiprocessing.JoinableQueue() + self.output_lock = multiprocessing.Lock() + processes = [multiprocessing.Process(target=self._child) + for i in range(num_processes)] + try: + for p in processes: + p.start() + super(MultiprocessRefactoringTool, self).refactor(items, write, + doctests_only) + finally: + self.queue.join() + for i in range(num_processes): + self.queue.put(None) + for p in processes: + if p.is_alive(): + p.join() + self.queue = None + + def _child(self): + task = self.queue.get() + while task is not None: + args, kwargs = task + try: + super(MultiprocessRefactoringTool, self).refactor_file( + *args, **kwargs) + finally: + self.queue.task_done() + task = self.queue.get() + + def refactor_file(self, *args, **kwargs): + if self.queue is not None: + self.queue.put((args, kwargs)) + else: + return super(MultiprocessRefactoringTool, self).refactor_file( + *args, **kwargs) |
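Finally, a usage sketch for the multiprocessing variant, which is the machinery used by 2to3's -j option (the ./src path is hypothetical, and refactor() raises MultiprocessingUnsupported when the multiprocessing module is unavailable):

import logging
from lib2to3.refactor import MultiprocessRefactoringTool, get_fixers_from_package

if __name__ == "__main__":        # guard needed where multiprocessing spawns fresh interpreters
    logging.basicConfig(level=logging.INFO)   # RefactoringTool reports via the logging module
    mrt = MultiprocessRefactoringTool(get_fixers_from_package("lib2to3.fixes"))
    # write=False only reports what would change; pass write=True to rewrite files in place
    mrt.refactor(["./src"], write=False, doctests_only=False, num_processes=4)
    mrt.summarize()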