diff options
author | maxim-yurchuk <maxim-yurchuk@yandex-team.com> | 2024-10-09 12:29:46 +0300 |
---|---|---|
committer | maxim-yurchuk <maxim-yurchuk@yandex-team.com> | 2024-10-09 13:14:22 +0300 |
commit | 9731d8a4bb7ee2cc8554eaf133bb85498a4c7d80 (patch) | |
tree | a8fb3181d5947c0d78cf402aa56e686130179049 /contrib/tools/python3/Lib/lib2to3/btm_matcher.py | |
parent | a44b779cd359f06c3ebbef4ec98c6b38609d9d85 (diff) | |
download | ydb-9731d8a4bb7ee2cc8554eaf133bb85498a4c7d80.tar.gz |
publishFullContrib: true for ydb
<HIDDEN_URL>
commit_hash:c82a80ac4594723cebf2c7387dec9c60217f603e
Diffstat (limited to 'contrib/tools/python3/Lib/lib2to3/btm_matcher.py')
-rw-r--r-- | contrib/tools/python3/Lib/lib2to3/btm_matcher.py | 163 |
1 files changed, 163 insertions, 0 deletions
diff --git a/contrib/tools/python3/Lib/lib2to3/btm_matcher.py b/contrib/tools/python3/Lib/lib2to3/btm_matcher.py new file mode 100644 index 0000000000..3b78868038 --- /dev/null +++ b/contrib/tools/python3/Lib/lib2to3/btm_matcher.py @@ -0,0 +1,163 @@ +"""A bottom-up tree matching algorithm implementation meant to speed +up 2to3's matching process. After the tree patterns are reduced to +their rarest linear path, a linear Aho-Corasick automaton is +created. The linear automaton traverses the linear paths from the +leaves to the root of the AST and returns a set of nodes for further +matching. This reduces significantly the number of candidate nodes.""" + +__author__ = "George Boutsioukis <gboutsioukis@gmail.com>" + +import logging +import itertools +from collections import defaultdict + +from . import pytree +from .btm_utils import reduce_tree + +class BMNode(object): + """Class for a node of the Aho-Corasick automaton used in matching""" + count = itertools.count() + def __init__(self): + self.transition_table = {} + self.fixers = [] + self.id = next(BMNode.count) + self.content = '' + +class BottomMatcher(object): + """The main matcher class. After instantiating the patterns should + be added using the add_fixer method""" + + def __init__(self): + self.match = set() + self.root = BMNode() + self.nodes = [self.root] + self.fixers = [] + self.logger = logging.getLogger("RefactoringTool") + + def add_fixer(self, fixer): + """Reduces a fixer's pattern tree to a linear path and adds it + to the matcher(a common Aho-Corasick automaton). The fixer is + appended on the matching states and called when they are + reached""" + self.fixers.append(fixer) + tree = reduce_tree(fixer.pattern_tree) + linear = tree.get_linear_subpattern() + match_nodes = self.add(linear, start=self.root) + for match_node in match_nodes: + match_node.fixers.append(fixer) + + def add(self, pattern, start): + "Recursively adds a linear pattern to the AC automaton" + #print("adding pattern", pattern, "to", start) + if not pattern: + #print("empty pattern") + return [start] + if isinstance(pattern[0], tuple): + #alternatives + #print("alternatives") + match_nodes = [] + for alternative in pattern[0]: + #add all alternatives, and add the rest of the pattern + #to each end node + end_nodes = self.add(alternative, start=start) + for end in end_nodes: + match_nodes.extend(self.add(pattern[1:], end)) + return match_nodes + else: + #single token + #not last + if pattern[0] not in start.transition_table: + #transition did not exist, create new + next_node = BMNode() + start.transition_table[pattern[0]] = next_node + else: + #transition exists already, follow + next_node = start.transition_table[pattern[0]] + + if pattern[1:]: + end_nodes = self.add(pattern[1:], start=next_node) + else: + end_nodes = [next_node] + return end_nodes + + def run(self, leaves): + """The main interface with the bottom matcher. The tree is + traversed from the bottom using the constructed + automaton. Nodes are only checked once as the tree is + retraversed. When the automaton fails, we give it one more + shot(in case the above tree matches as a whole with the + rejected leaf), then we break for the next leaf. There is the + special case of multiple arguments(see code comments) where we + recheck the nodes + + Args: + The leaves of the AST tree to be matched + + Returns: + A dictionary of node matches with fixers as the keys + """ + current_ac_node = self.root + results = defaultdict(list) + for leaf in leaves: + current_ast_node = leaf + while current_ast_node: + current_ast_node.was_checked = True + for child in current_ast_node.children: + # multiple statements, recheck + if isinstance(child, pytree.Leaf) and child.value == ";": + current_ast_node.was_checked = False + break + if current_ast_node.type == 1: + #name + node_token = current_ast_node.value + else: + node_token = current_ast_node.type + + if node_token in current_ac_node.transition_table: + #token matches + current_ac_node = current_ac_node.transition_table[node_token] + for fixer in current_ac_node.fixers: + results[fixer].append(current_ast_node) + else: + #matching failed, reset automaton + current_ac_node = self.root + if (current_ast_node.parent is not None + and current_ast_node.parent.was_checked): + #the rest of the tree upwards has been checked, next leaf + break + + #recheck the rejected node once from the root + if node_token in current_ac_node.transition_table: + #token matches + current_ac_node = current_ac_node.transition_table[node_token] + for fixer in current_ac_node.fixers: + results[fixer].append(current_ast_node) + + current_ast_node = current_ast_node.parent + return results + + def print_ac(self): + "Prints a graphviz diagram of the BM automaton(for debugging)" + print("digraph g{") + def print_node(node): + for subnode_key in node.transition_table.keys(): + subnode = node.transition_table[subnode_key] + print("%d -> %d [label=%s] //%s" % + (node.id, subnode.id, type_repr(subnode_key), str(subnode.fixers))) + if subnode_key == 1: + print(subnode.content) + print_node(subnode) + print_node(self.root) + print("}") + +# taken from pytree.py for debugging; only used by print_ac +_type_reprs = {} +def type_repr(type_num): + global _type_reprs + if not _type_reprs: + from .pygram import python_symbols + # printing tokens is possible but not as useful + # from .pgen2 import token // token.__dict__.items(): + for name, val in python_symbols.__dict__.items(): + if type(val) == int: _type_reprs[val] = name + return _type_reprs.setdefault(type_num, type_num) |