aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/tools/python3/Lib/lib2to3/btm_matcher.py
diff options
context:
space:
mode:
authormaxim-yurchuk <maxim-yurchuk@yandex-team.com>2024-10-09 12:29:46 +0300
committermaxim-yurchuk <maxim-yurchuk@yandex-team.com>2024-10-09 13:14:22 +0300
commit9731d8a4bb7ee2cc8554eaf133bb85498a4c7d80 (patch)
treea8fb3181d5947c0d78cf402aa56e686130179049 /contrib/tools/python3/Lib/lib2to3/btm_matcher.py
parenta44b779cd359f06c3ebbef4ec98c6b38609d9d85 (diff)
downloadydb-9731d8a4bb7ee2cc8554eaf133bb85498a4c7d80.tar.gz
publishFullContrib: true for ydb
<HIDDEN_URL> commit_hash:c82a80ac4594723cebf2c7387dec9c60217f603e
Diffstat (limited to 'contrib/tools/python3/Lib/lib2to3/btm_matcher.py')
-rw-r--r--contrib/tools/python3/Lib/lib2to3/btm_matcher.py163
1 files changed, 163 insertions, 0 deletions
diff --git a/contrib/tools/python3/Lib/lib2to3/btm_matcher.py b/contrib/tools/python3/Lib/lib2to3/btm_matcher.py
new file mode 100644
index 0000000000..3b78868038
--- /dev/null
+++ b/contrib/tools/python3/Lib/lib2to3/btm_matcher.py
@@ -0,0 +1,163 @@
+"""A bottom-up tree matching algorithm implementation meant to speed
+up 2to3's matching process. After the tree patterns are reduced to
+their rarest linear path, a linear Aho-Corasick automaton is
+created. The linear automaton traverses the linear paths from the
+leaves to the root of the AST and returns a set of nodes for further
+matching. This reduces significantly the number of candidate nodes."""
+
+__author__ = "George Boutsioukis <gboutsioukis@gmail.com>"
+
+import logging
+import itertools
+from collections import defaultdict
+
+from . import pytree
+from .btm_utils import reduce_tree
+
+class BMNode(object):
+ """Class for a node of the Aho-Corasick automaton used in matching"""
+ count = itertools.count()
+ def __init__(self):
+ self.transition_table = {}
+ self.fixers = []
+ self.id = next(BMNode.count)
+ self.content = ''
+
+class BottomMatcher(object):
+ """The main matcher class. After instantiating the patterns should
+ be added using the add_fixer method"""
+
+ def __init__(self):
+ self.match = set()
+ self.root = BMNode()
+ self.nodes = [self.root]
+ self.fixers = []
+ self.logger = logging.getLogger("RefactoringTool")
+
+ def add_fixer(self, fixer):
+ """Reduces a fixer's pattern tree to a linear path and adds it
+ to the matcher(a common Aho-Corasick automaton). The fixer is
+ appended on the matching states and called when they are
+ reached"""
+ self.fixers.append(fixer)
+ tree = reduce_tree(fixer.pattern_tree)
+ linear = tree.get_linear_subpattern()
+ match_nodes = self.add(linear, start=self.root)
+ for match_node in match_nodes:
+ match_node.fixers.append(fixer)
+
+ def add(self, pattern, start):
+ "Recursively adds a linear pattern to the AC automaton"
+ #print("adding pattern", pattern, "to", start)
+ if not pattern:
+ #print("empty pattern")
+ return [start]
+ if isinstance(pattern[0], tuple):
+ #alternatives
+ #print("alternatives")
+ match_nodes = []
+ for alternative in pattern[0]:
+ #add all alternatives, and add the rest of the pattern
+ #to each end node
+ end_nodes = self.add(alternative, start=start)
+ for end in end_nodes:
+ match_nodes.extend(self.add(pattern[1:], end))
+ return match_nodes
+ else:
+ #single token
+ #not last
+ if pattern[0] not in start.transition_table:
+ #transition did not exist, create new
+ next_node = BMNode()
+ start.transition_table[pattern[0]] = next_node
+ else:
+ #transition exists already, follow
+ next_node = start.transition_table[pattern[0]]
+
+ if pattern[1:]:
+ end_nodes = self.add(pattern[1:], start=next_node)
+ else:
+ end_nodes = [next_node]
+ return end_nodes
+
+ def run(self, leaves):
+ """The main interface with the bottom matcher. The tree is
+ traversed from the bottom using the constructed
+ automaton. Nodes are only checked once as the tree is
+ retraversed. When the automaton fails, we give it one more
+ shot(in case the above tree matches as a whole with the
+ rejected leaf), then we break for the next leaf. There is the
+ special case of multiple arguments(see code comments) where we
+ recheck the nodes
+
+ Args:
+ The leaves of the AST tree to be matched
+
+ Returns:
+ A dictionary of node matches with fixers as the keys
+ """
+ current_ac_node = self.root
+ results = defaultdict(list)
+ for leaf in leaves:
+ current_ast_node = leaf
+ while current_ast_node:
+ current_ast_node.was_checked = True
+ for child in current_ast_node.children:
+ # multiple statements, recheck
+ if isinstance(child, pytree.Leaf) and child.value == ";":
+ current_ast_node.was_checked = False
+ break
+ if current_ast_node.type == 1:
+ #name
+ node_token = current_ast_node.value
+ else:
+ node_token = current_ast_node.type
+
+ if node_token in current_ac_node.transition_table:
+ #token matches
+ current_ac_node = current_ac_node.transition_table[node_token]
+ for fixer in current_ac_node.fixers:
+ results[fixer].append(current_ast_node)
+ else:
+ #matching failed, reset automaton
+ current_ac_node = self.root
+ if (current_ast_node.parent is not None
+ and current_ast_node.parent.was_checked):
+ #the rest of the tree upwards has been checked, next leaf
+ break
+
+ #recheck the rejected node once from the root
+ if node_token in current_ac_node.transition_table:
+ #token matches
+ current_ac_node = current_ac_node.transition_table[node_token]
+ for fixer in current_ac_node.fixers:
+ results[fixer].append(current_ast_node)
+
+ current_ast_node = current_ast_node.parent
+ return results
+
+ def print_ac(self):
+ "Prints a graphviz diagram of the BM automaton(for debugging)"
+ print("digraph g{")
+ def print_node(node):
+ for subnode_key in node.transition_table.keys():
+ subnode = node.transition_table[subnode_key]
+ print("%d -> %d [label=%s] //%s" %
+ (node.id, subnode.id, type_repr(subnode_key), str(subnode.fixers)))
+ if subnode_key == 1:
+ print(subnode.content)
+ print_node(subnode)
+ print_node(self.root)
+ print("}")
+
+# taken from pytree.py for debugging; only used by print_ac
+_type_reprs = {}
+def type_repr(type_num):
+ global _type_reprs
+ if not _type_reprs:
+ from .pygram import python_symbols
+ # printing tokens is possible but not as useful
+ # from .pgen2 import token // token.__dict__.items():
+ for name, val in python_symbols.__dict__.items():
+ if type(val) == int: _type_reprs[val] = name
+ return _type_reprs.setdefault(type_num, type_num)