aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/llvm12/lib/Support/SuffixTree.cpp
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/libs/llvm12/lib/Support/SuffixTree.cpp
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'contrib/libs/llvm12/lib/Support/SuffixTree.cpp')
-rw-r--r--contrib/libs/llvm12/lib/Support/SuffixTree.cpp210
1 files changed, 210 insertions, 0 deletions
diff --git a/contrib/libs/llvm12/lib/Support/SuffixTree.cpp b/contrib/libs/llvm12/lib/Support/SuffixTree.cpp
new file mode 100644
index 0000000000..0d419f12cd
--- /dev/null
+++ b/contrib/libs/llvm12/lib/Support/SuffixTree.cpp
@@ -0,0 +1,210 @@
+//===- llvm/Support/SuffixTree.cpp - Implement Suffix Tree ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Suffix Tree class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/SuffixTree.h"
+#include "llvm/Support/Allocator.h"
+#include <vector>
+
+using namespace llvm;
+
+SuffixTree::SuffixTree(const std::vector<unsigned> &Str) : Str(Str) {
+ Root = insertInternalNode(nullptr, EmptyIdx, EmptyIdx, 0);
+ Active.Node = Root;
+
+ // Keep track of the number of suffixes we have to add of the current
+ // prefix.
+ unsigned SuffixesToAdd = 0;
+
+ // Construct the suffix tree iteratively on each prefix of the string.
+ // PfxEndIdx is the end index of the current prefix.
+ // End is one past the last element in the string.
+ for (unsigned PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End; PfxEndIdx++) {
+ SuffixesToAdd++;
+ LeafEndIdx = PfxEndIdx; // Extend each of the leaves.
+ SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd);
+ }
+
+ // Set the suffix indices of each leaf.
+ assert(Root && "Root node can't be nullptr!");
+ setSuffixIndices();
+}
+
+SuffixTreeNode *SuffixTree::insertLeaf(SuffixTreeNode &Parent,
+ unsigned StartIdx, unsigned Edge) {
+
+ assert(StartIdx <= LeafEndIdx && "String can't start after it ends!");
+
+ SuffixTreeNode *N = new (NodeAllocator.Allocate())
+ SuffixTreeNode(StartIdx, &LeafEndIdx, nullptr);
+ Parent.Children[Edge] = N;
+
+ return N;
+}
+
+SuffixTreeNode *SuffixTree::insertInternalNode(SuffixTreeNode *Parent,
+ unsigned StartIdx,
+ unsigned EndIdx, unsigned Edge) {
+
+ assert(StartIdx <= EndIdx && "String can't start after it ends!");
+ assert(!(!Parent && StartIdx != EmptyIdx) &&
+ "Non-root internal nodes must have parents!");
+
+ unsigned *E = new (InternalEndIdxAllocator) unsigned(EndIdx);
+ SuffixTreeNode *N =
+ new (NodeAllocator.Allocate()) SuffixTreeNode(StartIdx, E, Root);
+ if (Parent)
+ Parent->Children[Edge] = N;
+
+ return N;
+}
+
+void SuffixTree::setSuffixIndices() {
+ // List of nodes we need to visit along with the current length of the
+ // string.
+ std::vector<std::pair<SuffixTreeNode *, unsigned>> ToVisit;
+
+ // Current node being visited.
+ SuffixTreeNode *CurrNode = Root;
+
+ // Sum of the lengths of the nodes down the path to the current one.
+ unsigned CurrNodeLen = 0;
+ ToVisit.push_back({CurrNode, CurrNodeLen});
+ while (!ToVisit.empty()) {
+ std::tie(CurrNode, CurrNodeLen) = ToVisit.back();
+ ToVisit.pop_back();
+ CurrNode->ConcatLen = CurrNodeLen;
+ for (auto &ChildPair : CurrNode->Children) {
+ assert(ChildPair.second && "Node had a null child!");
+ ToVisit.push_back(
+ {ChildPair.second, CurrNodeLen + ChildPair.second->size()});
+ }
+
+ // No children, so we are at the end of the string.
+ if (CurrNode->Children.size() == 0 && !CurrNode->isRoot())
+ CurrNode->SuffixIdx = Str.size() - CurrNodeLen;
+ }
+}
+
+unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) {
+ SuffixTreeNode *NeedsLink = nullptr;
+
+ while (SuffixesToAdd > 0) {
+
+ // Are we waiting to add anything other than just the last character?
+ if (Active.Len == 0) {
+ // If not, then say the active index is the end index.
+ Active.Idx = EndIdx;
+ }
+
+ assert(Active.Idx <= EndIdx && "Start index can't be after end index!");
+
+ // The first character in the current substring we're looking at.
+ unsigned FirstChar = Str[Active.Idx];
+
+ // Have we inserted anything starting with FirstChar at the current node?
+ if (Active.Node->Children.count(FirstChar) == 0) {
+ // If not, then we can just insert a leaf and move to the next step.
+ insertLeaf(*Active.Node, EndIdx, FirstChar);
+
+ // The active node is an internal node, and we visited it, so it must
+ // need a link if it doesn't have one.
+ if (NeedsLink) {
+ NeedsLink->Link = Active.Node;
+ NeedsLink = nullptr;
+ }
+ } else {
+ // There's a match with FirstChar, so look for the point in the tree to
+ // insert a new node.
+ SuffixTreeNode *NextNode = Active.Node->Children[FirstChar];
+
+ unsigned SubstringLen = NextNode->size();
+
+ // Is the current suffix we're trying to insert longer than the size of
+ // the child we want to move to?
+ if (Active.Len >= SubstringLen) {
+ // If yes, then consume the characters we've seen and move to the next
+ // node.
+ Active.Idx += SubstringLen;
+ Active.Len -= SubstringLen;
+ Active.Node = NextNode;
+ continue;
+ }
+
+ // Otherwise, the suffix we're trying to insert must be contained in the
+ // next node we want to move to.
+ unsigned LastChar = Str[EndIdx];
+
+ // Is the string we're trying to insert a substring of the next node?
+ if (Str[NextNode->StartIdx + Active.Len] == LastChar) {
+ // If yes, then we're done for this step. Remember our insertion point
+ // and move to the next end index. At this point, we have an implicit
+ // suffix tree.
+ if (NeedsLink && !Active.Node->isRoot()) {
+ NeedsLink->Link = Active.Node;
+ NeedsLink = nullptr;
+ }
+
+ Active.Len++;
+ break;
+ }
+
+ // The string we're trying to insert isn't a substring of the next node,
+ // but matches up to a point. Split the node.
+ //
+ // For example, say we ended our search at a node n and we're trying to
+ // insert ABD. Then we'll create a new node s for AB, reduce n to just
+ // representing C, and insert a new leaf node l to represent d. This
+ // allows us to ensure that if n was a leaf, it remains a leaf.
+ //
+ // | ABC ---split---> | AB
+ // n s
+ // C / \ D
+ // n l
+
+ // The node s from the diagram
+ SuffixTreeNode *SplitNode =
+ insertInternalNode(Active.Node, NextNode->StartIdx,
+ NextNode->StartIdx + Active.Len - 1, FirstChar);
+
+ // Insert the new node representing the new substring into the tree as
+ // a child of the split node. This is the node l from the diagram.
+ insertLeaf(*SplitNode, EndIdx, LastChar);
+
+ // Make the old node a child of the split node and update its start
+ // index. This is the node n from the diagram.
+ NextNode->StartIdx += Active.Len;
+ SplitNode->Children[Str[NextNode->StartIdx]] = NextNode;
+
+ // SplitNode is an internal node, update the suffix link.
+ if (NeedsLink)
+ NeedsLink->Link = SplitNode;
+
+ NeedsLink = SplitNode;
+ }
+
+ // We've added something new to the tree, so there's one less suffix to
+ // add.
+ SuffixesToAdd--;
+
+ if (Active.Node->isRoot()) {
+ if (Active.Len > 0) {
+ Active.Len--;
+ Active.Idx = EndIdx - SuffixesToAdd + 1;
+ }
+ } else {
+ // Start the next phase at the next smallest suffix.
+ Active.Node = Active.Node->Link;
+ }
+ }
+
+ return SuffixesToAdd;
+}