aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/containers/comptrie/minimize.cpp
diff options
context:
space:
mode:
authorgrig <grig@yandex-team.ru>2022-02-10 16:50:24 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:50:24 +0300
commitbeb63ece3a6872dfbe113104f524ab6fdbec0adc (patch)
tree5d5cb817648f650d76cf1076100726fd9b8448e8 /library/cpp/containers/comptrie/minimize.cpp
parentda383a4f674027527827ad076134241fc5da0cbf (diff)
downloadydb-beb63ece3a6872dfbe113104f524ab6fdbec0adc.tar.gz
Restoring authorship annotation for <grig@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/containers/comptrie/minimize.cpp')
-rw-r--r--library/cpp/containers/comptrie/minimize.cpp86
1 files changed, 43 insertions, 43 deletions
diff --git a/library/cpp/containers/comptrie/minimize.cpp b/library/cpp/containers/comptrie/minimize.cpp
index cb25a366f6f..80d0b25217d 100644
--- a/library/cpp/containers/comptrie/minimize.cpp
+++ b/library/cpp/containers/comptrie/minimize.cpp
@@ -15,7 +15,7 @@ namespace NCompactTrie {
// nodes that have identical continuations (Daciuk's right language),
// and repack the trie. Repacking is done in-place, so memory is less
// of an issue; however, it may take considerable time.
-
+
// IMPORTANT: never try to reminimize an already minimized trie or a trie with fast layout.
// Because of non-local structure and epsilon links, it won't work
// as you expect it to, and can destroy the trie in the making.
@@ -23,7 +23,7 @@ namespace NCompactTrie {
namespace {
using TOffsetList = TVector<size_t>;
using TPieceIndex = THashMap<size_t, TOffsetList>;
-
+
using TSizePair = std::pair<size_t, size_t>;
using TSizePairVector = TVector<TSizePair>;
using TSizePairVectorVector = TVector<TSizePairVector>;
@@ -36,38 +36,38 @@ namespace NCompactTrie {
TOffsetMap() {
Data.reserve(0x10000);
}
-
+
void Add(size_t key, size_t value) {
size_t hikey = key & 0xFFFF;
-
+
if (Data.size() <= hikey)
Data.resize(hikey + 1);
TSizePairVector& sublist = Data[hikey];
-
+
for (auto& it : sublist) {
if (it.first == key) {
it.second = value;
-
+
return;
}
}
-
+
sublist.push_back(TSizePair(key, value));
}
bool Contains(size_t key) const {
return (Get(key) != 0);
- }
+ }
size_t Get(size_t key) const {
size_t hikey = key & 0xFFFF;
-
+
if (Data.size() <= hikey)
return 0;
-
+
const TSizePairVector& sublist = Data[hikey];
-
+
for (const auto& it : sublist) {
if (it.first == key)
return it.second;
@@ -76,7 +76,7 @@ namespace NCompactTrie {
return 0;
}
};
-
+
class TOffsetDeltas {
protected:
TSizePairVector Data;
@@ -88,10 +88,10 @@ namespace NCompactTrie {
return; // no offset
} else {
TSizePair last = Data.back();
-
+
if (key <= last.first) {
Cerr << "Trouble: elements to offset delta list added in wrong order" << Endl;
-
+
return;
}
@@ -100,19 +100,19 @@ namespace NCompactTrie {
}
Data.push_back(TSizePair(key, value));
- }
+ }
size_t Get(size_t key) const {
if (Data.empty())
return key; // difference is zero;
-
+
if (key < Data.front().first)
return key;
-
+
// Binary search for the highest entry in the list that does not exceed the key
size_t from = 0;
size_t to = Data.size() - 1;
-
+
while (from < to) {
size_t midpoint = (from + to + 1) / 2;
@@ -121,7 +121,7 @@ namespace NCompactTrie {
else
from = midpoint;
}
-
+
TSizePair entry = Data[from];
return key - entry.first + entry.second;
@@ -139,7 +139,7 @@ namespace NCompactTrie {
, Length(len)
{
}
-
+
bool operator()(size_t p1, const size_t p2) {
int res = memcmp(Data + p1, Data + p2, Length);
@@ -159,13 +159,13 @@ namespace NCompactTrie {
: Selector(0)
{
}
-
+
TBranchPoint(const char* data, size_t offset, const ILeafSkipper& skipFunction)
: Node(data, offset, skipFunction)
, Selector(0)
{
}
-
+
bool IsFinal() const {
return Node.IsFinal();
}
@@ -174,7 +174,7 @@ namespace NCompactTrie {
size_t NextNode(const TOffsetMap& mergedNodes) {
while (Selector < 3) {
size_t nextOffset = 0;
-
+
switch (++Selector) {
case 1:
if (Node.GetRightOffset())
@@ -199,7 +199,7 @@ namespace NCompactTrie {
return nextOffset;
}
return 0;
- }
+ }
};
class TMergingReverseNodeEnumerator: public TReverseNodeEnumerator {
@@ -209,7 +209,7 @@ namespace NCompactTrie {
const TOffsetMap& MergeMap;
TVector<TBranchPoint> Trace;
TOffsetDeltas OffsetIndex;
-
+
public:
TMergingReverseNodeEnumerator(const TOpaqueTrie& trie, const TOffsetMap& mergers)
: Fresh(true)
@@ -217,7 +217,7 @@ namespace NCompactTrie {
, MergeMap(mergers)
{
}
-
+
bool Move() override {
if (Fresh) {
Trace.push_back(TBranchPoint(Trie.Data, 0, Trie.SkipFunction));
@@ -225,7 +225,7 @@ namespace NCompactTrie {
} else {
if (Trace.empty())
return false;
-
+
Trace.pop_back();
if (Trace.empty())
@@ -233,7 +233,7 @@ namespace NCompactTrie {
}
size_t nextnode = Trace.back().NextNode(MergeMap);
-
+
while (nextnode) {
Trace.push_back(TBranchPoint(Trie.Data, nextnode, Trie.SkipFunction));
nextnode = Trace.back().NextNode(MergeMap);
@@ -241,30 +241,30 @@ namespace NCompactTrie {
return (!Trace.empty());
}
-
+
const TNode& Get() const {
return Trace.back().Node;
}
size_t GetLeafLength() const override {
return Get().GetLeafLength();
}
-
+
// Returns recalculated offset from the end of the current node
size_t PrepareOffset(size_t absoffset, size_t minilength) {
if (!absoffset)
return NPOS;
-
+
if (MergeMap.Contains(absoffset))
absoffset = MergeMap.Get(absoffset);
return minilength - OffsetIndex.Get(Trie.Length - absoffset);
}
-
+
size_t RecreateNode(char* buffer, size_t resultLength) override {
TWriteableNode newNode(Get(), Trie.Data);
newNode.ForwardOffset = PrepareOffset(Get().GetForwardOffset(), resultLength);
newNode.LeftOffset = PrepareOffset(Get().GetLeftOffset(), resultLength);
newNode.RightOffset = PrepareOffset(Get().GetRightOffset(), resultLength);
-
+
if (!buffer)
return newNode.Measure();
@@ -275,8 +275,8 @@ namespace NCompactTrie {
}
};
- }
-
+ }
+
static void AddPiece(TPieceIndex& index, size_t offset, size_t len) {
index[len].push_back(offset);
}
@@ -288,24 +288,24 @@ namespace NCompactTrie {
TOffsetMap merger;
// Start walking the trie from head.
AddPiece(subtries, 0, trie.Length);
-
+
size_t counter = 0;
// Now consider all nodes with sizeable continuations
for (size_t curlen = trie.Length; curlen >= minMergeSize && !subtries.empty(); curlen--) {
TPieceIndex::iterator iit = subtries.find(curlen);
-
+
if (iit == subtries.end())
continue; // fast forward to the next available length value
TOffsetList& batch = iit->second;
TPieceComparer comparer(trie.Data, curlen);
Sort(batch.begin(), batch.end(), comparer);
-
+
TOffsetList::iterator it = batch.begin();
while (it != batch.end()) {
if (verbose)
ShowProgress(++counter);
-
+
size_t offset = *it;
// Fill the array with the subnodes of the element
@@ -322,11 +322,11 @@ namespace NCompactTrie {
if (size_t forwardOffset = node.GetForwardOffset()) {
AddPiece(subtries, forwardOffset, end - forwardOffset);
}
-
+
while (++it != batch.end()) {
// Find next different; until then, just add the offsets to the list of merged nodes.
size_t nextoffset = *it;
-
+
if (memcmp(trie.Data + offset, trie.Data + nextoffset, curlen))
break;
@@ -335,12 +335,12 @@ namespace NCompactTrie {
}
subtries.erase(curlen);
- }
+ }
if (verbose) {
Cerr << counter << Endl;
}
return merger;
- }
+ }
size_t RawCompactTrieMinimizeImpl(IOutputStream& os, TOpaqueTrie& trie, bool verbose, size_t minMergeSize, EMinimizeMode mode) {
if (!trie.Data || !trie.Length) {