aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/containers/comptrie/make_fast_layout.cpp
diff options
context:
space:
mode:
authoronpopov <onpopov@yandex-team.ru>2022-02-10 16:50:38 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:50:38 +0300
commit8773f7661194d4c0bdb1e3937b2ff7ae01dd13f8 (patch)
tree5d5cb817648f650d76cf1076100726fd9b8448e8 /library/cpp/containers/comptrie/make_fast_layout.cpp
parent84a29dd4980d5b39615e453f289bd1a81213296d (diff)
downloadydb-8773f7661194d4c0bdb1e3937b2ff7ae01dd13f8.tar.gz
Restoring authorship annotation for <onpopov@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/containers/comptrie/make_fast_layout.cpp')
-rw-r--r--library/cpp/containers/comptrie/make_fast_layout.cpp154
1 files changed, 77 insertions, 77 deletions
diff --git a/library/cpp/containers/comptrie/make_fast_layout.cpp b/library/cpp/containers/comptrie/make_fast_layout.cpp
index 3dea56b7b9..ade78d7899 100644
--- a/library/cpp/containers/comptrie/make_fast_layout.cpp
+++ b/library/cpp/containers/comptrie/make_fast_layout.cpp
@@ -1,70 +1,70 @@
-#include "make_fast_layout.h"
-#include "node.h"
-#include "writeable_node.h"
-#include "write_trie_backwards.h"
-#include "comptrie_impl.h"
+#include "make_fast_layout.h"
+#include "node.h"
+#include "writeable_node.h"
+#include "write_trie_backwards.h"
+#include "comptrie_impl.h"
#include <util/generic/hash.h>
-#include <util/generic/utility.h>
-
-// Lay the trie in memory in such a way that there are less cache misses when jumping from root to leaf.
-// The trie becomes about 2% larger, but the access became about 25% faster in our experiments.
-// Can be called on minimized and non-minimized tries, in the first case in requires half a trie more memory.
-// Calling it the second time on the same trie does nothing.
-//
-// The algorithm is based on van Emde Boas layout as described in the yandex data school lectures on external memory algoritms
-// by Maxim Babenko and Ivan Puzyrevsky. The difference is that when we cut the tree into levels
-// two nodes connected by a forward link are put into the same level (because they usually lie near each other in the original tree).
-// The original paper (describing the layout in Section 2.1) is:
-// Michael A. Bender, Erik D. Demaine, Martin Farach-Colton. Cache-Oblivious B-Trees // SIAM Journal on Computing, volume 35, number 2, 2005, pages 341–358.
-// Available on the web: http://erikdemaine.org/papers/CacheObliviousBTrees_SICOMP/
-// Or: Michael A. Bender, Erik D. Demaine, and Martin Farach-Colton. Cache-Oblivious B-Trees // Proceedings of the 41st Annual Symposium
-// on Foundations of Computer Science (FOCS 2000), Redondo Beach, California, November 12–14, 2000, pages 399–409.
-// Available on the web: http://erikdemaine.org/papers/FOCS2000b/
-// (there is not much difference between these papers, actually).
-//
-namespace NCompactTrie {
+#include <util/generic/utility.h>
+
+// Lay the trie in memory in such a way that there are less cache misses when jumping from root to leaf.
+// The trie becomes about 2% larger, but the access became about 25% faster in our experiments.
+// Can be called on minimized and non-minimized tries, in the first case in requires half a trie more memory.
+// Calling it the second time on the same trie does nothing.
+//
+// The algorithm is based on van Emde Boas layout as described in the yandex data school lectures on external memory algoritms
+// by Maxim Babenko and Ivan Puzyrevsky. The difference is that when we cut the tree into levels
+// two nodes connected by a forward link are put into the same level (because they usually lie near each other in the original tree).
+// The original paper (describing the layout in Section 2.1) is:
+// Michael A. Bender, Erik D. Demaine, Martin Farach-Colton. Cache-Oblivious B-Trees // SIAM Journal on Computing, volume 35, number 2, 2005, pages 341–358.
+// Available on the web: http://erikdemaine.org/papers/CacheObliviousBTrees_SICOMP/
+// Or: Michael A. Bender, Erik D. Demaine, and Martin Farach-Colton. Cache-Oblivious B-Trees // Proceedings of the 41st Annual Symposium
+// on Foundations of Computer Science (FOCS 2000), Redondo Beach, California, November 12–14, 2000, pages 399–409.
+// Available on the web: http://erikdemaine.org/papers/FOCS2000b/
+// (there is not much difference between these papers, actually).
+//
+namespace NCompactTrie {
static size_t FindSupportingPowerOf2(size_t n) {
size_t result = 1ull << (8 * sizeof(size_t) - 1);
while (result > n) {
result >>= 1;
}
return result;
- }
-
+ }
+
namespace {
class TTrieNodeSet {
public:
TTrieNodeSet() = default;
-
+
explicit TTrieNodeSet(const TOpaqueTrie& trie)
: Body(trie.Length / (8 * MinNodeSize) + 1, 0)
{
}
-
+
bool Has(size_t offset) const {
const size_t reducedOffset = ReducedOffset(offset);
return OffsetCell(reducedOffset) & OffsetMask(reducedOffset);
}
-
+
void Add(size_t offset) {
const size_t reducedOffset = ReducedOffset(offset);
OffsetCell(reducedOffset) |= OffsetMask(reducedOffset);
}
-
+
void Remove(size_t offset) {
const size_t reducedOffset = ReducedOffset(offset);
OffsetCell(reducedOffset) &= ~OffsetMask(reducedOffset);
}
-
+
void Swap(TTrieNodeSet& other) {
Body.swap(other.Body);
}
-
+
private:
static const size_t MinNodeSize = 2;
TVector<ui8> Body;
-
+
static size_t ReducedOffset(size_t offset) {
return offset / MinNodeSize;
}
@@ -78,23 +78,23 @@ namespace NCompactTrie {
return Body.at(reducedOffset / 8);
}
};
-
+
//---------------------------------------------------------------------
-
+
class TTrieNodeCounts {
public:
TTrieNodeCounts() = default;
-
+
explicit TTrieNodeCounts(const TOpaqueTrie& trie)
: Body(trie.Length / MinNodeSize, 0)
, IsTree(false)
{
}
-
+
size_t Get(size_t offset) const {
return IsTree ? 1 : Body.at(offset / MinNodeSize);
}
-
+
void Inc(size_t offset) {
if (IsTree) {
return;
@@ -104,7 +104,7 @@ namespace NCompactTrie {
++count;
}
}
-
+
size_t Dec(size_t offset) {
if (IsTree) {
return 0;
@@ -116,33 +116,33 @@ namespace NCompactTrie {
}
return count;
}
-
+
void Swap(TTrieNodeCounts& other) {
Body.swap(other.Body);
::DoSwap(IsTree, other.IsTree);
}
-
+
void SetTreeMode() {
IsTree = true;
Body = TVector<ui8>();
}
-
+
private:
static const size_t MinNodeSize = 2;
static const ui8 MaxCount = 255;
-
+
TVector<ui8> Body;
bool IsTree = false;
};
-
+
//----------------------------------------------------------
-
+
class TOffsetIndex {
public:
// In all methods:
// Key --- offset from the beginning of the old trie.
// Value --- offset from the end of the new trie.
-
+
explicit TOffsetIndex(TTrieNodeCounts& counts) {
ParentCounts.Swap(counts);
}
@@ -150,7 +150,7 @@ namespace NCompactTrie {
void Add(size_t key, size_t value) {
Data[key] = value;
}
-
+
size_t Size() const {
return Data.size();
}
@@ -172,14 +172,14 @@ namespace NCompactTrie {
TTrieNodeCounts ParentCounts;
THashMap<size_t, size_t> Data;
};
-
+
//---------------------------------------------------------------------------------------
class TTrieMeasurer {
public:
TTrieMeasurer(const TOpaqueTrie& trie, bool verbose);
void Measure();
-
+
size_t GetDepth() const {
return Depth;
}
@@ -187,23 +187,23 @@ namespace NCompactTrie {
size_t GetNodeCount() const {
return NodeCount;
}
-
+
size_t GetUnminimizedNodeCount() const {
return UnminimizedNodeCount;
}
-
+
bool IsTree() const {
return NodeCount == UnminimizedNodeCount;
}
-
+
TTrieNodeCounts& GetParentCounts() {
return ParentCounts;
}
-
+
const TOpaqueTrie& GetTrie() const {
return Trie;
}
-
+
private:
const TOpaqueTrie& Trie;
size_t Depth;
@@ -211,11 +211,11 @@ namespace NCompactTrie {
size_t NodeCount;
size_t UnminimizedNodeCount;
const bool Verbose;
-
+
// returns depth, increments NodeCount.
size_t MeasureSubtrie(size_t rootOffset, bool isNewPath);
};
-
+
TTrieMeasurer::TTrieMeasurer(const TOpaqueTrie& trie, bool verbose)
: Trie(trie)
, Depth(0)
@@ -243,7 +243,7 @@ namespace NCompactTrie {
Cerr << "Node count: " << NodeCount << Endl;
}
}
-
+
// A chain of nodes linked by forward links
// is considered one node with many left and right children
// for depth measuring here and in
@@ -276,14 +276,14 @@ namespace NCompactTrie {
} else {
break;
}
- }
+ }
return depth;
- }
+ }
//--------------------------------------------------------------------------------------
-
+
using TLevelNodes = TVector<size_t>;
-
+
struct TLevel {
size_t Depth;
TLevelNodes Nodes;
@@ -295,7 +295,7 @@ namespace NCompactTrie {
};
//----------------------------------------------------------------------------------------
-
+
class TVanEmdeBoasReverseNodeEnumerator: public TReverseNodeEnumerator {
public:
TVanEmdeBoasReverseNodeEnumerator(TTrieMeasurer& measurer, bool verbose)
@@ -326,7 +326,7 @@ namespace NCompactTrie {
size_t GetLeafLength() const override {
return Get().GetLeafLength();
}
-
+
// Returns recalculated offset from the end of the current node.
size_t PrepareOffset(size_t absoffset, size_t resultLength) {
if (!absoffset)
@@ -352,19 +352,19 @@ namespace NCompactTrie {
TOpaqueTrie Trie;
size_t Depth;
size_t MaxIndexSize;
-
+
TVector<TLevel> Trace;
TOffsetIndex BackIndex;
TVector<TNode> CentralWord;
TTrieNodeSet ProcessedNodes;
-
+
const bool Verbose;
-
+
private:
bool IsVisited(size_t offset) const {
return ProcessedNodes.Has(offset);
}
-
+
void AddCascade(size_t step) {
Y_ASSERT(!(step & (step - 1))); // Should be a power of 2.
while (step > 0) {
@@ -381,7 +381,7 @@ namespace NCompactTrie {
step /= 2;
}
}
- }
+ }
void FillCentralWord() {
CentralWord.clear();
@@ -391,7 +391,7 @@ namespace NCompactTrie {
CentralWord.push_back(TNode(Trie.Data, CentralWord.back().GetForwardOffset(), Trie.SkipFunction));
}
}
-
+
bool MoveCentralWordStart() {
do {
if (Fresh) {
@@ -420,7 +420,7 @@ namespace NCompactTrie {
} while (IsVisited(Trace.back().Nodes.back()));
FillCentralWord();
return true;
- }
+ }
// Returns the maximal depth it has reached while searching.
// This is a method because it needs OffsetIndex to skip processed nodes.
@@ -435,7 +435,7 @@ namespace NCompactTrie {
if (node.GetLeftOffset() && !IsVisited(node.GetLeftOffset())) {
actualDepth = Max(actualDepth,
1 + FindDescendants(node.GetLeftOffset(), depth - 1, result));
- }
+ }
if (node.GetRightOffset() && !IsVisited(node.GetRightOffset())) {
actualDepth = Max(actualDepth,
1 + FindDescendants(node.GetRightOffset(), depth - 1, result));
@@ -445,23 +445,23 @@ namespace NCompactTrie {
} else {
break;
}
- }
+ }
return actualDepth;
- }
+ }
};
-
+
} // Anonymous namespace.
//-----------------------------------------------------------------------------------
size_t RawCompactTrieFastLayoutImpl(IOutputStream& os, const TOpaqueTrie& trie, bool verbose) {
if (!trie.Data || !trie.Length) {
- return 0;
- }
+ return 0;
+ }
TTrieMeasurer mes(trie, verbose);
mes.Measure();
TVanEmdeBoasReverseNodeEnumerator enumerator(mes, verbose);
return WriteTrieBackwards(os, enumerator, verbose);
}
-}
+}