diff options
author | onpopov <onpopov@yandex-team.ru> | 2022-02-10 16:50:38 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:50:38 +0300 |
commit | 8773f7661194d4c0bdb1e3937b2ff7ae01dd13f8 (patch) | |
tree | 5d5cb817648f650d76cf1076100726fd9b8448e8 /library/cpp/containers/comptrie/make_fast_layout.cpp | |
parent | 84a29dd4980d5b39615e453f289bd1a81213296d (diff) | |
download | ydb-8773f7661194d4c0bdb1e3937b2ff7ae01dd13f8.tar.gz |
Restoring authorship annotation for <onpopov@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/containers/comptrie/make_fast_layout.cpp')
-rw-r--r-- | library/cpp/containers/comptrie/make_fast_layout.cpp | 154 |
1 files changed, 77 insertions, 77 deletions
diff --git a/library/cpp/containers/comptrie/make_fast_layout.cpp b/library/cpp/containers/comptrie/make_fast_layout.cpp index 3dea56b7b9..ade78d7899 100644 --- a/library/cpp/containers/comptrie/make_fast_layout.cpp +++ b/library/cpp/containers/comptrie/make_fast_layout.cpp @@ -1,70 +1,70 @@ -#include "make_fast_layout.h" -#include "node.h" -#include "writeable_node.h" -#include "write_trie_backwards.h" -#include "comptrie_impl.h" +#include "make_fast_layout.h" +#include "node.h" +#include "writeable_node.h" +#include "write_trie_backwards.h" +#include "comptrie_impl.h" #include <util/generic/hash.h> -#include <util/generic/utility.h> - -// Lay the trie in memory in such a way that there are less cache misses when jumping from root to leaf. -// The trie becomes about 2% larger, but the access became about 25% faster in our experiments. -// Can be called on minimized and non-minimized tries, in the first case in requires half a trie more memory. -// Calling it the second time on the same trie does nothing. -// -// The algorithm is based on van Emde Boas layout as described in the yandex data school lectures on external memory algoritms -// by Maxim Babenko and Ivan Puzyrevsky. The difference is that when we cut the tree into levels -// two nodes connected by a forward link are put into the same level (because they usually lie near each other in the original tree). -// The original paper (describing the layout in Section 2.1) is: -// Michael A. Bender, Erik D. Demaine, Martin Farach-Colton. Cache-Oblivious B-Trees // SIAM Journal on Computing, volume 35, number 2, 2005, pages 341–358. -// Available on the web: http://erikdemaine.org/papers/CacheObliviousBTrees_SICOMP/ -// Or: Michael A. Bender, Erik D. Demaine, and Martin Farach-Colton. Cache-Oblivious B-Trees // Proceedings of the 41st Annual Symposium -// on Foundations of Computer Science (FOCS 2000), Redondo Beach, California, November 12–14, 2000, pages 399–409. -// Available on the web: http://erikdemaine.org/papers/FOCS2000b/ -// (there is not much difference between these papers, actually). -// -namespace NCompactTrie { +#include <util/generic/utility.h> + +// Lay the trie in memory in such a way that there are less cache misses when jumping from root to leaf. +// The trie becomes about 2% larger, but the access became about 25% faster in our experiments. +// Can be called on minimized and non-minimized tries, in the first case in requires half a trie more memory. +// Calling it the second time on the same trie does nothing. +// +// The algorithm is based on van Emde Boas layout as described in the yandex data school lectures on external memory algoritms +// by Maxim Babenko and Ivan Puzyrevsky. The difference is that when we cut the tree into levels +// two nodes connected by a forward link are put into the same level (because they usually lie near each other in the original tree). +// The original paper (describing the layout in Section 2.1) is: +// Michael A. Bender, Erik D. Demaine, Martin Farach-Colton. Cache-Oblivious B-Trees // SIAM Journal on Computing, volume 35, number 2, 2005, pages 341–358. +// Available on the web: http://erikdemaine.org/papers/CacheObliviousBTrees_SICOMP/ +// Or: Michael A. Bender, Erik D. Demaine, and Martin Farach-Colton. Cache-Oblivious B-Trees // Proceedings of the 41st Annual Symposium +// on Foundations of Computer Science (FOCS 2000), Redondo Beach, California, November 12–14, 2000, pages 399–409. +// Available on the web: http://erikdemaine.org/papers/FOCS2000b/ +// (there is not much difference between these papers, actually). +// +namespace NCompactTrie { static size_t FindSupportingPowerOf2(size_t n) { size_t result = 1ull << (8 * sizeof(size_t) - 1); while (result > n) { result >>= 1; } return result; - } - + } + namespace { class TTrieNodeSet { public: TTrieNodeSet() = default; - + explicit TTrieNodeSet(const TOpaqueTrie& trie) : Body(trie.Length / (8 * MinNodeSize) + 1, 0) { } - + bool Has(size_t offset) const { const size_t reducedOffset = ReducedOffset(offset); return OffsetCell(reducedOffset) & OffsetMask(reducedOffset); } - + void Add(size_t offset) { const size_t reducedOffset = ReducedOffset(offset); OffsetCell(reducedOffset) |= OffsetMask(reducedOffset); } - + void Remove(size_t offset) { const size_t reducedOffset = ReducedOffset(offset); OffsetCell(reducedOffset) &= ~OffsetMask(reducedOffset); } - + void Swap(TTrieNodeSet& other) { Body.swap(other.Body); } - + private: static const size_t MinNodeSize = 2; TVector<ui8> Body; - + static size_t ReducedOffset(size_t offset) { return offset / MinNodeSize; } @@ -78,23 +78,23 @@ namespace NCompactTrie { return Body.at(reducedOffset / 8); } }; - + //--------------------------------------------------------------------- - + class TTrieNodeCounts { public: TTrieNodeCounts() = default; - + explicit TTrieNodeCounts(const TOpaqueTrie& trie) : Body(trie.Length / MinNodeSize, 0) , IsTree(false) { } - + size_t Get(size_t offset) const { return IsTree ? 1 : Body.at(offset / MinNodeSize); } - + void Inc(size_t offset) { if (IsTree) { return; @@ -104,7 +104,7 @@ namespace NCompactTrie { ++count; } } - + size_t Dec(size_t offset) { if (IsTree) { return 0; @@ -116,33 +116,33 @@ namespace NCompactTrie { } return count; } - + void Swap(TTrieNodeCounts& other) { Body.swap(other.Body); ::DoSwap(IsTree, other.IsTree); } - + void SetTreeMode() { IsTree = true; Body = TVector<ui8>(); } - + private: static const size_t MinNodeSize = 2; static const ui8 MaxCount = 255; - + TVector<ui8> Body; bool IsTree = false; }; - + //---------------------------------------------------------- - + class TOffsetIndex { public: // In all methods: // Key --- offset from the beginning of the old trie. // Value --- offset from the end of the new trie. - + explicit TOffsetIndex(TTrieNodeCounts& counts) { ParentCounts.Swap(counts); } @@ -150,7 +150,7 @@ namespace NCompactTrie { void Add(size_t key, size_t value) { Data[key] = value; } - + size_t Size() const { return Data.size(); } @@ -172,14 +172,14 @@ namespace NCompactTrie { TTrieNodeCounts ParentCounts; THashMap<size_t, size_t> Data; }; - + //--------------------------------------------------------------------------------------- class TTrieMeasurer { public: TTrieMeasurer(const TOpaqueTrie& trie, bool verbose); void Measure(); - + size_t GetDepth() const { return Depth; } @@ -187,23 +187,23 @@ namespace NCompactTrie { size_t GetNodeCount() const { return NodeCount; } - + size_t GetUnminimizedNodeCount() const { return UnminimizedNodeCount; } - + bool IsTree() const { return NodeCount == UnminimizedNodeCount; } - + TTrieNodeCounts& GetParentCounts() { return ParentCounts; } - + const TOpaqueTrie& GetTrie() const { return Trie; } - + private: const TOpaqueTrie& Trie; size_t Depth; @@ -211,11 +211,11 @@ namespace NCompactTrie { size_t NodeCount; size_t UnminimizedNodeCount; const bool Verbose; - + // returns depth, increments NodeCount. size_t MeasureSubtrie(size_t rootOffset, bool isNewPath); }; - + TTrieMeasurer::TTrieMeasurer(const TOpaqueTrie& trie, bool verbose) : Trie(trie) , Depth(0) @@ -243,7 +243,7 @@ namespace NCompactTrie { Cerr << "Node count: " << NodeCount << Endl; } } - + // A chain of nodes linked by forward links // is considered one node with many left and right children // for depth measuring here and in @@ -276,14 +276,14 @@ namespace NCompactTrie { } else { break; } - } + } return depth; - } + } //-------------------------------------------------------------------------------------- - + using TLevelNodes = TVector<size_t>; - + struct TLevel { size_t Depth; TLevelNodes Nodes; @@ -295,7 +295,7 @@ namespace NCompactTrie { }; //---------------------------------------------------------------------------------------- - + class TVanEmdeBoasReverseNodeEnumerator: public TReverseNodeEnumerator { public: TVanEmdeBoasReverseNodeEnumerator(TTrieMeasurer& measurer, bool verbose) @@ -326,7 +326,7 @@ namespace NCompactTrie { size_t GetLeafLength() const override { return Get().GetLeafLength(); } - + // Returns recalculated offset from the end of the current node. size_t PrepareOffset(size_t absoffset, size_t resultLength) { if (!absoffset) @@ -352,19 +352,19 @@ namespace NCompactTrie { TOpaqueTrie Trie; size_t Depth; size_t MaxIndexSize; - + TVector<TLevel> Trace; TOffsetIndex BackIndex; TVector<TNode> CentralWord; TTrieNodeSet ProcessedNodes; - + const bool Verbose; - + private: bool IsVisited(size_t offset) const { return ProcessedNodes.Has(offset); } - + void AddCascade(size_t step) { Y_ASSERT(!(step & (step - 1))); // Should be a power of 2. while (step > 0) { @@ -381,7 +381,7 @@ namespace NCompactTrie { step /= 2; } } - } + } void FillCentralWord() { CentralWord.clear(); @@ -391,7 +391,7 @@ namespace NCompactTrie { CentralWord.push_back(TNode(Trie.Data, CentralWord.back().GetForwardOffset(), Trie.SkipFunction)); } } - + bool MoveCentralWordStart() { do { if (Fresh) { @@ -420,7 +420,7 @@ namespace NCompactTrie { } while (IsVisited(Trace.back().Nodes.back())); FillCentralWord(); return true; - } + } // Returns the maximal depth it has reached while searching. // This is a method because it needs OffsetIndex to skip processed nodes. @@ -435,7 +435,7 @@ namespace NCompactTrie { if (node.GetLeftOffset() && !IsVisited(node.GetLeftOffset())) { actualDepth = Max(actualDepth, 1 + FindDescendants(node.GetLeftOffset(), depth - 1, result)); - } + } if (node.GetRightOffset() && !IsVisited(node.GetRightOffset())) { actualDepth = Max(actualDepth, 1 + FindDescendants(node.GetRightOffset(), depth - 1, result)); @@ -445,23 +445,23 @@ namespace NCompactTrie { } else { break; } - } + } return actualDepth; - } + } }; - + } // Anonymous namespace. //----------------------------------------------------------------------------------- size_t RawCompactTrieFastLayoutImpl(IOutputStream& os, const TOpaqueTrie& trie, bool verbose) { if (!trie.Data || !trie.Length) { - return 0; - } + return 0; + } TTrieMeasurer mes(trie, verbose); mes.Measure(); TVanEmdeBoasReverseNodeEnumerator enumerator(mes, verbose); return WriteTrieBackwards(os, enumerator, verbose); } -} +} |