author | grig <grig@yandex-team.ru> | 2022-02-10 16:50:24 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:50:24 +0300 |
commit | beb63ece3a6872dfbe113104f524ab6fdbec0adc (patch) | |
tree | 5d5cb817648f650d76cf1076100726fd9b8448e8 /library/cpp/containers/comptrie/minimize.cpp | |
parent | da383a4f674027527827ad076134241fc5da0cbf (diff) | |
download | ydb-beb63ece3a6872dfbe113104f524ab6fdbec0adc.tar.gz | |
Restoring authorship annotation for <grig@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/containers/comptrie/minimize.cpp')
-rw-r--r-- | library/cpp/containers/comptrie/minimize.cpp | 86 |
1 file changed, 43 insertions, 43 deletions
diff --git a/library/cpp/containers/comptrie/minimize.cpp b/library/cpp/containers/comptrie/minimize.cpp
index cb25a366f6f..80d0b25217d 100644
--- a/library/cpp/containers/comptrie/minimize.cpp
+++ b/library/cpp/containers/comptrie/minimize.cpp
@@ -15,7 +15,7 @@ namespace NCompactTrie {
     // nodes that have identical continuations (Daciuk's right language),
     // and repack the trie. Repacking is done in-place, so memory is less
     // of an issue; however, it may take considerable time.
-
+
     // IMPORTANT: never try to reminimize an already minimized trie or a trie with fast layout.
     // Because of non-local structure and epsilon links, it won't work
     // as you expect it to, and can destroy the trie in the making.
@@ -23,7 +23,7 @@ namespace NCompactTrie {
     namespace {
         using TOffsetList = TVector<size_t>;
         using TPieceIndex = THashMap<size_t, TOffsetList>;
-
+
         using TSizePair = std::pair<size_t, size_t>;
         using TSizePairVector = TVector<TSizePair>;
         using TSizePairVectorVector = TVector<TSizePairVector>;
@@ -36,38 +36,38 @@ namespace NCompactTrie {
             TOffsetMap() {
                 Data.reserve(0x10000);
             }
-
+
             void Add(size_t key, size_t value) {
                 size_t hikey = key & 0xFFFF;
-
+
                 if (Data.size() <= hikey)
                     Data.resize(hikey + 1);
                 TSizePairVector& sublist = Data[hikey];
-
+
                 for (auto& it : sublist) {
                     if (it.first == key) {
                         it.second = value;
-
+
                         return;
                     }
                 }
-
+
                 sublist.push_back(TSizePair(key, value));
             }

             bool Contains(size_t key) const {
                 return (Get(key) != 0);
-            }
+            }
             size_t Get(size_t key) const {
                 size_t hikey = key & 0xFFFF;
-
+
                 if (Data.size() <= hikey)
                     return 0;
-
+
                 const TSizePairVector& sublist = Data[hikey];
-
+
                 for (const auto& it : sublist) {
                     if (it.first == key)
                         return it.second;
@@ -76,7 +76,7 @@ namespace NCompactTrie {
                 return 0;
             }
         };
-
+
         class TOffsetDeltas {
         protected:
             TSizePairVector Data;
@@ -88,10 +88,10 @@ namespace NCompactTrie {
                     return; // no offset
                 } else {
                     TSizePair last = Data.back();
-
+
                     if (key <= last.first) {
                         Cerr << "Trouble: elements to offset delta list added in wrong order" << Endl;
-
+
                         return;
                     }
@@ -100,19 +100,19 @@ namespace NCompactTrie {
                 }

                 Data.push_back(TSizePair(key, value));
-            }
+            }
             size_t Get(size_t key) const {
                 if (Data.empty())
                     return key; // difference is zero;
-
+
                 if (key < Data.front().first)
                     return key;
-
+
                 // Binary search for the highest entry in the list that does not exceed the key
                 size_t from = 0;
                 size_t to = Data.size() - 1;
-
+
                 while (from < to) {
                     size_t midpoint = (from + to + 1) / 2;
@@ -121,7 +121,7 @@ namespace NCompactTrie {
                     else
                         from = midpoint;
                 }
-
+
                 TSizePair entry = Data[from];

                 return key - entry.first + entry.second;
@@ -139,7 +139,7 @@ namespace NCompactTrie {
                 , Length(len)
             {
             }
-
+
             bool operator()(size_t p1, const size_t p2) {
                 int res = memcmp(Data + p1, Data + p2, Length);

@@ -159,13 +159,13 @@ namespace NCompactTrie {
                 : Selector(0)
             {
             }
-
+
             TBranchPoint(const char* data, size_t offset, const ILeafSkipper& skipFunction)
                 : Node(data, offset, skipFunction)
                 , Selector(0)
             {
             }
-
+
             bool IsFinal() const {
                 return Node.IsFinal();
             }
@@ -174,7 +174,7 @@ namespace NCompactTrie {
             size_t NextNode(const TOffsetMap& mergedNodes) {
                 while (Selector < 3) {
                     size_t nextOffset = 0;
-
+
                     switch (++Selector) {
                         case 1:
                             if (Node.GetRightOffset())
@@ -199,7 +199,7 @@ namespace NCompactTrie {
                        return nextOffset;
                }
                return 0;
-            }
+            }
        };

        class TMergingReverseNodeEnumerator: public TReverseNodeEnumerator {
@@ -209,7 +209,7 @@ namespace NCompactTrie {
            const TOffsetMap& MergeMap;
            TVector<TBranchPoint> Trace;
            TOffsetDeltas OffsetIndex;
-
+
        public:
            TMergingReverseNodeEnumerator(const TOpaqueTrie& trie, const TOffsetMap& mergers)
                : Fresh(true)
@@ -217,7 +217,7 @@ namespace NCompactTrie {
                , MergeMap(mergers)
            {
            }
-
+
            bool Move() override {
                if (Fresh) {
                    Trace.push_back(TBranchPoint(Trie.Data, 0, Trie.SkipFunction));
@@ -225,7 +225,7 @@ namespace NCompactTrie {
                } else {
                    if (Trace.empty())
                        return false;
-
+
                    Trace.pop_back();

                    if (Trace.empty())
@@ -233,7 +233,7 @@ namespace NCompactTrie {
                }

                size_t nextnode = Trace.back().NextNode(MergeMap);
-
+
                while (nextnode) {
                    Trace.push_back(TBranchPoint(Trie.Data, nextnode, Trie.SkipFunction));
                    nextnode = Trace.back().NextNode(MergeMap);
@@ -241,30 +241,30 @@ namespace NCompactTrie {
                return (!Trace.empty());
            }
-
+
            const TNode& Get() const {
                return Trace.back().Node;
            }

            size_t GetLeafLength() const override {
                return Get().GetLeafLength();
            }
-
+
            // Returns recalculated offset from the end of the current node
            size_t PrepareOffset(size_t absoffset, size_t minilength) {
                if (!absoffset)
                    return NPOS;
-
+
                if (MergeMap.Contains(absoffset))
                    absoffset = MergeMap.Get(absoffset);
                return minilength - OffsetIndex.Get(Trie.Length - absoffset);
            }
-
+
            size_t RecreateNode(char* buffer, size_t resultLength) override {
                TWriteableNode newNode(Get(), Trie.Data);
                newNode.ForwardOffset = PrepareOffset(Get().GetForwardOffset(), resultLength);
                newNode.LeftOffset = PrepareOffset(Get().GetLeftOffset(), resultLength);
                newNode.RightOffset = PrepareOffset(Get().GetRightOffset(), resultLength);
-
+
                if (!buffer)
                    return newNode.Measure();
@@ -275,8 +275,8 @@ namespace NCompactTrie {
            }
        };
-    }
-
+    }
+
    static void AddPiece(TPieceIndex& index, size_t offset, size_t len) {
        index[len].push_back(offset);
    }
@@ -288,24 +288,24 @@ namespace NCompactTrie {
        TOffsetMap merger;
        // Start walking the trie from head.
        AddPiece(subtries, 0, trie.Length);
-
+
        size_t counter = 0;
        // Now consider all nodes with sizeable continuations
        for (size_t curlen = trie.Length; curlen >= minMergeSize && !subtries.empty(); curlen--) {
            TPieceIndex::iterator iit = subtries.find(curlen);
-
+
            if (iit == subtries.end())
                continue; // fast forward to the next available length value
            TOffsetList& batch = iit->second;
            TPieceComparer comparer(trie.Data, curlen);
            Sort(batch.begin(), batch.end(), comparer);
-
+
            TOffsetList::iterator it = batch.begin();
            while (it != batch.end()) {
                if (verbose)
                    ShowProgress(++counter);
-
+
                size_t offset = *it;

                // Fill the array with the subnodes of the element
@@ -322,11 +322,11 @@ namespace NCompactTrie {
                if (size_t forwardOffset = node.GetForwardOffset()) {
                    AddPiece(subtries, forwardOffset, end - forwardOffset);
                }
-
+
                while (++it != batch.end()) {
                    // Find next different; until then, just add the offsets to the list of merged nodes.
                    size_t nextoffset = *it;
-
+
                    if (memcmp(trie.Data + offset, trie.Data + nextoffset, curlen))
                        break;
@@ -335,12 +335,12 @@ namespace NCompactTrie {
            }

            subtries.erase(curlen);
-        }
+        }
        if (verbose) {
            Cerr << counter << Endl;
        }
        return merger;
-    }
+    }

    size_t RawCompactTrieMinimizeImpl(IOutputStream& os, TOpaqueTrie& trie, bool verbose, size_t minMergeSize, EMinimizeMode mode) {
        if (!trie.Data || !trie.Length) {
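The changed lines above appear to differ only in whitespace (43 deletions matched by 43 insertions), but the surrounding context shows how the merging pass is organised: FindEquivalentSubtries buckets pieces of the packed trie by length in a TPieceIndex, sorts each bucket with the memcmp-based TPieceComparer, and records byte-identical neighbours in a TOffsetMap that maps duplicate offsets to a canonical one. The snippet below is a minimal standalone sketch of that grouping idea only, not the library's code; PieceIndex, OffsetMap, and FindEquivalentPieces are invented names for the illustration, and plain STL containers stand in for THashMap/TVector.

```cpp
// Sketch: detect byte-identical pieces of a flat buffer, grouped by length,
// and map each duplicate offset to a canonical offset (first occurrence).
#include <algorithm>
#include <cstddef>
#include <cstring>
#include <iostream>
#include <map>
#include <string>
#include <unordered_map>
#include <vector>

// Hypothetical stand-ins for TPieceIndex / TOffsetMap from the diff.
using PieceIndex = std::unordered_map<size_t, std::vector<size_t>>; // length -> offsets
using OffsetMap = std::map<size_t, size_t>;                         // duplicate offset -> canonical offset

OffsetMap FindEquivalentPieces(const char* data, const PieceIndex& pieces) {
    OffsetMap merged;
    for (const auto& [len, offsets] : pieces) {
        std::vector<size_t> batch = offsets;
        // Same role as TPieceComparer: order offsets by the bytes they point at.
        std::sort(batch.begin(), batch.end(), [&](size_t a, size_t b) {
            return std::memcmp(data + a, data + b, len) < 0;
        });
        for (size_t i = 1; i < batch.size(); ++i) {
            // Identical neighbours collapse onto the first (canonical) occurrence.
            if (std::memcmp(data + batch[i - 1], data + batch[i], len) == 0) {
                size_t canonical = merged.count(batch[i - 1]) ? merged[batch[i - 1]] : batch[i - 1];
                merged[batch[i]] = canonical;
            }
        }
    }
    return merged;
}

int main() {
    const std::string buf = "abcXYZabcXYZ";
    PieceIndex pieces{{3, {0, 3, 6, 9}}}; // four 3-byte pieces: "abc", "XYZ", "abc", "XYZ"
    for (const auto& [dup, canon] : FindEquivalentPieces(buf.data(), pieces)) {
        std::cout << "piece at " << dup << " duplicates piece at " << canon << "\n";
    }
}
```

In the real code the recorded TOffsetMap is then consumed by TMergingReverseNodeEnumerator, which rewrites node offsets so that all references point at the canonical copies; the sketch stops at the duplicate-detection step.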