diff options
author | grig <grig@yandex-team.ru> | 2022-02-10 16:50:24 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:50:24 +0300 |
commit | beb63ece3a6872dfbe113104f524ab6fdbec0adc (patch) | |
tree | 5d5cb817648f650d76cf1076100726fd9b8448e8 /library/cpp/containers | |
parent | da383a4f674027527827ad076134241fc5da0cbf (diff) | |
download | ydb-beb63ece3a6872dfbe113104f524ab6fdbec0adc.tar.gz |
Restoring authorship annotation for <grig@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/containers')
18 files changed, 240 insertions, 240 deletions
diff --git a/library/cpp/containers/comptrie/comptrie.h b/library/cpp/containers/comptrie/comptrie.h index a37dcee421..f77024327e 100644 --- a/library/cpp/containers/comptrie/comptrie.h +++ b/library/cpp/containers/comptrie/comptrie.h @@ -1,4 +1,4 @@ #pragma once - + #include "comptrie_trie.h" #include "comptrie_builder.h" diff --git a/library/cpp/containers/comptrie/comptrie_builder.h b/library/cpp/containers/comptrie/comptrie_builder.h index 156f93db39..cf7d2e39a3 100644 --- a/library/cpp/containers/comptrie/comptrie_builder.h +++ b/library/cpp/containers/comptrie/comptrie_builder.h @@ -1,5 +1,5 @@ #pragma once - + #include "comptrie_packer.h" #include "minimize.h" #include "key_selector.h" @@ -12,7 +12,7 @@ // is created incrementally. It actually helps a lot to have the input data prefix-grouped // by key; otherwise, memory consumption becomes a tough issue. // NOTE: building and serializing the automaton may be lengthy, and takes lots of memory. - + // PREFIX_GROUPED means that if we, while constructing a trie, add to the builder two keys with the same prefix, // then all the keys that we add between these two also have the same prefix. // Actually in this mode the builder can accept even more freely ordered input, @@ -45,16 +45,16 @@ public: typedef S TPacker; typedef typename TCompactTrieKeySelector<TSymbol>::TKey TKey; typedef typename TCompactTrieKeySelector<TSymbol>::TKeyBuf TKeyBuf; - + explicit TCompactTrieBuilder(TCompactTrieBuilderFlags flags = CTBF_NONE, TPacker packer = TPacker(), IAllocator* alloc = TDefaultAllocator::Instance()); - + // All Add.. methods return true if it was a new key, false if the key already existed. bool Add(const TSymbol* key, size_t keylen, const TData& value); bool Add(const TKeyBuf& key, const TData& value) { return Add(key.data(), key.size(), value); } - + // add already serialized data bool AddPtr(const TSymbol* key, size_t keylen, const char* data); bool AddPtr(const TKeyBuf& key, const char* data) { @@ -80,14 +80,14 @@ public: bool FindLongestPrefix(const TKeyBuf& key, size_t* prefixLen, TData* value = nullptr) const { return FindLongestPrefix(key.data(), key.size(), prefixLen, value); } - + size_t Save(IOutputStream& os) const; size_t SaveAndDestroy(IOutputStream& os); size_t SaveToFile(const TString& fileName) const { TFixedBufferFileOutput out(fileName); return Save(out); } - + void Clear(); // Returns all memory to the system and resets the builder state. size_t GetEntryCount() const; @@ -101,8 +101,8 @@ public: protected: class TCompactTrieBuilderImpl; THolder<TCompactTrieBuilderImpl> Impl; -}; - +}; + //---------------------------------------------------------------------------------------------------------------------- // Minimize the trie. The result is equivalent to the original // trie, except that it takes less space (and has marginally lower @@ -119,7 +119,7 @@ protected: template <class TPacker> size_t CompactTrieMinimize(IOutputStream& os, const char* data, size_t datalength, bool verbose = false, const TPacker& packer = TPacker(), NCompactTrie::EMinimizeMode mode = NCompactTrie::MM_DEFAULT); - + template <class TTrieBuilder> size_t CompactTrieMinimize(IOutputStream& os, const TTrieBuilder& builder, bool verbose = false); diff --git a/library/cpp/containers/comptrie/comptrie_builder.inl b/library/cpp/containers/comptrie/comptrie_builder.inl index a0c1097862..f273fa6571 100644 --- a/library/cpp/containers/comptrie/comptrie_builder.inl +++ b/library/cpp/containers/comptrie/comptrie_builder.inl @@ -1,5 +1,5 @@ #pragma once - + #include "comptrie_impl.h" #include "comptrie_trie.h" #include "make_fast_layout.h" @@ -618,7 +618,7 @@ typename TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TNode* } else { ConvertSymbolArrayToChar(key, keylen, ckeybuf, ckeylen); } - + char* ckey = ckeybuf.Data(); TNode* next; @@ -907,9 +907,9 @@ ui64 TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::ArcMeasure( size_t leftoffsetsize = leftsize ? MeasureOffset(coresize + treesize) : 0; size_t rightoffsetsize = rightsize ? MeasureOffset(coresize + treesize + leftsize) : 0; leftoffsetsize = leftsize ? MeasureOffset(coresize + treesize + leftoffsetsize + rightoffsetsize) : 0; - rightoffsetsize = rightsize ? MeasureOffset(coresize + treesize + leftsize + leftoffsetsize + rightoffsetsize) : 0; + rightoffsetsize = rightsize ? MeasureOffset(coresize + treesize + leftsize + leftoffsetsize + rightoffsetsize) : 0; leftoffsetsize = leftsize ? MeasureOffset(coresize + treesize + leftoffsetsize + rightoffsetsize) : 0; - rightoffsetsize = rightsize ? MeasureOffset(coresize + treesize + leftsize + leftoffsetsize + rightoffsetsize) : 0; + rightoffsetsize = rightsize ? MeasureOffset(coresize + treesize + leftsize + leftoffsetsize + rightoffsetsize) : 0; coresize += leftoffsetsize + rightoffsetsize; thiz->LeftOffset = leftsize ? coresize + treesize : 0; diff --git a/library/cpp/containers/comptrie/comptrie_impl.cpp b/library/cpp/containers/comptrie/comptrie_impl.cpp index a8c1f76bfb..a116ab6d1e 100644 --- a/library/cpp/containers/comptrie/comptrie_impl.cpp +++ b/library/cpp/containers/comptrie/comptrie_impl.cpp @@ -8,26 +8,26 @@ namespace NCompactTrie { size_t MeasureOffset(size_t offset) { int n = 0; - + while (offset) { offset >>= 8; ++n; } return n; - } + } size_t PackOffset(char* buffer, size_t offset) { size_t len = MeasureOffset(offset); size_t i = len; - + while (i--) { buffer[i] = (char)(offset & 0xFF); offset >>= 8; } return len; - } + } void ShowProgress(size_t n) { if (n % 1000000 == 0) @@ -35,5 +35,5 @@ namespace NCompactTrie { else if (n % 20000 == 0) Cerr << "."; } - -} + +} diff --git a/library/cpp/containers/comptrie/comptrie_impl.h b/library/cpp/containers/comptrie/comptrie_impl.h index 9a3e7f7d90..f41c38311a 100644 --- a/library/cpp/containers/comptrie/comptrie_impl.h +++ b/library/cpp/containers/comptrie/comptrie_impl.h @@ -1,7 +1,7 @@ #pragma once - + #include <util/stream/output.h> - + #ifndef COMPTRIE_DATA_CHECK #define COMPTRIE_DATA_CHECK 1 #endif diff --git a/library/cpp/containers/comptrie/comptrie_trie.h b/library/cpp/containers/comptrie/comptrie_trie.h index 6665dfdf4d..40ec1e52b3 100644 --- a/library/cpp/containers/comptrie/comptrie_trie.h +++ b/library/cpp/containers/comptrie/comptrie_trie.h @@ -1,11 +1,11 @@ #pragma once - + #include "comptrie_impl.h" #include "comptrie_packer.h" #include "opaque_trie_iterator.h" #include "leaf_skipper.h" #include "key_selector.h" - + #include <util/generic/buffer.h> #include <util/generic/ptr.h> #include <util/generic/vector.h> @@ -30,12 +30,12 @@ class TPrefixIterator; // in case of <char> specialization cannot distinguish between "" and "\0" keys template <class T = char, class D = ui64, class S = TCompactTriePacker<D>> -class TCompactTrie { -public: - typedef T TSymbol; +class TCompactTrie { +public: + typedef T TSymbol; typedef D TData; typedef S TPacker; - + typedef typename TCompactTrieKeySelector<TSymbol>::TKey TKey; typedef typename TCompactTrieKeySelector<TSymbol>::TKeyBuf TKeyBuf; @@ -50,8 +50,8 @@ protected: const char* EmptyValue = nullptr; TPacker Packer; NCompactTrie::TPackerLeafSkipper<TPacker> Skipper = &Packer; // This should be true for every constructor. - -public: + +public: TCompactTrie() = default; TCompactTrie(const char* d, size_t len, TPacker packer); @@ -76,7 +76,7 @@ public: void Init(const char* d, size_t len, TPacker packer = TPacker()); void Init(const TBlob& data, TPacker packer = TPacker()); - + bool IsInitialized() const; bool IsEmpty() const; @@ -142,14 +142,14 @@ public: inline bool FindTails(TSymbol label, TCompactTrie<T, D, S>& res) const; class TConstIterator { - private: + private: typedef NCompactTrie::TOpaqueTrieIterator TOpaqueTrieIterator; typedef NCompactTrie::TOpaqueTrie TOpaqueTrie; - friend class TCompactTrie; + friend class TCompactTrie; TConstIterator(const TOpaqueTrie& trie, const char* emptyValue, bool atend, TPacker packer); // only usable from Begin() and End() methods TConstIterator(const TOpaqueTrie& trie, const char* emptyValue, const TKeyBuf& key, TPacker packer); // only usable from UpperBound() method - public: + public: TConstIterator() = default; bool IsEmpty() const { return !Impl; @@ -162,7 +162,7 @@ public: TConstIterator& operator--(); TConstIterator operator--(int /*unused*/); TValueType operator*(); - + TKey GetKey() const; size_t GetKeySize() const; TData GetValue() const; @@ -172,13 +172,13 @@ public: private: TPacker Packer; TCopyPtr<TOpaqueTrieIterator> Impl; - }; - + }; + TConstIterator Begin() const; TConstIterator begin() const; TConstIterator End() const; TConstIterator end() const; - + // Returns an iterator pointing to the smallest key in the trie >= the argument. // TODO: misleading name. Should be called LowerBound for consistency with stl. // No. It is the STL that has a misleading name. @@ -203,22 +203,22 @@ protected: return LookupLongestPrefix(key, keylen, prefixLen, valuepos, hasNext); } void LookupPhrases(const char* datapos, size_t len, const TSymbol* key, size_t keylen, TVector<TPhraseMatch>& matches, TSymbol separator) const; -}; - +}; + template <class T = char, class D = ui64, class S = TCompactTriePacker<D>> class TCompactTrieHolder: public TCompactTrie<T, D, S>, NNonCopyable::TNonCopyable { private: typedef TCompactTrie<T, D, S> TBase; TArrayHolder<char> Storage; - + public: TCompactTrieHolder(IInputStream& is, size_t len); }; - + //------------------------// // Implementation section // //------------------------// - + // TCompactTrie template <class T, class D, class S> diff --git a/library/cpp/containers/comptrie/comptrie_ut.cpp b/library/cpp/containers/comptrie/comptrie_ut.cpp index 0afb79113f..74bee09b5d 100644 --- a/library/cpp/containers/comptrie/comptrie_ut.cpp +++ b/library/cpp/containers/comptrie/comptrie_ut.cpp @@ -116,25 +116,25 @@ private: UNIT_TEST_SUITE_END(); static const char* SampleData[]; - - template <class T> + + template <class T> void CreateTrie(IOutputStream& out, bool minimize, bool useFastLayout); - - template <class T> + + template <class T> void CheckData(const char* src, size_t len); - template <class T> + template <class T> void CheckUpperBound(const char* src, size_t len); template <class T> - void CheckIterator(const char* src, size_t len); - - template <class T> + void CheckIterator(const char* src, size_t len); + + template <class T> void TestTrie(bool minimize, bool useFastLayout); - - template <class T> - void TestTrieIterator(bool minimize); - + + template <class T> + void TestTrieIterator(bool minimize); + template <class T, bool minimize> void TestRandom(const size_t n, const size_t maxKeySize); @@ -167,34 +167,34 @@ private: class TDummyPacker; class TStrokaPacker; -public: +public: void TestPackers(); - void TestTrie8(); - void TestTrie16(); - void TestTrie32(); + void TestTrie8(); + void TestTrie16(); + void TestTrie32(); void TestFastTrie8(); void TestFastTrie16(); void TestFastTrie32(); - void TestMinimizedTrie8(); - void TestMinimizedTrie16(); - void TestMinimizedTrie32(); + void TestMinimizedTrie8(); + void TestMinimizedTrie16(); + void TestMinimizedTrie32(); void TestFastMinimizedTrie8(); void TestFastMinimizedTrie16(); void TestFastMinimizedTrie32(); - void TestTrieIterator8(); - void TestTrieIterator16(); - void TestTrieIterator32(); + void TestTrieIterator8(); + void TestTrieIterator16(); + void TestTrieIterator32(); - void TestMinimizedTrieIterator8(); - void TestMinimizedTrieIterator16(); - void TestMinimizedTrieIterator32(); + void TestMinimizedTrieIterator8(); + void TestMinimizedTrieIterator16(); + void TestMinimizedTrieIterator32(); - void TestPhraseSearch(); + void TestPhraseSearch(); void TestAddGet(); void TestEmpty(); void TestUninitializedNonEmpty(); @@ -279,13 +279,13 @@ const char* TCompactTrieTest::SampleData[] = { template <class T> typename TCompactTrie<T>::TKey MakeWideKey(const char* str, size_t len) { typename TCompactTrie<T>::TKey buffer; - for (size_t i = 0; i < len; i++) { - unsigned int ch = (str[i] & 0xFF); + for (size_t i = 0; i < len; i++) { + unsigned int ch = (str[i] & 0xFF); buffer.push_back((T)(ch | ch << 8 | ch << 16 | ch << 24)); - } - return buffer; -} - + } + return buffer; +} + template <class T> typename TCompactTrie<T>::TKey MakeWideKey(const TString& str) { return MakeWideKey<T>(str.c_str(), str.length()); @@ -308,13 +308,13 @@ void TCompactTrieTest::CreateTrie(IOutputStream& out, bool minimize, bool useFas TBufferOutput tmp2; IOutputStream& currentOutput = useFastLayout ? tmp2 : out; - if (minimize) { - TBufferOutput buftmp; - builder.Save(buftmp); + if (minimize) { + TBufferOutput buftmp; + builder.Save(buftmp); CompactTrieMinimize<TCompactTriePacker<ui64>>(currentOutput, buftmp.Buffer().Data(), buftmp.Buffer().Size(), false); - } else { + } else { builder.Save(currentOutput); - } + } if (useFastLayout) { CompactTrieMakeFastLayout<TCompactTriePacker<T>>(out, tmp2.Buffer().Data(), tmp2.Buffer().Size(), false); } @@ -358,23 +358,23 @@ void TCompactTrieTest::CheckUpperBound(const char* data, size_t datalen) { } template <class T> -void TCompactTrieTest::CheckData(const char* data, size_t datalen) { - TCompactTrie<T> trie(data, datalen); +void TCompactTrieTest::CheckData(const char* data, size_t datalen) { + TCompactTrie<T> trie(data, datalen); UNIT_ASSERT_VALUES_EQUAL(Y_ARRAY_SIZE(SampleData), trie.Size()); for (auto& i : SampleData) { size_t len = strlen(i); - ui64 value = 0; + ui64 value = 0; size_t prefixLen = 0; typename TCompactTrie<T>::TKey key = MakeWideKey<T>(i, len); UNIT_ASSERT(trie.Find(key, &value)); - UNIT_ASSERT_EQUAL(len * 2, value); + UNIT_ASSERT_EQUAL(len * 2, value); UNIT_ASSERT(trie.FindLongestPrefix(key, &prefixLen, &value)); UNIT_ASSERT_EQUAL(len, prefixLen); UNIT_ASSERT_EQUAL(len * 2, value); - + TString badkey("bb"); badkey += i; key = MakeWideKey<T>(badkey); @@ -420,35 +420,35 @@ void TCompactTrieTest::CheckData(const char* data, size_t datalen) { } template <class T> -void TCompactTrieTest::CheckIterator(const char* data, size_t datalen) { +void TCompactTrieTest::CheckIterator(const char* data, size_t datalen) { typedef typename TCompactTrie<T>::TKey TKey; typedef typename TCompactTrie<T>::TValueType TValue; TMap<TKey, ui64> stored; - + for (auto& i : SampleData) { size_t len = strlen(i); - + stored[MakeWideKey<T>(i, len)] = len * 2; - } - - TCompactTrie<T> trie(data, datalen); + } + + TCompactTrie<T> trie(data, datalen); TVector<TValue> items; - typename TCompactTrie<T>::TConstIterator it = trie.Begin(); - size_t entry_count = 0; + typename TCompactTrie<T>::TConstIterator it = trie.Begin(); + size_t entry_count = 0; TMap<TKey, ui64> received; - while (it != trie.End()) { + while (it != trie.End()) { UNIT_ASSERT_VALUES_EQUAL(it.GetKeySize(), it.GetKey().size()); - received.insert(*it); + received.insert(*it); items.push_back(*it); - entry_count++; + entry_count++; it++; - } + } TMap<TKey, ui64> received2; for (std::pair<TKey, ui64> x : trie) { received2.insert(x); } - UNIT_ASSERT(entry_count == stored.size()); - UNIT_ASSERT(received == stored); + UNIT_ASSERT(entry_count == stored.size()); + UNIT_ASSERT(received == stored); UNIT_ASSERT(received2 == stored); std::reverse(items.begin(), items.end()); @@ -483,21 +483,21 @@ void TCompactTrieTest::CheckIterator(const char* data, size_t datalen) { UNIT_ASSERT(revIt == emptyIt); UNIT_ASSERT(revIt.IsEmpty()); UNIT_ASSERT(revIt != trie.End()); -} - +} + template <class T> void TCompactTrieTest::TestTrie(bool minimize, bool useFastLayout) { TBufferOutput bufout; CreateTrie<T>(bufout, minimize, useFastLayout); - CheckData<T>(bufout.Buffer().Data(), bufout.Buffer().Size()); + CheckData<T>(bufout.Buffer().Data(), bufout.Buffer().Size()); CheckUpperBound<T>(bufout.Buffer().Data(), bufout.Buffer().Size()); } template <class T> -void TCompactTrieTest::TestTrieIterator(bool minimize) { +void TCompactTrieTest::TestTrieIterator(bool minimize) { TBufferOutput bufout; CreateTrie<T>(bufout, minimize, false); - CheckIterator<T>(bufout.Buffer().Data(), bufout.Buffer().Size()); + CheckIterator<T>(bufout.Buffer().Data(), bufout.Buffer().Size()); } void TCompactTrieTest::TestTrie8() { @@ -519,7 +519,7 @@ void TCompactTrieTest::TestFastTrie16() { void TCompactTrieTest::TestFastTrie32() { TestTrie<wchar32>(false, true); } - + void TCompactTrieTest::TestMinimizedTrie8() { TestTrie<char>(true, false); } @@ -529,7 +529,7 @@ void TCompactTrieTest::TestMinimizedTrie16() { void TCompactTrieTest::TestMinimizedTrie32() { TestTrie<wchar32>(true, false); } - + void TCompactTrieTest::TestFastMinimizedTrie8() { TestTrie<char>(true, true); } @@ -559,56 +559,56 @@ void TCompactTrieTest::TestMinimizedTrieIterator16() { void TCompactTrieTest::TestMinimizedTrieIterator32() { TestTrieIterator<wchar32>(true); } - -void TCompactTrieTest::TestPhraseSearch() { + +void TCompactTrieTest::TestPhraseSearch() { static const char* phrases[] = {"ab", "ab cd", "ab cd ef"}; - static const char* const goodphrase = "ab cd ef gh"; - static const char* const badphrase = "cd ef gh ab"; - TBufferOutput bufout; - + static const char* const goodphrase = "ab cd ef gh"; + static const char* const badphrase = "cd ef gh ab"; + TBufferOutput bufout; + TCompactTrieBuilder<char> builder; for (size_t i = 0; i < Y_ARRAY_SIZE(phrases); i++) { - builder.Add(phrases[i], strlen(phrases[i]), i); - } - builder.Save(bufout); - - TCompactTrie<char> trie(bufout.Buffer().Data(), bufout.Buffer().Size()); + builder.Add(phrases[i], strlen(phrases[i]), i); + } + builder.Save(bufout); + + TCompactTrie<char> trie(bufout.Buffer().Data(), bufout.Buffer().Size()); TVector<TCompactTrie<char>::TPhraseMatch> matches; - trie.FindPhrases(goodphrase, strlen(goodphrase), matches); - + trie.FindPhrases(goodphrase, strlen(goodphrase), matches); + UNIT_ASSERT(matches.size() == Y_ARRAY_SIZE(phrases)); for (size_t i = 0; i < Y_ARRAY_SIZE(phrases); i++) { - UNIT_ASSERT(matches[i].first == strlen(phrases[i])); - UNIT_ASSERT(matches[i].second == i); - } - - trie.FindPhrases(badphrase, strlen(badphrase), matches); - UNIT_ASSERT(matches.size() == 0); -} - + UNIT_ASSERT(matches[i].first == strlen(phrases[i])); + UNIT_ASSERT(matches[i].second == i); + } + + trie.FindPhrases(badphrase, strlen(badphrase), matches); + UNIT_ASSERT(matches.size() == 0); +} + void TCompactTrieTest::TestAddGet() { - TCompactTrieBuilder<char> builder; - builder.Add("abcd", 4, 1); - builder.Add("acde", 4, 2); + TCompactTrieBuilder<char> builder; + builder.Add("abcd", 4, 1); + builder.Add("acde", 4, 2); ui64 dummy; - UNIT_ASSERT(builder.Find("abcd", 4, &dummy)); + UNIT_ASSERT(builder.Find("abcd", 4, &dummy)); UNIT_ASSERT(1 == dummy); - UNIT_ASSERT(builder.Find("acde", 4, &dummy)); + UNIT_ASSERT(builder.Find("acde", 4, &dummy)); UNIT_ASSERT(2 == dummy); - UNIT_ASSERT(!builder.Find("fgdgfacde", 9, &dummy)); - UNIT_ASSERT(!builder.Find("ab", 2, &dummy)); + UNIT_ASSERT(!builder.Find("fgdgfacde", 9, &dummy)); + UNIT_ASSERT(!builder.Find("ab", 2, &dummy)); } void TCompactTrieTest::TestEmpty() { - TCompactTrieBuilder<char> builder; + TCompactTrieBuilder<char> builder; ui64 dummy = 12345; size_t prefixLen; - UNIT_ASSERT(!builder.Find("abc", 3, &dummy)); + UNIT_ASSERT(!builder.Find("abc", 3, &dummy)); TBufferOutput bufout; builder.Save(bufout); - TCompactTrie<char> trie(bufout.Buffer().Data(), bufout.Buffer().Size()); - UNIT_ASSERT(!trie.Find("abc", 3, &dummy)); + TCompactTrie<char> trie(bufout.Buffer().Data(), bufout.Buffer().Size()); + UNIT_ASSERT(!trie.Find("abc", 3, &dummy)); UNIT_ASSERT(!trie.Find("", 0, &dummy)); UNIT_ASSERT(!trie.FindLongestPrefix("abc", 3, &prefixLen, &dummy)); UNIT_ASSERT(!trie.FindLongestPrefix("", 0, &prefixLen, &dummy)); diff --git a/library/cpp/containers/comptrie/leaf_skipper.h b/library/cpp/containers/comptrie/leaf_skipper.h index 06e93e10dc..3959258948 100644 --- a/library/cpp/containers/comptrie/leaf_skipper.h +++ b/library/cpp/containers/comptrie/leaf_skipper.h @@ -1,7 +1,7 @@ #pragma once - + #include <cstddef> - + namespace NCompactTrie { class ILeafSkipper { public: diff --git a/library/cpp/containers/comptrie/make_fast_layout.cpp b/library/cpp/containers/comptrie/make_fast_layout.cpp index 5de128e0e9..ade78d7899 100644 --- a/library/cpp/containers/comptrie/make_fast_layout.cpp +++ b/library/cpp/containers/comptrie/make_fast_layout.cpp @@ -146,7 +146,7 @@ namespace NCompactTrie { explicit TOffsetIndex(TTrieNodeCounts& counts) { ParentCounts.Swap(counts); } - + void Add(size_t key, size_t value) { Data[key] = value; } @@ -154,7 +154,7 @@ namespace NCompactTrie { size_t Size() const { return Data.size(); } - + size_t Get(size_t key) { auto pos = Data.find(key); if (pos == Data.end()) { @@ -167,14 +167,14 @@ namespace NCompactTrie { } return result; } - + private: TTrieNodeCounts ParentCounts; THashMap<size_t, size_t> Data; }; //--------------------------------------------------------------------------------------- - + class TTrieMeasurer { public: TTrieMeasurer(const TOpaqueTrie& trie, bool verbose); @@ -226,7 +226,7 @@ namespace NCompactTrie { { Y_ASSERT(Trie.Data); } - + void TTrieMeasurer::Measure() { if (Verbose) { Cerr << "Measuring the trie..." << Endl; @@ -308,7 +308,7 @@ namespace NCompactTrie { , Verbose(verbose) { } - + bool Move() override { if (!Fresh) { CentralWord.pop_back(); @@ -318,11 +318,11 @@ namespace NCompactTrie { } return true; } - + const TNode& Get() const { return CentralWord.back(); } - + size_t GetLeafLength() const override { return Get().GetLeafLength(); } @@ -333,7 +333,7 @@ namespace NCompactTrie { return NPOS; return resultLength - BackIndex.Get(absoffset); } - + size_t RecreateNode(char* buffer, size_t resultLength) override { TWriteableNode newNode(Get(), Trie.Data); newNode.ForwardOffset = PrepareOffset(Get().GetForwardOffset(), resultLength); @@ -346,7 +346,7 @@ namespace NCompactTrie { MaxIndexSize = Max(MaxIndexSize, BackIndex.Size()); return len; } - + private: bool Fresh; TOpaqueTrie Trie; @@ -382,7 +382,7 @@ namespace NCompactTrie { } } } - + void FillCentralWord() { CentralWord.clear(); CentralWord.push_back(TNode(Trie.Data, Trace.back().Nodes.back(), Trie.SkipFunction)); @@ -462,6 +462,6 @@ namespace NCompactTrie { mes.Measure(); TVanEmdeBoasReverseNodeEnumerator enumerator(mes, verbose); return WriteTrieBackwards(os, enumerator, verbose); - } + } } diff --git a/library/cpp/containers/comptrie/make_fast_layout.h b/library/cpp/containers/comptrie/make_fast_layout.h index a55e9a5689..b8fab5d65b 100644 --- a/library/cpp/containers/comptrie/make_fast_layout.h +++ b/library/cpp/containers/comptrie/make_fast_layout.h @@ -1,8 +1,8 @@ #pragma once - + #include "leaf_skipper.h" #include <cstddef> - + class IOutputStream; namespace NCompactTrie { diff --git a/library/cpp/containers/comptrie/minimize.cpp b/library/cpp/containers/comptrie/minimize.cpp index cb25a366f6..80d0b25217 100644 --- a/library/cpp/containers/comptrie/minimize.cpp +++ b/library/cpp/containers/comptrie/minimize.cpp @@ -15,7 +15,7 @@ namespace NCompactTrie { // nodes that have identical continuations (Daciuk's right language), // and repack the trie. Repacking is done in-place, so memory is less // of an issue; however, it may take considerable time. - + // IMPORTANT: never try to reminimize an already minimized trie or a trie with fast layout. // Because of non-local structure and epsilon links, it won't work // as you expect it to, and can destroy the trie in the making. @@ -23,7 +23,7 @@ namespace NCompactTrie { namespace { using TOffsetList = TVector<size_t>; using TPieceIndex = THashMap<size_t, TOffsetList>; - + using TSizePair = std::pair<size_t, size_t>; using TSizePairVector = TVector<TSizePair>; using TSizePairVectorVector = TVector<TSizePairVector>; @@ -36,38 +36,38 @@ namespace NCompactTrie { TOffsetMap() { Data.reserve(0x10000); } - + void Add(size_t key, size_t value) { size_t hikey = key & 0xFFFF; - + if (Data.size() <= hikey) Data.resize(hikey + 1); TSizePairVector& sublist = Data[hikey]; - + for (auto& it : sublist) { if (it.first == key) { it.second = value; - + return; } } - + sublist.push_back(TSizePair(key, value)); } bool Contains(size_t key) const { return (Get(key) != 0); - } + } size_t Get(size_t key) const { size_t hikey = key & 0xFFFF; - + if (Data.size() <= hikey) return 0; - + const TSizePairVector& sublist = Data[hikey]; - + for (const auto& it : sublist) { if (it.first == key) return it.second; @@ -76,7 +76,7 @@ namespace NCompactTrie { return 0; } }; - + class TOffsetDeltas { protected: TSizePairVector Data; @@ -88,10 +88,10 @@ namespace NCompactTrie { return; // no offset } else { TSizePair last = Data.back(); - + if (key <= last.first) { Cerr << "Trouble: elements to offset delta list added in wrong order" << Endl; - + return; } @@ -100,19 +100,19 @@ namespace NCompactTrie { } Data.push_back(TSizePair(key, value)); - } + } size_t Get(size_t key) const { if (Data.empty()) return key; // difference is zero; - + if (key < Data.front().first) return key; - + // Binary search for the highest entry in the list that does not exceed the key size_t from = 0; size_t to = Data.size() - 1; - + while (from < to) { size_t midpoint = (from + to + 1) / 2; @@ -121,7 +121,7 @@ namespace NCompactTrie { else from = midpoint; } - + TSizePair entry = Data[from]; return key - entry.first + entry.second; @@ -139,7 +139,7 @@ namespace NCompactTrie { , Length(len) { } - + bool operator()(size_t p1, const size_t p2) { int res = memcmp(Data + p1, Data + p2, Length); @@ -159,13 +159,13 @@ namespace NCompactTrie { : Selector(0) { } - + TBranchPoint(const char* data, size_t offset, const ILeafSkipper& skipFunction) : Node(data, offset, skipFunction) , Selector(0) { } - + bool IsFinal() const { return Node.IsFinal(); } @@ -174,7 +174,7 @@ namespace NCompactTrie { size_t NextNode(const TOffsetMap& mergedNodes) { while (Selector < 3) { size_t nextOffset = 0; - + switch (++Selector) { case 1: if (Node.GetRightOffset()) @@ -199,7 +199,7 @@ namespace NCompactTrie { return nextOffset; } return 0; - } + } }; class TMergingReverseNodeEnumerator: public TReverseNodeEnumerator { @@ -209,7 +209,7 @@ namespace NCompactTrie { const TOffsetMap& MergeMap; TVector<TBranchPoint> Trace; TOffsetDeltas OffsetIndex; - + public: TMergingReverseNodeEnumerator(const TOpaqueTrie& trie, const TOffsetMap& mergers) : Fresh(true) @@ -217,7 +217,7 @@ namespace NCompactTrie { , MergeMap(mergers) { } - + bool Move() override { if (Fresh) { Trace.push_back(TBranchPoint(Trie.Data, 0, Trie.SkipFunction)); @@ -225,7 +225,7 @@ namespace NCompactTrie { } else { if (Trace.empty()) return false; - + Trace.pop_back(); if (Trace.empty()) @@ -233,7 +233,7 @@ namespace NCompactTrie { } size_t nextnode = Trace.back().NextNode(MergeMap); - + while (nextnode) { Trace.push_back(TBranchPoint(Trie.Data, nextnode, Trie.SkipFunction)); nextnode = Trace.back().NextNode(MergeMap); @@ -241,30 +241,30 @@ namespace NCompactTrie { return (!Trace.empty()); } - + const TNode& Get() const { return Trace.back().Node; } size_t GetLeafLength() const override { return Get().GetLeafLength(); } - + // Returns recalculated offset from the end of the current node size_t PrepareOffset(size_t absoffset, size_t minilength) { if (!absoffset) return NPOS; - + if (MergeMap.Contains(absoffset)) absoffset = MergeMap.Get(absoffset); return minilength - OffsetIndex.Get(Trie.Length - absoffset); } - + size_t RecreateNode(char* buffer, size_t resultLength) override { TWriteableNode newNode(Get(), Trie.Data); newNode.ForwardOffset = PrepareOffset(Get().GetForwardOffset(), resultLength); newNode.LeftOffset = PrepareOffset(Get().GetLeftOffset(), resultLength); newNode.RightOffset = PrepareOffset(Get().GetRightOffset(), resultLength); - + if (!buffer) return newNode.Measure(); @@ -275,8 +275,8 @@ namespace NCompactTrie { } }; - } - + } + static void AddPiece(TPieceIndex& index, size_t offset, size_t len) { index[len].push_back(offset); } @@ -288,24 +288,24 @@ namespace NCompactTrie { TOffsetMap merger; // Start walking the trie from head. AddPiece(subtries, 0, trie.Length); - + size_t counter = 0; // Now consider all nodes with sizeable continuations for (size_t curlen = trie.Length; curlen >= minMergeSize && !subtries.empty(); curlen--) { TPieceIndex::iterator iit = subtries.find(curlen); - + if (iit == subtries.end()) continue; // fast forward to the next available length value TOffsetList& batch = iit->second; TPieceComparer comparer(trie.Data, curlen); Sort(batch.begin(), batch.end(), comparer); - + TOffsetList::iterator it = batch.begin(); while (it != batch.end()) { if (verbose) ShowProgress(++counter); - + size_t offset = *it; // Fill the array with the subnodes of the element @@ -322,11 +322,11 @@ namespace NCompactTrie { if (size_t forwardOffset = node.GetForwardOffset()) { AddPiece(subtries, forwardOffset, end - forwardOffset); } - + while (++it != batch.end()) { // Find next different; until then, just add the offsets to the list of merged nodes. size_t nextoffset = *it; - + if (memcmp(trie.Data + offset, trie.Data + nextoffset, curlen)) break; @@ -335,12 +335,12 @@ namespace NCompactTrie { } subtries.erase(curlen); - } + } if (verbose) { Cerr << counter << Endl; } return merger; - } + } size_t RawCompactTrieMinimizeImpl(IOutputStream& os, TOpaqueTrie& trie, bool verbose, size_t minMergeSize, EMinimizeMode mode) { if (!trie.Data || !trie.Length) { diff --git a/library/cpp/containers/comptrie/minimize.h b/library/cpp/containers/comptrie/minimize.h index d9fff3cfa5..baaa431d04 100644 --- a/library/cpp/containers/comptrie/minimize.h +++ b/library/cpp/containers/comptrie/minimize.h @@ -1,8 +1,8 @@ #pragma once - + #include "leaf_skipper.h" #include <cstddef> - + class IOutputStream; namespace NCompactTrie { diff --git a/library/cpp/containers/comptrie/node.cpp b/library/cpp/containers/comptrie/node.cpp index 1df102fb84..5fd22f15ec 100644 --- a/library/cpp/containers/comptrie/node.cpp +++ b/library/cpp/containers/comptrie/node.cpp @@ -1,7 +1,7 @@ #include "node.h" #include "leaf_skipper.h" #include "comptrie_impl.h" - + #include <util/system/yassert.h> #include <util/generic/yexception.h> @@ -16,7 +16,7 @@ namespace NCompactTrie { offset = 0; } } - + // We believe that epsilon links are found only on the forward-position and that afer jumping an epsilon link you come to an ordinary node. TNode::TNode(const char* data, size_t offset, const ILeafSkipper& skipFunction) @@ -35,7 +35,7 @@ namespace NCompactTrie { char flags = *(datapos++); Y_ASSERT(!IsEpsilonLink(flags)); Label = *(datapos++); - + size_t leftsize = LeftOffsetLen(flags); size_t& leftOffset = Offsets[D_LEFT]; leftOffset = UnpackOffset(datapos, leftsize); @@ -43,7 +43,7 @@ namespace NCompactTrie { leftOffset += Offset; } datapos += leftsize; - + size_t rightsize = RightOffsetLen(flags); size_t& rightOffset = Offsets[D_RIGHT]; rightOffset = UnpackOffset(datapos, rightsize); @@ -72,8 +72,8 @@ namespace NCompactTrie { ythrow yexception() << "Corrupted epsilon link"; } forwardOffset += epsilonOffset; - } - } - } + } + } + } -} +} diff --git a/library/cpp/containers/comptrie/node.h b/library/cpp/containers/comptrie/node.h index 3194a1340f..d6f4317db0 100644 --- a/library/cpp/containers/comptrie/node.h +++ b/library/cpp/containers/comptrie/node.h @@ -4,7 +4,7 @@ namespace NCompactTrie { class ILeafSkipper; - + enum TDirection { D_LEFT, D_FINAL, @@ -42,11 +42,11 @@ namespace NCompactTrie { size_t GetCoreLength() const { return CoreLength; } - + size_t GetOffsetByDirection(TDirection direction) const { return Offsets[direction]; } - + size_t GetForwardOffset() const { return Offsets[D_NEXT]; } @@ -63,7 +63,7 @@ namespace NCompactTrie { bool IsFinal() const { return GetLeafOffset() != 0; } - + bool HasEpsilonLinkForward() const { return GetForwardOffset() > Offset + CoreLength; } @@ -76,5 +76,5 @@ namespace NCompactTrie { char Label; }; - + } diff --git a/library/cpp/containers/comptrie/opaque_trie_iterator.cpp b/library/cpp/containers/comptrie/opaque_trie_iterator.cpp index 356c6a72ff..5fd3914be6 100644 --- a/library/cpp/containers/comptrie/opaque_trie_iterator.cpp +++ b/library/cpp/containers/comptrie/opaque_trie_iterator.cpp @@ -1,7 +1,7 @@ #include "opaque_trie_iterator.h" #include "comptrie_impl.h" #include "node.h" - + namespace NCompactTrie { TOpaqueTrieIterator::TOpaqueTrieIterator(const TOpaqueTrie& trie, const char* emptyValue, bool atend, size_t maxKeyLength) diff --git a/library/cpp/containers/comptrie/opaque_trie_iterator.h b/library/cpp/containers/comptrie/opaque_trie_iterator.h index 04d7134b07..195da3c191 100644 --- a/library/cpp/containers/comptrie/opaque_trie_iterator.h +++ b/library/cpp/containers/comptrie/opaque_trie_iterator.h @@ -1,10 +1,10 @@ #pragma once - + #include "comptrie_impl.h" #include "node.h" #include "key_selector.h" #include "leaf_skipper.h" - + #include <util/generic/vector.h> #include <util/generic/yexception.h> @@ -17,7 +17,7 @@ namespace NCompactTrie { const char* Data; size_t Limit; // valid data is in range [Data + Node.GetOffset(), Data + Limit) TDirection CurrentDirection; - + public: TFork(const char* data, size_t offset, size_t limit, const ILeafSkipper& skipper); diff --git a/library/cpp/containers/comptrie/write_trie_backwards.cpp b/library/cpp/containers/comptrie/write_trie_backwards.cpp index 3109e342a7..fd8c28b0ed 100644 --- a/library/cpp/containers/comptrie/write_trie_backwards.cpp +++ b/library/cpp/containers/comptrie/write_trie_backwards.cpp @@ -11,23 +11,23 @@ namespace NCompactTrie { if (verbose) { Cerr << "Writing down the trie..." << Endl; } - + // Rewrite everything from the back, removing unused pieces. const size_t chunksize = 0x10000; TVector<char*> resultData; - + resultData.push_back(new char[chunksize]); char* chunkend = resultData.back() + chunksize; - + size_t resultLength = 0; size_t chunkLength = 0; - + size_t counter = 0; TBuffer bufferHolder; while (enumerator.Move()) { if (verbose) ShowProgress(++counter); - + size_t bufferLength = 64 + enumerator.GetLeafLength(); // never know how big leaf data can be bufferHolder.Clear(); bufferHolder.Resize(bufferLength); @@ -35,7 +35,7 @@ namespace NCompactTrie { size_t nodelength = enumerator.RecreateNode(buffer, resultLength); Y_ASSERT(nodelength <= bufferLength); - + resultLength += nodelength; if (chunkLength + nodelength <= chunksize) { @@ -44,14 +44,14 @@ namespace NCompactTrie { } else { // allocate a new chunk memcpy(chunkend - chunksize, buffer + nodelength - (chunksize - chunkLength), chunksize - chunkLength); chunkLength = chunkLength + nodelength - chunksize; - + resultData.push_back(new char[chunksize]); chunkend = resultData.back() + chunksize; - + while (chunkLength > chunksize) { // allocate a new chunks chunkLength -= chunksize; memcpy(chunkend - chunksize, buffer + chunkLength, chunksize); - + resultData.push_back(new char[chunksize]); chunkend = resultData.back() + chunksize; } @@ -70,11 +70,11 @@ namespace NCompactTrie { chunkLength = chunksize; delete[] chunk; resultData.pop_back(); - } + } return resultLength; - } - + } + size_t WriteTrieBackwardsNoAlloc(IOutputStream& os, TReverseNodeEnumerator& enumerator, TOpaqueTrie& trie, EMinimizeMode mode) { char* data = const_cast<char*>(trie.Data); char* end = data + trie.Length; diff --git a/library/cpp/containers/comptrie/writeable_node.cpp b/library/cpp/containers/comptrie/writeable_node.cpp index b45f9f47ce..404003dbbd 100644 --- a/library/cpp/containers/comptrie/writeable_node.cpp +++ b/library/cpp/containers/comptrie/writeable_node.cpp @@ -12,7 +12,7 @@ namespace NCompactTrie { , Label(0) { } - + static size_t GetOffsetFromEnd(const TNode& node, size_t absOffset) { return absOffset ? absOffset - node.GetOffset() - node.GetCoreLength() : NPOS; } @@ -36,7 +36,7 @@ namespace NCompactTrie { do { lastLen = len; lastFwdLen = fwdLen; - + len = 2 + LeafLength; len += MeasureOffset(LeftOffset != NPOS ? LeftOffset + lastLen : 0); len += MeasureOffset(RightOffset != NPOS ? RightOffset + lastLen : 0); @@ -48,15 +48,15 @@ namespace NCompactTrie { fwdLen = MeasureOffset(ForwardOffset + lastFwdLen) + 1; len += fwdLen; } - + } while (lastLen != len || lastFwdLen != fwdLen); - + return len; } - + size_t TWriteableNode::Pack(char* buffer) const { const size_t length = Measure(); - + char flags = 0; if (LeafPos) { flags |= MT_FINAL; @@ -64,14 +64,14 @@ namespace NCompactTrie { if (ForwardOffset != NPOS) { flags |= MT_NEXT; } - + const size_t leftOffset = LeftOffset != NPOS ? LeftOffset + length : 0; const size_t rightOffset = RightOffset != NPOS ? RightOffset + length : 0; const size_t leftOffsetSize = MeasureOffset(leftOffset); const size_t rightOffsetSize = MeasureOffset(rightOffset); flags |= (leftOffsetSize << MT_LEFTSHIFT); flags |= (rightOffsetSize << MT_RIGHTSHIFT); - + buffer[0] = flags; buffer[1] = Label; size_t usedLen = 2; @@ -91,6 +91,6 @@ namespace NCompactTrie { } Y_ASSERT(usedLen == length); return usedLen; - } - -} + } + +} |