diff options
author | Vasily Gerasimov <UgnineSirdis@gmail.com> | 2022-02-10 16:49:09 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:49:09 +0300 |
commit | 6cdc8f140213c595e4ad38bc3d97fcef1146b8c3 (patch) | |
tree | f69637041e6fed76ebae0c74ae1fa0c4be6ab5b4 /library/cpp/containers/comptrie | |
parent | e5d4696304c6689379ac7ce334512404d4b7836c (diff) | |
download | ydb-6cdc8f140213c595e4ad38bc3d97fcef1146b8c3.tar.gz |
Restoring authorship annotation for Vasily Gerasimov <UgnineSirdis@gmail.com>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/containers/comptrie')
-rw-r--r-- | library/cpp/containers/comptrie/comptrie_builder.h | 10 | ||||
-rw-r--r-- | library/cpp/containers/comptrie/comptrie_builder.inl | 396 | ||||
-rw-r--r-- | library/cpp/containers/comptrie/comptrie_impl.h | 6 | ||||
-rw-r--r-- | library/cpp/containers/comptrie/comptrie_trie.h | 26 | ||||
-rw-r--r-- | library/cpp/containers/comptrie/comptrie_ut.cpp | 276 |
5 files changed, 357 insertions, 357 deletions
diff --git a/library/cpp/containers/comptrie/comptrie_builder.h b/library/cpp/containers/comptrie/comptrie_builder.h index cf7d2e39a3..f8a4926ef0 100644 --- a/library/cpp/containers/comptrie/comptrie_builder.h +++ b/library/cpp/containers/comptrie/comptrie_builder.h @@ -46,7 +46,7 @@ public: typedef typename TCompactTrieKeySelector<TSymbol>::TKey TKey; typedef typename TCompactTrieKeySelector<TSymbol>::TKeyBuf TKeyBuf; - explicit TCompactTrieBuilder(TCompactTrieBuilderFlags flags = CTBF_NONE, TPacker packer = TPacker(), IAllocator* alloc = TDefaultAllocator::Instance()); + explicit TCompactTrieBuilder(TCompactTrieBuilderFlags flags = CTBF_NONE, TPacker packer = TPacker(), IAllocator* alloc = TDefaultAllocator::Instance()); // All Add.. methods return true if it was a new key, false if the key already existed. @@ -72,14 +72,14 @@ public: } bool Find(const TSymbol* key, size_t keylen, TData* value) const; - bool Find(const TKeyBuf& key, TData* value = nullptr) const { + bool Find(const TKeyBuf& key, TData* value = nullptr) const { return Find(key.data(), key.size(), value); } - bool FindLongestPrefix(const TSymbol* key, size_t keylen, size_t* prefixLen, TData* value = nullptr) const; - bool FindLongestPrefix(const TKeyBuf& key, size_t* prefixLen, TData* value = nullptr) const { + bool FindLongestPrefix(const TSymbol* key, size_t keylen, size_t* prefixLen, TData* value = nullptr) const; + bool FindLongestPrefix(const TKeyBuf& key, size_t* prefixLen, TData* value = nullptr) const { return FindLongestPrefix(key.data(), key.size(), prefixLen, value); - } + } size_t Save(IOutputStream& os) const; size_t SaveAndDestroy(IOutputStream& os); diff --git a/library/cpp/containers/comptrie/comptrie_builder.inl b/library/cpp/containers/comptrie/comptrie_builder.inl index f273fa6571..e1a99da902 100644 --- a/library/cpp/containers/comptrie/comptrie_builder.inl +++ b/library/cpp/containers/comptrie/comptrie_builder.inl @@ -1,20 +1,20 @@ #pragma once #include "comptrie_impl.h" -#include "comptrie_trie.h" +#include "comptrie_trie.h" #include "make_fast_layout.h" #include "array_with_size.h" #include <library/cpp/containers/compact_vector/compact_vector.h> -#include <util/memory/alloc.h> +#include <util/memory/alloc.h> #include <util/memory/blob.h> #include <util/memory/pool.h> #include <util/memory/tempbuf.h> #include <util/memory/smallobj.h> #include <util/generic/algorithm.h> #include <util/generic/buffer.h> -#include <util/generic/strbuf.h> +#include <util/generic/strbuf.h> #include <util/system/align.h> #include <util/stream/buffer.h> @@ -49,8 +49,8 @@ protected: void ConvertSymbolArrayToChar(const TSymbol* key, size_t keylen, TTempBuf& buf, size_t ckeylen) const; void NodeLinkTo(TNode* thiz, const TBlob& label, TNode* node); TNode* NodeForwardAdd(TNode* thiz, const char* label, size_t len, size_t& passed, size_t* nodeCount); - bool FindEntryImpl(const char* key, size_t keylen, TData* value) const; - bool FindLongestPrefixImpl(const char* keyptr, size_t keylen, size_t* prefixLen, TData* value) const; + bool FindEntryImpl(const char* key, size_t keylen, TData* value) const; + bool FindLongestPrefixImpl(const char* keyptr, size_t keylen, size_t* prefixLen, TData* value) const; size_t NodeMeasureSubtree(TNode* thiz) const; ui64 NodeSaveSubtree(TNode* thiz, IOutputStream& os) const; @@ -67,7 +67,7 @@ protected: ui64 ArcSaveAndDestroy(const TArc* thiz, IOutputStream& os); public: - TCompactTrieBuilderImpl(TCompactTrieBuilderFlags flags, TPacker packer, IAllocator* alloc); + TCompactTrieBuilderImpl(TCompactTrieBuilderFlags flags, TPacker packer, IAllocator* alloc); virtual ~TCompactTrieBuilderImpl(); void DestroyNode(TNode* node); @@ -81,14 +81,14 @@ public: bool AddSubtreeInFile(const TSymbol* key, size_t keylen, const TString& fileName); bool AddSubtreeInBuffer(const TSymbol* key, size_t keylen, TArrayWithSizeHolder<char>&& buffer); bool FindEntry(const TSymbol* key, size_t keylen, TData* value) const; - bool FindLongestPrefix(const TSymbol* key, size_t keylen, size_t* prefixlen, TData* value) const; + bool FindLongestPrefix(const TSymbol* key, size_t keylen, size_t* prefixlen, TData* value) const; size_t Save(IOutputStream& os) const; size_t SaveAndDestroy(IOutputStream& os); void Clear(); - // lies if some key was added at least twice + // lies if some key was added at least twice size_t GetEntryCount() const; size_t GetNodeCount() const; @@ -121,25 +121,25 @@ public: virtual ui64 Save(const TBuilderImpl* builder, IOutputStream& os) const = 0; virtual ui64 SaveAndDestroy(TBuilderImpl* builder, IOutputStream& os) = 0; virtual void Destroy(TBuilderImpl*) { } - - // Tries to find key in subtree. - // Returns next node to find the key in (to avoid recursive calls) - // If it has end result, writes it to @value and @result arguments and returns nullptr - virtual const TNode* Find(TStringBuf& key, TData* value, bool& result, const TPacker& packer) const = 0; - virtual const TNode* FindLongestPrefix(TStringBuf& key, TData* value, bool& result, const TPacker& packer) const = 0; + + // Tries to find key in subtree. + // Returns next node to find the key in (to avoid recursive calls) + // If it has end result, writes it to @value and @result arguments and returns nullptr + virtual const TNode* Find(TStringBuf& key, TData* value, bool& result, const TPacker& packer) const = 0; + virtual const TNode* FindLongestPrefix(TStringBuf& key, TData* value, bool& result, const TPacker& packer) const = 0; }; class TArcSet: public ISubtree, public TCompactVector<TArc> { public: typedef typename TCompactVector<TArc>::iterator iterator; - typedef typename TCompactVector<TArc>::const_iterator const_iterator; + typedef typename TCompactVector<TArc>::const_iterator const_iterator; - TArcSet() { + TArcSet() { Y_ASSERT(reinterpret_cast<ISubtree*>(this) == static_cast<void*>(this)); // This assumption is used in TNode::Subtree() - } - + } + iterator Find(char ch); - const_iterator Find(char ch) const; + const_iterator Find(char ch) const; void Add(const TBlob& s, TNode* node); bool IsLast() const override { @@ -148,9 +148,9 @@ public: const TNode* Find(TStringBuf& key, TData* value, bool& result, const TPacker& packer) const override; const TNode* FindLongestPrefix(TStringBuf& key, TData* value, bool& result, const TPacker& packer) const override { - return Find(key, value, result, packer); - } - + return Find(key, value, result, packer); + } + ui64 Measure(const TBuilderImpl* builder) const override { return MeasureRange(builder, 0, this->size()); } @@ -217,40 +217,40 @@ public: struct TBufferedSubtree: public ISubtree { TArrayWithSizeHolder<char> Buffer; - TBufferedSubtree() { + TBufferedSubtree() { Y_ASSERT(reinterpret_cast<ISubtree*>(this) == static_cast<void*>(this)); // This assumption is used in TNode::Subtree() - } - + } + bool IsLast() const override { return Buffer.Empty(); } const TNode* Find(TStringBuf& key, TData* value, bool& result, const TPacker& packer) const override { - if (Buffer.Empty()) { - result = false; - return nullptr; - } - - TCompactTrie<char, D, S> trie(Buffer.Get(), Buffer.Size(), packer); + if (Buffer.Empty()) { + result = false; + return nullptr; + } + + TCompactTrie<char, D, S> trie(Buffer.Get(), Buffer.Size(), packer); result = trie.Find(key.data(), key.size(), value); - - return nullptr; - } - + + return nullptr; + } + const TNode* FindLongestPrefix(TStringBuf& key, TData* value, bool& result, const TPacker& packer) const override { - if (Buffer.Empty()) { - result = false; - return nullptr; - } - - TCompactTrie<char, D, S> trie(Buffer.Get(), Buffer.Size(), packer); - size_t prefixLen = 0; + if (Buffer.Empty()) { + result = false; + return nullptr; + } + + TCompactTrie<char, D, S> trie(Buffer.Get(), Buffer.Size(), packer); + size_t prefixLen = 0; result = trie.FindLongestPrefix(key.data(), key.size(), &prefixLen, value); - key = key.SubStr(prefixLen); - - return nullptr; - } - + key = key.SubStr(prefixLen); + + return nullptr; + } + ui64 Measure(const TBuilderImpl*) const override { return Buffer.Size(); } @@ -283,7 +283,7 @@ public: Data.Reset(new TData); Data->FileName = fileName; Data->Size = size; - + Y_ASSERT(reinterpret_cast<ISubtree*>(this) == static_cast<void*>(this)); // This assumption is used in TNode::Subtree() } @@ -292,30 +292,30 @@ public: } const TNode* Find(TStringBuf& key, typename TCompactTrieBuilder::TData* value, bool& result, const TPacker& packer) const override { - if (!Data) { - result = false; - return nullptr; - } - - TCompactTrie<char, D, S> trie(TBlob::FromFile(Data->FileName), packer); + if (!Data) { + result = false; + return nullptr; + } + + TCompactTrie<char, D, S> trie(TBlob::FromFile(Data->FileName), packer); result = trie.Find(key.data(), key.size(), value); - return nullptr; - } - + return nullptr; + } + const TNode* FindLongestPrefix(TStringBuf& key, typename TCompactTrieBuilder::TData* value, bool& result, const TPacker& packer) const override { - if (!Data) { - result = false; - return nullptr; - } - - TCompactTrie<char, D, S> trie(TBlob::FromFile(Data->FileName), packer); - size_t prefixLen = 0; + if (!Data) { + result = false; + return nullptr; + } + + TCompactTrie<char, D, S> trie(TBlob::FromFile(Data->FileName), packer); + size_t prefixLen = 0; result = trie.FindLongestPrefix(key.data(), key.size(), &prefixLen, value); - key = key.SubStr(prefixLen); - - return nullptr; - } - + key = key.SubStr(prefixLen); + + return nullptr; + } + ui64 Measure(const TBuilderImpl*) const override { return Data->Size; } @@ -351,26 +351,26 @@ public: EPayload PayloadType; - inline const char* PayloadPtr() const { - return ((const char*) this) + sizeof(TNode); - } - + inline const char* PayloadPtr() const { + return ((const char*) this) + sizeof(TNode); + } + inline char* PayloadPtr() { return ((char*) this) + sizeof(TNode); } // *Payload() - inline const char*& PayloadAsPtr() const { - const char** payload = (const char**) PayloadPtr(); - return *payload; - } - + inline const char*& PayloadAsPtr() const { + const char** payload = (const char**) PayloadPtr(); + return *payload; + } + inline char*& PayloadAsPtr() { char** payload = (char**) PayloadPtr(); return *payload; } - inline const char* GetPayload() const { + inline const char* GetPayload() const { switch (PayloadType) { case DATA_INSIDE: return PayloadPtr(); @@ -383,11 +383,11 @@ public: } } - inline char* GetPayload() { - const TNode* thiz = this; - return const_cast<char*>(thiz->GetPayload()); // const_cast is to avoid copy-paste style - } - + inline char* GetPayload() { + const TNode* thiz = this; + return const_cast<char*>(thiz->GetPayload()); // const_cast is to avoid copy-paste style + } + bool IsFinal() const { return PayloadType != DATA_ABSENT; } @@ -420,8 +420,8 @@ public: // TCompactTrieBuilder template <class T, class D, class S> -TCompactTrieBuilder<T, D, S>::TCompactTrieBuilder(TCompactTrieBuilderFlags flags, TPacker packer, IAllocator* alloc) - : Impl(new TCompactTrieBuilderImpl(flags, packer, alloc)) +TCompactTrieBuilder<T, D, S>::TCompactTrieBuilder(TCompactTrieBuilderFlags flags, TPacker packer, IAllocator* alloc) + : Impl(new TCompactTrieBuilderImpl(flags, packer, alloc)) { } @@ -452,7 +452,7 @@ bool TCompactTrieBuilder<T, D, S>::Find(const TSymbol* key, size_t keylen, TData template <class T, class D, class S> bool TCompactTrieBuilder<T, D, S>::FindLongestPrefix( - const TSymbol* key, size_t keylen, size_t* prefixlen, TData* value) const { + const TSymbol* key, size_t keylen, size_t* prefixlen, TData* value) const { return Impl->FindLongestPrefix(key, keylen, prefixlen, value); } @@ -484,10 +484,10 @@ size_t TCompactTrieBuilder<T, D, S>::GetNodeCount() const { // TCompactTrieBuilder::TCompactTrieBuilderImpl template <class T, class D, class S> -TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TCompactTrieBuilderImpl(TCompactTrieBuilderFlags flags, TPacker packer, IAllocator* alloc) - : Pool(1000000, TMemoryPool::TLinearGrow::Instance(), alloc) +TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TCompactTrieBuilderImpl(TCompactTrieBuilderFlags flags, TPacker packer, IAllocator* alloc) + : Pool(1000000, TMemoryPool::TLinearGrow::Instance(), alloc) , PayloadSize(sizeof(void*)) // XXX: find better value - , NodeAllocator(new TFixedSizeAllocator(sizeof(TNode) + PayloadSize, alloc)) + , NodeAllocator(new TFixedSizeAllocator(sizeof(TNode) + PayloadSize, alloc)) , Flags(flags) , EntryCount(0) , NodeCount(1) @@ -662,81 +662,81 @@ template <class T, class D, class S> bool TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::FindEntry(const TSymbol* key, size_t keylen, TData* value) const { using namespace NCompactTrie; - if (!keylen) { - const char zero = '\0'; - return FindEntryImpl(&zero, 1, value); - } else { - size_t ckeylen = keylen * sizeof(TSymbol); - TTempBuf ckeybuf(ckeylen); - ConvertSymbolArrayToChar(key, keylen, ckeybuf, ckeylen); - return FindEntryImpl(ckeybuf.Data(), ckeylen, value); + if (!keylen) { + const char zero = '\0'; + return FindEntryImpl(&zero, 1, value); + } else { + size_t ckeylen = keylen * sizeof(TSymbol); + TTempBuf ckeybuf(ckeylen); + ConvertSymbolArrayToChar(key, keylen, ckeybuf, ckeylen); + return FindEntryImpl(ckeybuf.Data(), ckeylen, value); } -} - -template <class T, class D, class S> -bool TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::FindEntryImpl(const char* keyptr, size_t keylen, TData* value) const { - const TNode* node = Root; - bool result = false; - TStringBuf key(keyptr, keylen); - while (key && (node = node->Subtree()->Find(key, value, result, Packer))) { +} + +template <class T, class D, class S> +bool TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::FindEntryImpl(const char* keyptr, size_t keylen, TData* value) const { + const TNode* node = Root; + bool result = false; + TStringBuf key(keyptr, keylen); + while (key && (node = node->Subtree()->Find(key, value, result, Packer))) { } - return result; + return result; } template <class T, class D, class S> bool TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::FindLongestPrefix( - const TSymbol* key, size_t keylen, size_t* prefixlen, TData* value) const { + const TSymbol* key, size_t keylen, size_t* prefixlen, TData* value) const { using namespace NCompactTrie; - if (!keylen) { - const char zero = '\0'; - const bool ret = FindLongestPrefixImpl(&zero, 1, prefixlen, value); - if (ret && prefixlen) - *prefixlen = 0; // empty key found - return ret; - } else { - size_t ckeylen = keylen * sizeof(TSymbol); - TTempBuf ckeybuf(ckeylen); - ConvertSymbolArrayToChar(key, keylen, ckeybuf, ckeylen); - bool ret = FindLongestPrefixImpl(ckeybuf.Data(), ckeylen, prefixlen, value); - if (ret && prefixlen && *prefixlen == 1 && ckeybuf.Data()[0] == '\0') - *prefixlen = 0; // if we have found empty key, set prefixlen to zero - else if (!ret) // try to find value with empty key, because empty key is prefix of a every key - ret = FindLongestPrefix(nullptr, 0, prefixlen, value); - - if (ret && prefixlen) - *prefixlen /= sizeof(TSymbol); - - return ret; + if (!keylen) { + const char zero = '\0'; + const bool ret = FindLongestPrefixImpl(&zero, 1, prefixlen, value); + if (ret && prefixlen) + *prefixlen = 0; // empty key found + return ret; + } else { + size_t ckeylen = keylen * sizeof(TSymbol); + TTempBuf ckeybuf(ckeylen); + ConvertSymbolArrayToChar(key, keylen, ckeybuf, ckeylen); + bool ret = FindLongestPrefixImpl(ckeybuf.Data(), ckeylen, prefixlen, value); + if (ret && prefixlen && *prefixlen == 1 && ckeybuf.Data()[0] == '\0') + *prefixlen = 0; // if we have found empty key, set prefixlen to zero + else if (!ret) // try to find value with empty key, because empty key is prefix of a every key + ret = FindLongestPrefix(nullptr, 0, prefixlen, value); + + if (ret && prefixlen) + *prefixlen /= sizeof(TSymbol); + + return ret; } -} - -template <class T, class D, class S> -bool TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::FindLongestPrefixImpl(const char* keyptr, size_t keylen, size_t* prefixLen, TData* value) const { - const TNode* node = Root; - const TNode* lastFinalNode = nullptr; - bool endResult = false; - TStringBuf key(keyptr, keylen); - TStringBuf keyTail = key; - TStringBuf lastFinalKeyTail; - while (keyTail && (node = node->Subtree()->FindLongestPrefix(keyTail, value, endResult, Packer))) { - if (endResult) // no more ways to find prefix and prefix has been found - break; - - if (node->IsFinal()) { - lastFinalNode = node; - lastFinalKeyTail = keyTail; +} + +template <class T, class D, class S> +bool TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::FindLongestPrefixImpl(const char* keyptr, size_t keylen, size_t* prefixLen, TData* value) const { + const TNode* node = Root; + const TNode* lastFinalNode = nullptr; + bool endResult = false; + TStringBuf key(keyptr, keylen); + TStringBuf keyTail = key; + TStringBuf lastFinalKeyTail; + while (keyTail && (node = node->Subtree()->FindLongestPrefix(keyTail, value, endResult, Packer))) { + if (endResult) // no more ways to find prefix and prefix has been found + break; + + if (node->IsFinal()) { + lastFinalNode = node; + lastFinalKeyTail = keyTail; } } - if (!endResult && lastFinalNode) { + if (!endResult && lastFinalNode) { if (value) - Packer.UnpackLeaf(lastFinalNode->GetPayload(), *value); - keyTail = lastFinalKeyTail; - endResult = true; + Packer.UnpackLeaf(lastFinalNode->GetPayload(), *value); + keyTail = lastFinalKeyTail; + endResult = true; } - if (endResult && prefixLen) + if (endResult && prefixLen) *prefixLen = keyTail ? key.size() - keyTail.size() : key.size(); - return endResult; + return endResult; } template <class T, class D, class S> @@ -991,60 +991,60 @@ typename TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TNode::TArcSet:: } template <class T, class D, class S> -typename TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TNode::TArcSet::const_iterator - TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TNode::TArcSet::Find(char ch) const { - using namespace NCompTriePrivate; - const_iterator it = LowerBound(this->begin(), this->end(), ch, TCmp()); - - if (it != this->end() && it->Label[0] == (unsigned char)ch) { - return it; - } - - return this->end(); -} - -template <class T, class D, class S> +typename TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TNode::TArcSet::const_iterator + TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TNode::TArcSet::Find(char ch) const { + using namespace NCompTriePrivate; + const_iterator it = LowerBound(this->begin(), this->end(), ch, TCmp()); + + if (it != this->end() && it->Label[0] == (unsigned char)ch) { + return it; + } + + return this->end(); +} + +template <class T, class D, class S> void TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TNode::TArcSet::Add(const TBlob& s, TNode* node) { using namespace NCompTriePrivate; this->insert(LowerBound(this->begin(), this->end(), s[0], TCmp()), TArc(s, node)); } -template <class T, class D, class S> -const typename TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TNode* - TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TNode::TArcSet::Find( - TStringBuf& key, TData* value, bool& result, const TPacker& packer) const { - result = false; - if (!key) - return nullptr; - - const const_iterator it = Find(key[0]); - if (it != this->end()) { - const char* const arcLabel = it->Label.AsCharPtr(); - const size_t arcLabelLen = it->Label.Length(); +template <class T, class D, class S> +const typename TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TNode* + TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TNode::TArcSet::Find( + TStringBuf& key, TData* value, bool& result, const TPacker& packer) const { + result = false; + if (!key) + return nullptr; + + const const_iterator it = Find(key[0]); + if (it != this->end()) { + const char* const arcLabel = it->Label.AsCharPtr(); + const size_t arcLabelLen = it->Label.Length(); if (key.size() >= arcLabelLen && memcmp(key.data(), arcLabel, arcLabelLen) == 0) { - const TStringBuf srcKey = key; - key = key.SubStr(arcLabelLen); - const TNode* const node = it->Node; + const TStringBuf srcKey = key; + key = key.SubStr(arcLabelLen); + const TNode* const node = it->Node; if (srcKey.size() == arcLabelLen) { - // unpack value of it->Node, if it has value - if (!node->IsFinal()) - return nullptr; - - if (value) - packer.UnpackLeaf(node->GetPayload(), *value); - - result = true; - return nullptr; - } - - // find in subtree - return node; - } - } - - return nullptr; -} - + // unpack value of it->Node, if it has value + if (!node->IsFinal()) + return nullptr; + + if (value) + packer.UnpackLeaf(node->GetPayload(), *value); + + result = true; + return nullptr; + } + + // find in subtree + return node; + } + } + + return nullptr; +} + // Different //---------------------------------------------------------------------------------------------------------------------- diff --git a/library/cpp/containers/comptrie/comptrie_impl.h b/library/cpp/containers/comptrie/comptrie_impl.h index f41c38311a..d0ef94a518 100644 --- a/library/cpp/containers/comptrie/comptrie_impl.h +++ b/library/cpp/containers/comptrie/comptrie_impl.h @@ -180,7 +180,7 @@ namespace NCompactTrie { // Advances the data pointer to the root of the subtrie beginning after the symbol, // zeroes it if this subtrie is empty. // If there is a value associated with the symbol, makes the value pointer point to it, - // otherwise sets it to nullptr. + // otherwise sets it to nullptr. // Returns true if the symbol was succesfully found in the trie, false otherwise. template <typename TSymbol, class TPacker> Y_FORCE_INLINE bool Advance(const char*& datapos, const char* const dataend, const char*& value, @@ -193,7 +193,7 @@ namespace NCompactTrie { return false; // no such arc } - value = nullptr; + value = nullptr; Y_ASSERT(datapos <= dataend); if ((flags & MT_FINAL)) { @@ -203,7 +203,7 @@ namespace NCompactTrie { if (!(flags & MT_NEXT)) { if (i == 0) { - datapos = nullptr; + datapos = nullptr; return true; } return false; // no further way diff --git a/library/cpp/containers/comptrie/comptrie_trie.h b/library/cpp/containers/comptrie/comptrie_trie.h index 40ec1e52b3..f006f3cf79 100644 --- a/library/cpp/containers/comptrie/comptrie_trie.h +++ b/library/cpp/containers/comptrie/comptrie_trie.h @@ -80,8 +80,8 @@ public: bool IsInitialized() const; bool IsEmpty() const; - bool Find(const TSymbol* key, size_t keylen, TData* value = nullptr) const; - bool Find(const TKeyBuf& key, TData* value = nullptr) const { + bool Find(const TSymbol* key, size_t keylen, TData* value = nullptr) const; + bool Find(const TKeyBuf& key, TData* value = nullptr) const { return Find(key.data(), key.size(), value); } @@ -122,8 +122,8 @@ public: void FindPhrases(const TKeyBuf& key, TPhraseMatchVector& matches, TSymbol separator = TSymbol(' ')) const { return FindPhrases(key.data(), key.size(), matches, separator); } - bool FindLongestPrefix(const TSymbol* key, size_t keylen, size_t* prefixLen, TData* value = nullptr, bool* hasNext = nullptr) const; - bool FindLongestPrefix(const TKeyBuf& key, size_t* prefixLen, TData* value = nullptr, bool* hasNext = nullptr) const { + bool FindLongestPrefix(const TSymbol* key, size_t keylen, size_t* prefixLen, TData* value = nullptr, bool* hasNext = nullptr) const; + bool FindLongestPrefix(const TKeyBuf& key, size_t* prefixLen, TData* value = nullptr, bool* hasNext = nullptr) const { return FindLongestPrefix(key.data(), key.size(), prefixLen, value, hasNext); } @@ -315,18 +315,18 @@ void TCompactTrie<T, D, S>::Init(const TBlob& data, TPacker packer) { template <class T, class D, class S> bool TCompactTrie<T, D, S>::IsInitialized() const { - return DataHolder.Data() != nullptr; + return DataHolder.Data() != nullptr; } template <class T, class D, class S> bool TCompactTrie<T, D, S>::IsEmpty() const { - return DataHolder.Size() == 0 && EmptyValue == nullptr; + return DataHolder.Size() == 0 && EmptyValue == nullptr; } template <class T, class D, class S> -bool TCompactTrie<T, D, S>::Find(const TSymbol* key, size_t keylen, TData* value) const { +bool TCompactTrie<T, D, S>::Find(const TSymbol* key, size_t keylen, TData* value) const { size_t prefixLen = 0; - const char* valuepos = nullptr; + const char* valuepos = nullptr; bool hasNext; if (!LookupLongestPrefix(key, keylen, prefixLen, valuepos, hasNext) || prefixLen != keylen) return false; @@ -366,7 +366,7 @@ bool TCompactTrie<T, D, S>::FindTails(const TSymbol* key, size_t keylen, TCompac const char* const dataend = datapos + len; const TSymbol* keyend = key + keylen; - const char* value = nullptr; + const char* value = nullptr; while (key != keyend) { T label = *(key++); @@ -400,7 +400,7 @@ inline bool TCompactTrie<T, D, S>::FindTails(TSymbol label, TCompactTrie<T, D, S const char* datastart = DataHolder.AsCharPtr(); const char* dataend = datastart + len; const char* datapos = datastart; - const char* value = nullptr; + const char* value = nullptr; if (!NCompactTrie::Advance(datapos, dataend, value, label, Packer)) return false; @@ -460,8 +460,8 @@ void TCompactTrie<T, D, S>::Print(IOutputStream& os) { } template <class T, class D, class S> -bool TCompactTrie<T, D, S>::FindLongestPrefix(const TSymbol* key, size_t keylen, size_t* prefixLen, TData* value, bool* hasNext) const { - const char* valuepos = nullptr; +bool TCompactTrie<T, D, S>::FindLongestPrefix(const TSymbol* key, size_t keylen, size_t* prefixLen, TData* value, bool* hasNext) const { + const char* valuepos = nullptr; size_t tempPrefixLen = 0; bool tempHasNext; bool found = LookupLongestPrefix(key, keylen, tempPrefixLen, valuepos, tempHasNext); @@ -475,7 +475,7 @@ bool TCompactTrie<T, D, S>::FindLongestPrefix(const TSymbol* key, size_t keylen, } template <class T, class D, class S> -bool TCompactTrie<T, D, S>::LookupLongestPrefix(const TSymbol* key, size_t keylen, size_t& prefixLen, const char*& valuepos, bool& hasNext) const { +bool TCompactTrie<T, D, S>::LookupLongestPrefix(const TSymbol* key, size_t keylen, size_t& prefixLen, const char*& valuepos, bool& hasNext) const { using namespace NCompactTrie; const char* datapos = DataHolder.AsCharPtr(); diff --git a/library/cpp/containers/comptrie/comptrie_ut.cpp b/library/cpp/containers/comptrie/comptrie_ut.cpp index 74bee09b5d..707468d90e 100644 --- a/library/cpp/containers/comptrie/comptrie_ut.cpp +++ b/library/cpp/containers/comptrie/comptrie_ut.cpp @@ -5,7 +5,7 @@ #include <utility> #include <util/charset/wide.h> -#include <util/generic/algorithm.h> +#include <util/generic/algorithm.h> #include <util/generic/buffer.h> #include <util/generic/map.h> #include <util/generic/vector.h> @@ -17,9 +17,9 @@ #include <util/random/random.h> #include <util/random/fast.h> -#include <util/string/hex.h> +#include <util/string/hex.h> #include <util/string/cast.h> - + #include "comptrie.h" #include "set.h" #include "first_symbol_iterator.h" @@ -27,8 +27,8 @@ #include "pattern_searcher.h" #include <array> -#include <iterator> - +#include <iterator> + class TCompactTrieTest: public TTestBase { private: @@ -108,7 +108,7 @@ private: UNIT_TEST(TestBuilderFindLongestPrefix); UNIT_TEST(TestBuilderFindLongestPrefixWithEmptyValue); - + UNIT_TEST(TestPatternSearcherEmpty); UNIT_TEST(TestPatternSearcherSimple); UNIT_TEST(TestPatternSearcherRandom); @@ -242,10 +242,10 @@ public: void TestFirstSymbolIteratorChar32(); void TestArrayPacker(); - - void TestBuilderFindLongestPrefix(); - void TestBuilderFindLongestPrefix(size_t keysCount, double branchProbability, bool isPrefixGrouped, bool hasEmptyKey); - void TestBuilderFindLongestPrefixWithEmptyValue(); + + void TestBuilderFindLongestPrefix(); + void TestBuilderFindLongestPrefix(size_t keysCount, double branchProbability, bool isPrefixGrouped, bool hasEmptyKey); + void TestBuilderFindLongestPrefixWithEmptyValue(); void TestPatternSearcherOnDataset( const TVector<TString>& patterns, @@ -396,7 +396,7 @@ void TCompactTrieTest::CheckData(const char* data, size_t datalen) { UNIT_ASSERT(trie.FindLongestPrefix(key, &prefixLen, &value)); UNIT_ASSERT_EQUAL(len, prefixLen); UNIT_ASSERT_EQUAL(len * 2, value); - UNIT_ASSERT(trie.FindLongestPrefix(key, &prefixLen, nullptr)); + UNIT_ASSERT(trie.FindLongestPrefix(key, &prefixLen, nullptr)); UNIT_ASSERT_EQUAL(len, prefixLen); } @@ -646,21 +646,21 @@ void TCompactTrieTest::TestUninitializedNonEmpty() { UNIT_ASSERT(it == tails.End()); } -static char RandChar() { - return char(RandomNumber<size_t>() % 256); -} - +static char RandChar() { + return char(RandomNumber<size_t>() % 256); +} + static TString RandStr(const size_t max) { size_t len = RandomNumber<size_t>() % max; TString key; for (size_t j = 0; j < len; ++j) - key += RandChar(); + key += RandChar(); return key; } template <class T, bool minimize> void TCompactTrieTest::TestRandom(const size_t n, const size_t maxKeySize) { - const TStringBuf EMPTY_KEY = TStringBuf("", 1); + const TStringBuf EMPTY_KEY = TStringBuf("", 1); TCompactTrieBuilder<char, typename T::TData, T> builder; typedef TMap<TString, typename T::TData> TKeys; TKeys keys; @@ -668,7 +668,7 @@ void TCompactTrieTest::TestRandom(const size_t n, const size_t maxKeySize) { typename T::TData dummy; for (size_t i = 0; i < n; ++i) { const TString key = RandStr(maxKeySize); - if (key != EMPTY_KEY && keys.find(key) == keys.end()) { + if (key != EMPTY_KEY && keys.find(key) == keys.end()) { const typename T::TData val = T::Data(key); keys[key] = val; UNIT_ASSERT_C(!builder.Find(key.data(), key.size(), &dummy), "key = " << HexEncode(TString(key))); @@ -691,7 +691,7 @@ void TCompactTrieTest::TestRandom(const size_t n, const size_t maxKeySize) { TCompactTrieBuilder<char, typename T::TData, T> prefixGroupedBuilder(CTBF_PREFIX_GROUPED); for (typename TKeys::const_iterator i = keys.begin(), mi = keys.end(); i != mi; ++i) { - UNIT_ASSERT(!prefixGroupedBuilder.Find(i->first.c_str(), i->first.size(), &dummy)); + UNIT_ASSERT(!prefixGroupedBuilder.Find(i->first.c_str(), i->first.size(), &dummy)); UNIT_ASSERT(trie.Find(i->first.c_str(), i->first.size(), &dummy)); UNIT_ASSERT(dummy == i->second); if (minimize) { @@ -700,17 +700,17 @@ void TCompactTrieTest::TestRandom(const size_t n, const size_t maxKeySize) { } prefixGroupedBuilder.Add(i->first.c_str(), i->first.size(), dummy); - UNIT_ASSERT(prefixGroupedBuilder.Find(i->first.c_str(), i->first.size(), &dummy)); - - for (typename TKeys::const_iterator j = keys.begin(), end = keys.end(); j != end; ++j) { - typename T::TData valFound; - if (j->first <= i->first) { - UNIT_ASSERT(prefixGroupedBuilder.Find(j->first.c_str(), j->first.size(), &valFound)); - UNIT_ASSERT_VALUES_EQUAL(j->second, valFound); - } else { - UNIT_ASSERT(!prefixGroupedBuilder.Find(j->first.c_str(), j->first.size(), &valFound)); - } - } + UNIT_ASSERT(prefixGroupedBuilder.Find(i->first.c_str(), i->first.size(), &dummy)); + + for (typename TKeys::const_iterator j = keys.begin(), end = keys.end(); j != end; ++j) { + typename T::TData valFound; + if (j->first <= i->first) { + UNIT_ASSERT(prefixGroupedBuilder.Find(j->first.c_str(), j->first.size(), &valFound)); + UNIT_ASSERT_VALUES_EQUAL(j->second, valFound); + } else { + UNIT_ASSERT(!prefixGroupedBuilder.Find(j->first.c_str(), j->first.size(), &valFound)); + } + } } TBufferStream prefixGroupedBuffer; @@ -790,18 +790,18 @@ void TCompactTrieTest::TestPrefixGrouped() { }; for (size_t i = 0; i < Y_ARRAY_SIZE(data); ++i) { - ui32 val = strlen(data[i]) + 1; - b1.Add(data[i], strlen(data[i]), val); + ui32 val = strlen(data[i]) + 1; + b1.Add(data[i], strlen(data[i]), val); for (size_t j = 0; j < Y_ARRAY_SIZE(data); ++j) { - ui32 mustHave = strlen(data[j]) + 1; - ui32 found = 0; - if (j <= i) { - UNIT_ASSERT(b1.Find(data[j], strlen(data[j]), &found)); - UNIT_ASSERT_VALUES_EQUAL(mustHave, found); - } else { - UNIT_ASSERT(!b1.Find(data[j], strlen(data[j]), &found)); - } - } + ui32 mustHave = strlen(data[j]) + 1; + ui32 found = 0; + if (j <= i) { + UNIT_ASSERT(b1.Find(data[j], strlen(data[j]), &found)); + UNIT_ASSERT_VALUES_EQUAL(mustHave, found); + } else { + UNIT_ASSERT(!b1.Find(data[j], strlen(data[j]), &found)); + } + } } { @@ -1017,7 +1017,7 @@ class TCompactTrieTest::TDummyPacker: public TNullPacker<T> { public: static T Data(const TString&) { T data; - TNullPacker<T>().UnpackLeaf(nullptr, data); + TNullPacker<T>().UnpackLeaf(nullptr, data); return data; } @@ -1280,7 +1280,7 @@ void TCompactTrieTest::TestFindLongestPrefixWithEmptyValue() { } { TCompactTrie<wchar16, ui32> trie(buffer.Buffer().Data(), buffer.Buffer().Size()); - size_t prefixLen = 123; + size_t prefixLen = 123; ui32 value = 0; UNIT_ASSERT(trie.FindLongestPrefix(u"google", &prefixLen, &value)); @@ -1465,121 +1465,121 @@ void TCompactTrieTest::TestArrayPacker() { UNIT_ASSERT_VALUES_EQUAL(dataZzz.second, trieTwo.Get(dataZzz.first)); UNIT_ASSERT_VALUES_EQUAL(dataWww.second, trieTwo.Get(dataWww.first)); } - -void TCompactTrieTest::TestBuilderFindLongestPrefix() { - const size_t sizes[] = {10, 100}; + +void TCompactTrieTest::TestBuilderFindLongestPrefix() { + const size_t sizes[] = {10, 100}; const double branchProbabilities[] = {0.01, 0.1, 0.5, 0.9, 0.99}; - for (size_t size : sizes) { - for (double branchProbability : branchProbabilities) { - TestBuilderFindLongestPrefix(size, branchProbability, false, false); - TestBuilderFindLongestPrefix(size, branchProbability, false, true); - TestBuilderFindLongestPrefix(size, branchProbability, true, false); - TestBuilderFindLongestPrefix(size, branchProbability, true, true); - } - } -} - -void TCompactTrieTest::TestBuilderFindLongestPrefix(size_t keysCount, double branchProbability, bool isPrefixGrouped, bool hasEmptyKey) { + for (size_t size : sizes) { + for (double branchProbability : branchProbabilities) { + TestBuilderFindLongestPrefix(size, branchProbability, false, false); + TestBuilderFindLongestPrefix(size, branchProbability, false, true); + TestBuilderFindLongestPrefix(size, branchProbability, true, false); + TestBuilderFindLongestPrefix(size, branchProbability, true, true); + } + } +} + +void TCompactTrieTest::TestBuilderFindLongestPrefix(size_t keysCount, double branchProbability, bool isPrefixGrouped, bool hasEmptyKey) { TVector<TString> keys; TString keyToAdd; - for (size_t i = 0; i < keysCount; ++i) { - const size_t prevKeyLen = keyToAdd.Size(); - // add two random chars to prev key - keyToAdd += RandChar(); - keyToAdd += RandChar(); - const bool changeBranch = prevKeyLen && RandomNumber<double>() < branchProbability; - if (changeBranch) { - const size_t branchPlace = RandomNumber<size_t>(prevKeyLen + 1); // random place in [0, prevKeyLen] - *(keyToAdd.begin() + branchPlace) = RandChar(); - } - keys.push_back(keyToAdd); - } - - if (isPrefixGrouped) - Sort(keys.begin(), keys.end()); - else + for (size_t i = 0; i < keysCount; ++i) { + const size_t prevKeyLen = keyToAdd.Size(); + // add two random chars to prev key + keyToAdd += RandChar(); + keyToAdd += RandChar(); + const bool changeBranch = prevKeyLen && RandomNumber<double>() < branchProbability; + if (changeBranch) { + const size_t branchPlace = RandomNumber<size_t>(prevKeyLen + 1); // random place in [0, prevKeyLen] + *(keyToAdd.begin() + branchPlace) = RandChar(); + } + keys.push_back(keyToAdd); + } + + if (isPrefixGrouped) + Sort(keys.begin(), keys.end()); + else Shuffle(keys.begin(), keys.end()); - + TCompactTrieBuilder<char, TString> builder(isPrefixGrouped ? CTBF_PREFIX_GROUPED : CTBF_NONE); const TString EMPTY_VALUE = "empty"; - if (hasEmptyKey) - builder.Add(nullptr, 0, EMPTY_VALUE); - - for (size_t i = 0; i < keysCount; ++i) { + if (hasEmptyKey) + builder.Add(nullptr, 0, EMPTY_VALUE); + + for (size_t i = 0; i < keysCount; ++i) { const TString& key = keys[i]; - - for (size_t j = 0; j < keysCount; ++j) { + + for (size_t j = 0; j < keysCount; ++j) { const TString& otherKey = keys[j]; - const bool exists = j < i; - size_t expectedSize = 0; - if (exists) { - expectedSize = otherKey.size(); - } else { - size_t max = 0; - for (size_t k = 0; k < i; ++k) + const bool exists = j < i; + size_t expectedSize = 0; + if (exists) { + expectedSize = otherKey.size(); + } else { + size_t max = 0; + for (size_t k = 0; k < i; ++k) if (keys[k].Size() < otherKey.Size() && keys[k].Size() > max && otherKey.StartsWith(keys[k])) - max = keys[k].Size(); - expectedSize = max; - } - - size_t prefixSize = 0xfcfcfc; + max = keys[k].Size(); + expectedSize = max; + } + + size_t prefixSize = 0xfcfcfc; TString value = "abcd"; - const bool expectedResult = hasEmptyKey || expectedSize != 0; + const bool expectedResult = hasEmptyKey || expectedSize != 0; UNIT_ASSERT_VALUES_EQUAL_C(expectedResult, builder.FindLongestPrefix(otherKey.data(), otherKey.size(), &prefixSize, &value), "otherKey = " << HexEncode(otherKey)); - if (expectedResult) { - UNIT_ASSERT_VALUES_EQUAL(expectedSize, prefixSize); - if (expectedSize) { - UNIT_ASSERT_VALUES_EQUAL(TStringBuf(otherKey).SubStr(0, prefixSize), value); - } else { - UNIT_ASSERT_VALUES_EQUAL(EMPTY_VALUE, value); - } - } else { - UNIT_ASSERT_VALUES_EQUAL("abcd", value); - UNIT_ASSERT_VALUES_EQUAL(0xfcfcfc, prefixSize); - } - - for (int c = 0; c < 10; ++c) { + if (expectedResult) { + UNIT_ASSERT_VALUES_EQUAL(expectedSize, prefixSize); + if (expectedSize) { + UNIT_ASSERT_VALUES_EQUAL(TStringBuf(otherKey).SubStr(0, prefixSize), value); + } else { + UNIT_ASSERT_VALUES_EQUAL(EMPTY_VALUE, value); + } + } else { + UNIT_ASSERT_VALUES_EQUAL("abcd", value); + UNIT_ASSERT_VALUES_EQUAL(0xfcfcfc, prefixSize); + } + + for (int c = 0; c < 10; ++c) { TString extendedKey = otherKey; - extendedKey += RandChar(); - size_t extendedPrefixSize = 0xdddddd; + extendedKey += RandChar(); + size_t extendedPrefixSize = 0xdddddd; TString extendedValue = "dcba"; UNIT_ASSERT_VALUES_EQUAL(expectedResult, builder.FindLongestPrefix(extendedKey.data(), extendedKey.size(), &extendedPrefixSize, &extendedValue)); - if (expectedResult) { - UNIT_ASSERT_VALUES_EQUAL(value, extendedValue); - UNIT_ASSERT_VALUES_EQUAL(prefixSize, extendedPrefixSize); - } else { - UNIT_ASSERT_VALUES_EQUAL("dcba", extendedValue); - UNIT_ASSERT_VALUES_EQUAL(0xdddddd, extendedPrefixSize); - } - } - } + if (expectedResult) { + UNIT_ASSERT_VALUES_EQUAL(value, extendedValue); + UNIT_ASSERT_VALUES_EQUAL(prefixSize, extendedPrefixSize); + } else { + UNIT_ASSERT_VALUES_EQUAL("dcba", extendedValue); + UNIT_ASSERT_VALUES_EQUAL(0xdddddd, extendedPrefixSize); + } + } + } builder.Add(key.data(), key.size(), key); - } - - TBufferOutput buffer; - builder.Save(buffer); -} - -void TCompactTrieTest::TestBuilderFindLongestPrefixWithEmptyValue() { - TCompactTrieBuilder<wchar16, ui32> builder; + } + + TBufferOutput buffer; + builder.Save(buffer); +} + +void TCompactTrieTest::TestBuilderFindLongestPrefixWithEmptyValue() { + TCompactTrieBuilder<wchar16, ui32> builder; builder.Add(u"", 42); builder.Add(u"yandex", 271828); builder.Add(u"ya", 31415); - - size_t prefixLen = 123; - ui32 value = 0; - + + size_t prefixLen = 123; + ui32 value = 0; + UNIT_ASSERT(builder.FindLongestPrefix(u"google", &prefixLen, &value)); - UNIT_ASSERT_VALUES_EQUAL(prefixLen, 0); - UNIT_ASSERT_VALUES_EQUAL(value, 42); - + UNIT_ASSERT_VALUES_EQUAL(prefixLen, 0); + UNIT_ASSERT_VALUES_EQUAL(value, 42); + UNIT_ASSERT(builder.FindLongestPrefix(u"yahoo", &prefixLen, &value)); - UNIT_ASSERT_VALUES_EQUAL(prefixLen, 2); - UNIT_ASSERT_VALUES_EQUAL(value, 31415); - - TBufferOutput buffer; - builder.Save(buffer); -} + UNIT_ASSERT_VALUES_EQUAL(prefixLen, 2); + UNIT_ASSERT_VALUES_EQUAL(value, 31415); + + TBufferOutput buffer; + builder.Save(buffer); +} void TCompactTrieTest::TestPatternSearcherEmpty() { TCompactPatternSearcherBuilder<char, ui32> builder; |