diff options
author | sereglond <sereglond@yandex-team.ru> | 2022-02-10 16:47:47 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:47 +0300 |
commit | 73bb02f2495181e0719a800f979df508924f4b71 (patch) | |
tree | c0748b5dcbade83af788c0abfa89c0383d6b779c /library/cpp/containers/comptrie/comptrie_trie.h | |
parent | eb3d925534734c808602b31b38b953677f0a279f (diff) | |
download | ydb-73bb02f2495181e0719a800f979df508924f4b71.tar.gz |
Restoring authorship annotation for <sereglond@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/containers/comptrie/comptrie_trie.h')
-rw-r--r-- | library/cpp/containers/comptrie/comptrie_trie.h | 322 |
1 files changed, 161 insertions, 161 deletions
diff --git a/library/cpp/containers/comptrie/comptrie_trie.h b/library/cpp/containers/comptrie/comptrie_trie.h index 2b1a42aa0a..40ec1e52b3 100644 --- a/library/cpp/containers/comptrie/comptrie_trie.h +++ b/library/cpp/containers/comptrie/comptrie_trie.h @@ -33,8 +33,8 @@ template <class T = char, class D = ui64, class S = TCompactTriePacker<D>> class TCompactTrie { public: typedef T TSymbol; - typedef D TData; - typedef S TPacker; + typedef D TData; + typedef S TPacker; typedef typename TCompactTrieKeySelector<TSymbol>::TKey TKey; typedef typename TCompactTrieKeySelector<TSymbol>::TKeyBuf TKeyBuf; @@ -42,7 +42,7 @@ public: typedef std::pair<TKey, TData> TValueType; typedef std::pair<size_t, TData> TPhraseMatch; typedef TVector<TPhraseMatch> TPhraseMatchVector; - + typedef TCompactTrieBuilder<T, D, S> TBuilder; protected: @@ -77,8 +77,8 @@ public: void Init(const char* d, size_t len, TPacker packer = TPacker()); void Init(const TBlob& data, TPacker packer = TPacker()); - bool IsInitialized() const; - bool IsEmpty() const; + bool IsInitialized() const; + bool IsEmpty() const; bool Find(const TSymbol* key, size_t keylen, TData* value = nullptr) const; bool Find(const TKeyBuf& key, TData* value = nullptr) const { @@ -118,7 +118,7 @@ public: return Skipper.GetPacker() == &Packer; } - void FindPhrases(const TSymbol* key, size_t keylen, TPhraseMatchVector& matches, TSymbol separator = TSymbol(' ')) const; + void FindPhrases(const TSymbol* key, size_t keylen, TPhraseMatchVector& matches, TSymbol separator = TSymbol(' ')) const; void FindPhrases(const TKeyBuf& key, TPhraseMatchVector& matches, TSymbol separator = TSymbol(' ')) const { return FindPhrases(key.data(), key.size(), matches, separator); } @@ -141,14 +141,14 @@ public: // return false, if no arc with @label exists inline bool FindTails(TSymbol label, TCompactTrie<T, D, S>& res) const; - class TConstIterator { + class TConstIterator { private: typedef NCompactTrie::TOpaqueTrieIterator TOpaqueTrieIterator; typedef NCompactTrie::TOpaqueTrie TOpaqueTrie; friend class TCompactTrie; TConstIterator(const TOpaqueTrie& trie, const char* emptyValue, bool atend, TPacker packer); // only usable from Begin() and End() methods TConstIterator(const TOpaqueTrie& trie, const char* emptyValue, const TKeyBuf& key, TPacker packer); // only usable from UpperBound() method - + public: TConstIterator() = default; bool IsEmpty() const { @@ -157,11 +157,11 @@ public: bool operator==(const TConstIterator& other) const; bool operator!=(const TConstIterator& other) const; - TConstIterator& operator++(); - TConstIterator operator++(int /*unused*/); + TConstIterator& operator++(); + TConstIterator operator++(int /*unused*/); TConstIterator& operator--(); TConstIterator operator--(int /*unused*/); - TValueType operator*(); + TValueType operator*(); TKey GetKey() const; size_t GetKeySize() const; @@ -169,14 +169,14 @@ public: void GetValue(TData& data) const; const char* GetValuePtr() const; - private: + private: TPacker Packer; TCopyPtr<TOpaqueTrieIterator> Impl; }; - TConstIterator Begin() const; + TConstIterator Begin() const; TConstIterator begin() const; - TConstIterator End() const; + TConstIterator End() const; TConstIterator end() const; // Returns an iterator pointing to the smallest key in the trie >= the argument. @@ -194,9 +194,9 @@ public: friend class TPrefixIterator<TCompactTrie>; protected: - explicit TCompactTrie(const char* emptyValue); + explicit TCompactTrie(const char* emptyValue); TCompactTrie(const TBlob& data, const char* emptyValue, TPacker packer = TPacker()); - + bool LookupLongestPrefix(const TSymbol* key, size_t keylen, size_t& prefixLen, const char*& valuepos, bool& hasNext) const; bool LookupLongestPrefix(const TSymbol* key, size_t keylen, size_t& prefixLen, const char*& valuepos) const { bool hasNext; @@ -207,49 +207,49 @@ protected: template <class T = char, class D = ui64, class S = TCompactTriePacker<D>> class TCompactTrieHolder: public TCompactTrie<T, D, S>, NNonCopyable::TNonCopyable { -private: +private: typedef TCompactTrie<T, D, S> TBase; TArrayHolder<char> Storage; -public: +public: TCompactTrieHolder(IInputStream& is, size_t len); -}; +}; + +//------------------------// +// Implementation section // +//------------------------// -//------------------------// -// Implementation section // -//------------------------// +// TCompactTrie -// TCompactTrie - template <class T, class D, class S> TCompactTrie<T, D, S>::TCompactTrie(const TBlob& data, TPacker packer) - : DataHolder(data) + : DataHolder(data) , Packer(packer) -{ - Init(data, packer); -} - +{ + Init(data, packer); +} + template <class T, class D, class S> TCompactTrie<T, D, S>::TCompactTrie(const char* d, size_t len, TPacker packer) : Packer(packer) -{ +{ Init(d, len, packer); -} - +} + template <class T, class D, class S> TCompactTrie<T, D, S>::TCompactTrie(const char* emptyValue) - : EmptyValue(emptyValue) + : EmptyValue(emptyValue) { } - + template <class T, class D, class S> TCompactTrie<T, D, S>::TCompactTrie(const TBlob& data, const char* emptyValue, TPacker packer) - : DataHolder(data) - , EmptyValue(emptyValue) + : DataHolder(data) + , EmptyValue(emptyValue) , Packer(packer) { } - + template <class T, class D, class S> TCompactTrie<T, D, S>::TCompactTrie(const TCompactTrie& other) : DataHolder(other.DataHolder) @@ -289,43 +289,43 @@ TCompactTrie<T, D, S>& TCompactTrie<T, D, S>::operator=(TCompactTrie&& other) no template <class T, class D, class S> void TCompactTrie<T, D, S>::Init(const char* d, size_t len, TPacker packer) { Init(TBlob::NoCopy(d, len), packer); -} - +} + template <class T, class D, class S> void TCompactTrie<T, D, S>::Init(const TBlob& data, TPacker packer) { - using namespace NCompactTrie; - - DataHolder = data; + using namespace NCompactTrie; + + DataHolder = data; Packer = packer; - + const char* datapos = DataHolder.AsCharPtr(); size_t len = DataHolder.Length(); - if (!len) - return; - - const char* const dataend = datapos + len; - - const char* emptypos = datapos; - char flags = LeapByte(emptypos, dataend, 0); - if (emptypos && (flags & MT_FINAL)) { + if (!len) + return; + + const char* const dataend = datapos + len; + + const char* emptypos = datapos; + char flags = LeapByte(emptypos, dataend, 0); + if (emptypos && (flags & MT_FINAL)) { Y_ASSERT(emptypos <= dataend); - EmptyValue = emptypos; - } -} - + EmptyValue = emptypos; + } +} + template <class T, class D, class S> bool TCompactTrie<T, D, S>::IsInitialized() const { return DataHolder.Data() != nullptr; -} - +} + template <class T, class D, class S> bool TCompactTrie<T, D, S>::IsEmpty() const { return DataHolder.Size() == 0 && EmptyValue == nullptr; -} - +} + template <class T, class D, class S> bool TCompactTrie<T, D, S>::Find(const TSymbol* key, size_t keylen, TData* value) const { - size_t prefixLen = 0; + size_t prefixLen = 0; const char* valuepos = nullptr; bool hasNext; if (!LookupLongestPrefix(key, keylen, prefixLen, valuepos, hasNext) || prefixLen != keylen) @@ -333,13 +333,13 @@ bool TCompactTrie<T, D, S>::Find(const TSymbol* key, size_t keylen, TData* value if (value) Packer.UnpackLeaf(valuepos, *value); return true; -} - +} + template <class T, class D, class S> void TCompactTrie<T, D, S>::FindPhrases(const TSymbol* key, size_t keylen, TPhraseMatchVector& matches, TSymbol separator) const { LookupPhrases(DataHolder.AsCharPtr(), DataHolder.Length(), key, keylen, matches, separator); -} - +} + template <class T, class D, class S> inline TCompactTrie<T, D, S> TCompactTrie<T, D, S>::FindTails(const TSymbol* key, size_t keylen) const { TCompactTrie<T, D, S> ret; @@ -349,46 +349,46 @@ inline TCompactTrie<T, D, S> TCompactTrie<T, D, S>::FindTails(const TSymbol* key template <class T, class D, class S> bool TCompactTrie<T, D, S>::FindTails(const TSymbol* key, size_t keylen, TCompactTrie<T, D, S>& res) const { - using namespace NCompactTrie; - - size_t len = DataHolder.Length(); - - if (!key || !len) + using namespace NCompactTrie; + + size_t len = DataHolder.Length(); + + if (!key || !len) return false; - - if (!keylen) { + + if (!keylen) { res = *this; return true; - } - + } + const char* datastart = DataHolder.AsCharPtr(); const char* datapos = datastart; const char* const dataend = datapos + len; - const TSymbol* keyend = key + keylen; + const TSymbol* keyend = key + keylen; const char* value = nullptr; - while (key != keyend) { - T label = *(key++); + while (key != keyend) { + T label = *(key++); if (!NCompactTrie::Advance(datapos, dataend, value, label, Packer)) return false; - + if (key == keyend) { if (datapos) { Y_ASSERT(datapos >= datastart); res = TCompactTrie<T, D, S>(TBlob::NoCopy(datapos, dataend - datapos), value); } else { res = TCompactTrie<T, D, S>(value); - } + } return true; } else if (!datapos) { return false; // No further way - } - } - + } + } + return false; -} - +} + template <class T, class D, class S> inline bool TCompactTrie<T, D, S>::FindTails(TSymbol label, TCompactTrie<T, D, S>& res) const { using namespace NCompactTrie; @@ -419,8 +419,8 @@ template <class T, class D, class S> typename TCompactTrie<T, D, S>::TConstIterator TCompactTrie<T, D, S>::Begin() const { NCompactTrie::TOpaqueTrie self(DataHolder.AsCharPtr(), DataHolder.Length(), Skipper); return TConstIterator(self, EmptyValue, false, Packer); -} - +} + template <class T, class D, class S> typename TCompactTrie<T, D, S>::TConstIterator TCompactTrie<T, D, S>::begin() const { return Begin(); @@ -430,8 +430,8 @@ template <class T, class D, class S> typename TCompactTrie<T, D, S>::TConstIterator TCompactTrie<T, D, S>::End() const { NCompactTrie::TOpaqueTrie self(DataHolder.AsCharPtr(), DataHolder.Length(), Skipper); return TConstIterator(self, EmptyValue, true, Packer); -} - +} + template <class T, class D, class S> typename TCompactTrie<T, D, S>::TConstIterator TCompactTrie<T, D, S>::end() const { return End(); @@ -462,11 +462,11 @@ void TCompactTrie<T, D, S>::Print(IOutputStream& os) { template <class T, class D, class S> bool TCompactTrie<T, D, S>::FindLongestPrefix(const TSymbol* key, size_t keylen, size_t* prefixLen, TData* value, bool* hasNext) const { const char* valuepos = nullptr; - size_t tempPrefixLen = 0; + size_t tempPrefixLen = 0; bool tempHasNext; bool found = LookupLongestPrefix(key, keylen, tempPrefixLen, valuepos, tempHasNext); - if (prefixLen) - *prefixLen = tempPrefixLen; + if (prefixLen) + *prefixLen = tempPrefixLen; if (found && value) Packer.UnpackLeaf(valuepos, *value); if (hasNext) @@ -476,38 +476,38 @@ bool TCompactTrie<T, D, S>::FindLongestPrefix(const TSymbol* key, size_t keylen, template <class T, class D, class S> bool TCompactTrie<T, D, S>::LookupLongestPrefix(const TSymbol* key, size_t keylen, size_t& prefixLen, const char*& valuepos, bool& hasNext) const { - using namespace NCompactTrie; - + using namespace NCompactTrie; + const char* datapos = DataHolder.AsCharPtr(); size_t len = DataHolder.Length(); - prefixLen = 0; + prefixLen = 0; hasNext = false; bool found = false; - if (EmptyValue) { - valuepos = EmptyValue; - found = true; - } - - if (!key || !len) + if (EmptyValue) { + valuepos = EmptyValue; + found = true; + } + + if (!key || !len) return found; - - const char* const dataend = datapos + len; - + + const char* const dataend = datapos + len; + const T* keyend = key + keylen; - while (key != keyend) { - T label = *(key++); + while (key != keyend) { + T label = *(key++); for (i64 i = (i64)ExtraBits<TSymbol>(); i >= 0; i -= 8) { const char flags = LeapByte(datapos, dataend, (char)(label >> i)); if (!datapos) { return found; // no such arc } - + Y_ASSERT(datapos <= dataend); if ((flags & MT_FINAL)) { prefixLen = keylen - (keyend - key) - (i ? 1 : 0); - valuepos = datapos; + valuepos = datapos; hasNext = flags & MT_NEXT; found = true; @@ -516,67 +516,67 @@ bool TCompactTrie<T, D, S>::LookupLongestPrefix(const TSymbol* key, size_t keyle } datapos += Packer.SkipLeaf(datapos); // skip intermediate leaf nodes } - + if (!(flags & MT_NEXT)) { return found; // no further way - } - } - } - + } + } + } + return found; -} - +} + template <class T, class D, class S> void TCompactTrie<T, D, S>::LookupPhrases( - const char* datapos, size_t len, const TSymbol* key, size_t keylen, + const char* datapos, size_t len, const TSymbol* key, size_t keylen, TVector<TPhraseMatch>& matches, TSymbol separator) const { - using namespace NCompactTrie; - - matches.clear(); - - if (!key || !len) - return; - - const T* const keystart = key; - const T* const keyend = key + keylen; - const char* const dataend = datapos + len; + using namespace NCompactTrie; + + matches.clear(); + + if (!key || !len) + return; + + const T* const keystart = key; + const T* const keyend = key + keylen; + const char* const dataend = datapos + len; while (datapos && key != keyend) { - T label = *(key++); + T label = *(key++); const char* value = nullptr; if (!Advance(datapos, dataend, value, label, Packer)) { return; - } + } if (value && (key == keyend || *key == separator)) { size_t matchlength = (size_t)(key - keystart); D data; Packer.UnpackLeaf(value, data); matches.push_back(TPhraseMatch(matchlength, data)); } - } -} - -// TCompactTrieHolder - -template <class T, class D, class S> + } +} + +// TCompactTrieHolder + +template <class T, class D, class S> TCompactTrieHolder<T, D, S>::TCompactTrieHolder(IInputStream& is, size_t len) - : Storage(new char[len]) -{ - if (is.Load(Storage.Get(), len) != len) { + : Storage(new char[len]) +{ + if (is.Load(Storage.Get(), len) != len) { ythrow yexception() << "bad data load"; - } - TBase::Init(Storage.Get(), len); -} - + } + TBase::Init(Storage.Get(), len); +} + //---------------------------------------------------------------------------------------------------------------- -// TCompactTrie::TConstIterator - +// TCompactTrie::TConstIterator + template <class T, class D, class S> TCompactTrie<T, D, S>::TConstIterator::TConstIterator(const TOpaqueTrie& trie, const char* emptyValue, bool atend, TPacker packer) : Packer(packer) , Impl(new TOpaqueTrieIterator(trie, emptyValue, atend)) { } - + template <class T, class D, class S> TCompactTrie<T, D, S>::TConstIterator::TConstIterator(const TOpaqueTrie& trie, const char* emptyValue, const TKeyBuf& key, TPacker packer) : Packer(packer) @@ -591,27 +591,27 @@ bool TCompactTrie<T, D, S>::TConstIterator::operator==(const TConstIterator& oth return !other.Impl; if (!other.Impl) return false; - return *Impl == *other.Impl; -} - + return *Impl == *other.Impl; +} + template <class T, class D, class S> bool TCompactTrie<T, D, S>::TConstIterator::operator!=(const TConstIterator& other) const { return !operator==(other); -} - +} + template <class T, class D, class S> typename TCompactTrie<T, D, S>::TConstIterator& TCompactTrie<T, D, S>::TConstIterator::operator++() { - Impl->Forward(); - return *this; -} - + Impl->Forward(); + return *this; +} + template <class T, class D, class S> typename TCompactTrie<T, D, S>::TConstIterator TCompactTrie<T, D, S>::TConstIterator::operator++(int /*unused*/) { - TConstIterator copy(*this); - Impl->Forward(); - return copy; -} - + TConstIterator copy(*this); + Impl->Forward(); + return copy; +} + template <class T, class D, class S> typename TCompactTrie<T, D, S>::TConstIterator& TCompactTrie<T, D, S>::TConstIterator::operator--() { Impl->Backward(); @@ -627,14 +627,14 @@ typename TCompactTrie<T, D, S>::TConstIterator TCompactTrie<T, D, S>::TConstIter template <class T, class D, class S> typename TCompactTrie<T, D, S>::TValueType TCompactTrie<T, D, S>::TConstIterator::operator*() { - return TValueType(GetKey(), GetValue()); -} - + return TValueType(GetKey(), GetValue()); +} + template <class T, class D, class S> typename TCompactTrie<T, D, S>::TKey TCompactTrie<T, D, S>::TConstIterator::GetKey() const { return Impl->GetKey<TSymbol>(); -} - +} + template <class T, class D, class S> size_t TCompactTrie<T, D, S>::TConstIterator::GetKeySize() const { return Impl->MeasureKey<TSymbol>(); |