diff options
author | mowgli <mowgli@yandex-team.ru> | 2022-02-10 16:49:25 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:49:25 +0300 |
commit | 56c39b3cf908e7202b1f7551a1653681e8015607 (patch) | |
tree | 5d5cb817648f650d76cf1076100726fd9b8448e8 /library/cpp/containers | |
parent | 89afbbe4ca0e02e386dd4df08f7945f190dc1b84 (diff) | |
download | ydb-56c39b3cf908e7202b1f7551a1653681e8015607.tar.gz |
Restoring authorship annotation for <mowgli@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/containers')
-rw-r--r-- | library/cpp/containers/comptrie/comptrie_impl.h | 168 | ||||
-rw-r--r-- | library/cpp/containers/comptrie/comptrie_trie.h | 74 | ||||
-rw-r--r-- | library/cpp/containers/comptrie/comptrie_ut.cpp | 90 | ||||
-rw-r--r-- | library/cpp/containers/comptrie/set.h | 60 | ||||
-rw-r--r-- | library/cpp/containers/comptrie/ya.make | 2 | ||||
-rw-r--r-- | library/cpp/containers/ring_buffer/ring_buffer.h | 134 |
6 files changed, 264 insertions, 264 deletions
diff --git a/library/cpp/containers/comptrie/comptrie_impl.h b/library/cpp/containers/comptrie/comptrie_impl.h index c36da12e13..f41c38311a 100644 --- a/library/cpp/containers/comptrie/comptrie_impl.h +++ b/library/cpp/containers/comptrie/comptrie_impl.h @@ -26,10 +26,10 @@ namespace NCompactTrie { return (sizeof(T) - 1) * 8; } - static inline bool IsEpsilonLink(const char flags) { - return !(flags & (MT_FINAL | MT_NEXT)); - } - + static inline bool IsEpsilonLink(const char flags) { + return !(flags & (MT_FINAL | MT_NEXT)); + } + static inline void TraverseEpsilon(const char*& datapos) { const char flags = *datapos; if (!IsEpsilonLink(flags)) { @@ -41,14 +41,14 @@ namespace NCompactTrie { datapos += offset; } - static inline size_t LeftOffsetLen(const char flags) { - return (flags >> MT_LEFTSHIFT) & MT_SIZEMASK; - } - - static inline size_t RightOffsetLen(const char flags) { - return flags & MT_SIZEMASK; - } - + static inline size_t LeftOffsetLen(const char flags) { + return (flags >> MT_LEFTSHIFT) & MT_SIZEMASK; + } + + static inline size_t RightOffsetLen(const char flags) { + return flags & MT_SIZEMASK; + } + void ShowProgress(size_t n); // just print dots } @@ -100,82 +100,82 @@ namespace NCompactTrie { os.Write(buf, len); return len; } - - // Unpack the offset to the next node. The encoding scheme can store offsets - // up to 7 bytes; whether they fit into size_t is another issue. + + // Unpack the offset to the next node. The encoding scheme can store offsets + // up to 7 bytes; whether they fit into size_t is another issue. Y_FORCE_INLINE size_t UnpackOffset(const char* p, size_t len) { - size_t result = 0; - - while (len--) - result = ((result << 8) | (*(p++) & 0xFF)); - - return result; - } - - // Auxiliary function: consumes one character from the input. Advances the data pointer - // to the position immediately preceding the value for the link just traversed (if any); - // returns flags associated with the link. If no arc with the required label is present, - // zeroes the data pointer. + size_t result = 0; + + while (len--) + result = ((result << 8) | (*(p++) & 0xFF)); + + return result; + } + + // Auxiliary function: consumes one character from the input. Advances the data pointer + // to the position immediately preceding the value for the link just traversed (if any); + // returns flags associated with the link. If no arc with the required label is present, + // zeroes the data pointer. Y_FORCE_INLINE char LeapByte(const char*& datapos, const char* dataend, char label) { - while (datapos < dataend) { - size_t offsetlength, offset; - const char* startpos = datapos; - char flags = *(datapos++); - - if (IsEpsilonLink(flags)) { - // Epsilon link - jump to the specified offset without further checks. - // These links are created during minimization: original uncompressed - // tree does not need them. (If we find a way to package 3 offset lengths - // into 1 byte, we could get rid of them; but it looks like they do no harm. + while (datapos < dataend) { + size_t offsetlength, offset; + const char* startpos = datapos; + char flags = *(datapos++); + + if (IsEpsilonLink(flags)) { + // Epsilon link - jump to the specified offset without further checks. + // These links are created during minimization: original uncompressed + // tree does not need them. (If we find a way to package 3 offset lengths + // into 1 byte, we could get rid of them; but it looks like they do no harm. Y_ASSERT(datapos < dataend); - offsetlength = flags & MT_SIZEMASK; - offset = UnpackOffset(datapos, offsetlength); - if (!offset) - break; - datapos = startpos + offset; - - continue; - } - - char ch = *(datapos++); - - // Left branch - offsetlength = LeftOffsetLen(flags); - if ((unsigned char)label < (unsigned char)ch) { - offset = UnpackOffset(datapos, offsetlength); - if (!offset) - break; - - datapos = startpos + offset; - - continue; - } - - datapos += offsetlength; - - // Right branch - offsetlength = RightOffsetLen(flags); - if ((unsigned char)label > (unsigned char)ch) { - offset = UnpackOffset(datapos, offsetlength); - - if (!offset) - break; - - datapos = startpos + offset; - - continue; - } - - // Got a match; return position right before the contents for the label - datapos += offsetlength; - return flags; - } - - // if we got here, we're past the dataend - bail out ASAP + offsetlength = flags & MT_SIZEMASK; + offset = UnpackOffset(datapos, offsetlength); + if (!offset) + break; + datapos = startpos + offset; + + continue; + } + + char ch = *(datapos++); + + // Left branch + offsetlength = LeftOffsetLen(flags); + if ((unsigned char)label < (unsigned char)ch) { + offset = UnpackOffset(datapos, offsetlength); + if (!offset) + break; + + datapos = startpos + offset; + + continue; + } + + datapos += offsetlength; + + // Right branch + offsetlength = RightOffsetLen(flags); + if ((unsigned char)label > (unsigned char)ch) { + offset = UnpackOffset(datapos, offsetlength); + + if (!offset) + break; + + datapos = startpos + offset; + + continue; + } + + // Got a match; return position right before the contents for the label + datapos += offsetlength; + return flags; + } + + // if we got here, we're past the dataend - bail out ASAP datapos = nullptr; - return 0; - } - + return 0; + } + // Auxiliary function: consumes one (multibyte) symbol from the input. // Advances the data pointer to the root of the subtrie beginning after the symbol, // zeroes it if this subtrie is empty. diff --git a/library/cpp/containers/comptrie/comptrie_trie.h b/library/cpp/containers/comptrie/comptrie_trie.h index 9bf4d61825..40ec1e52b3 100644 --- a/library/cpp/containers/comptrie/comptrie_trie.h +++ b/library/cpp/containers/comptrie/comptrie_trie.h @@ -127,7 +127,7 @@ public: return FindLongestPrefix(key.data(), key.size(), prefixLen, value, hasNext); } - // Return trie, containing all tails for the given key + // Return trie, containing all tails for the given key inline TCompactTrie<T, D, S> FindTails(const TSymbol* key, size_t keylen) const; TCompactTrie<T, D, S> FindTails(const TKeyBuf& key) const { return FindTails(key.data(), key.size()); @@ -137,10 +137,10 @@ public: return FindTails(key.data(), key.size(), res); } - // same as FindTails(&key, 1), a bit faster - // return false, if no arc with @label exists + // same as FindTails(&key, 1), a bit faster + // return false, if no arc with @label exists inline bool FindTails(TSymbol label, TCompactTrie<T, D, S>& res) const; - + class TConstIterator { private: typedef NCompactTrie::TOpaqueTrieIterator TOpaqueTrieIterator; @@ -343,10 +343,10 @@ void TCompactTrie<T, D, S>::FindPhrases(const TSymbol* key, size_t keylen, TPhra template <class T, class D, class S> inline TCompactTrie<T, D, S> TCompactTrie<T, D, S>::FindTails(const TSymbol* key, size_t keylen) const { TCompactTrie<T, D, S> ret; - FindTails(key, keylen, ret); - return ret; -} - + FindTails(key, keylen, ret); + return ret; +} + template <class T, class D, class S> bool TCompactTrie<T, D, S>::FindTails(const TSymbol* key, size_t keylen, TCompactTrie<T, D, S>& res) const { using namespace NCompactTrie; @@ -354,11 +354,11 @@ bool TCompactTrie<T, D, S>::FindTails(const TSymbol* key, size_t keylen, TCompac size_t len = DataHolder.Length(); if (!key || !len) - return false; + return false; if (!keylen) { - res = *this; - return true; + res = *this; + return true; } const char* datastart = DataHolder.AsCharPtr(); @@ -386,35 +386,35 @@ bool TCompactTrie<T, D, S>::FindTails(const TSymbol* key, size_t keylen, TCompac } } - return false; + return false; } template <class T, class D, class S> inline bool TCompactTrie<T, D, S>::FindTails(TSymbol label, TCompactTrie<T, D, S>& res) const { - using namespace NCompactTrie; - + using namespace NCompactTrie; + const size_t len = DataHolder.Length(); - if (!len) - return false; - + if (!len) + return false; + const char* datastart = DataHolder.AsCharPtr(); - const char* dataend = datastart + len; - const char* datapos = datastart; + const char* dataend = datastart + len; + const char* datapos = datastart; const char* value = nullptr; if (!NCompactTrie::Advance(datapos, dataend, value, label, Packer)) return false; - + if (datapos) { Y_ASSERT(datapos >= datastart); res = TCompactTrie<T, D, S>(TBlob::NoCopy(datapos, dataend - datapos), value); } else { res = TCompactTrie<T, D, S>(value); - } - + } + return true; -} - +} + template <class T, class D, class S> typename TCompactTrie<T, D, S>::TConstIterator TCompactTrie<T, D, S>::Begin() const { NCompactTrie::TOpaqueTrie self(DataHolder.AsCharPtr(), DataHolder.Length(), Skipper); @@ -495,30 +495,30 @@ bool TCompactTrie<T, D, S>::LookupLongestPrefix(const TSymbol* key, size_t keyle const char* const dataend = datapos + len; - const T* keyend = key + keylen; + const T* keyend = key + keylen; while (key != keyend) { T label = *(key++); - for (i64 i = (i64)ExtraBits<TSymbol>(); i >= 0; i -= 8) { + for (i64 i = (i64)ExtraBits<TSymbol>(); i >= 0; i -= 8) { const char flags = LeapByte(datapos, dataend, (char)(label >> i)); - if (!datapos) { - return found; // no such arc - } + if (!datapos) { + return found; // no such arc + } Y_ASSERT(datapos <= dataend); - if ((flags & MT_FINAL)) { + if ((flags & MT_FINAL)) { prefixLen = keylen - (keyend - key) - (i ? 1 : 0); valuepos = datapos; hasNext = flags & MT_NEXT; found = true; - if (!i && key == keyend) { // last byte, and got a match - return found; - } - datapos += Packer.SkipLeaf(datapos); // skip intermediate leaf nodes - } + if (!i && key == keyend) { // last byte, and got a match + return found; + } + datapos += Packer.SkipLeaf(datapos); // skip intermediate leaf nodes + } - if (!(flags & MT_NEXT)) { - return found; // no further way + if (!(flags & MT_NEXT)) { + return found; // no further way } } } diff --git a/library/cpp/containers/comptrie/comptrie_ut.cpp b/library/cpp/containers/comptrie/comptrie_ut.cpp index 1a8dca293a..74bee09b5d 100644 --- a/library/cpp/containers/comptrie/comptrie_ut.cpp +++ b/library/cpp/containers/comptrie/comptrie_ut.cpp @@ -21,7 +21,7 @@ #include <util/string/cast.h> #include "comptrie.h" -#include "set.h" +#include "set.h" #include "first_symbol_iterator.h" #include "search_iterator.h" #include "pattern_searcher.h" @@ -74,7 +74,7 @@ private: UNIT_TEST(TestIterateEmptyKey); UNIT_TEST(TestTrieSet); - + UNIT_TEST(TestTrieForVectorInt64); UNIT_TEST(TestTrieForListInt64); UNIT_TEST(TestTrieForSetInt64); @@ -209,8 +209,8 @@ public: void TestClear(); void TestIterateEmptyKey(); - - void TestTrieSet(); + + void TestTrieSet(); void TestTrieForVectorInt64(); void TestTrieForListInt64(); @@ -1060,48 +1060,48 @@ void TCompactTrieTest::TestIterateEmptyKey() { UNIT_ASSERT(it.GetValue() == 1); } -void TCompactTrieTest::TestTrieSet() { - TBuffer buffer; - { - TCompactTrieSet<char>::TBuilder builder; - UNIT_ASSERT(builder.Add("a", 0)); - UNIT_ASSERT(builder.Add("ab", 1)); - UNIT_ASSERT(builder.Add("abc", 1)); - UNIT_ASSERT(builder.Add("abcd", 0)); - UNIT_ASSERT(!builder.Add("abcd", 1)); - - TBufferStream stream(buffer); - builder.Save(stream); - } - - TCompactTrieSet<char> set(TBlob::FromBuffer(buffer)); - UNIT_ASSERT(set.Has("a")); - UNIT_ASSERT(set.Has("ab")); - UNIT_ASSERT(set.Has("abc")); - UNIT_ASSERT(set.Has("abcd")); - UNIT_ASSERT(!set.Has("abcde")); - UNIT_ASSERT(!set.Has("aa")); - UNIT_ASSERT(!set.Has("b")); - UNIT_ASSERT(!set.Has("")); - - TCompactTrieSet<char> tails; - UNIT_ASSERT(set.FindTails("a", tails)); - UNIT_ASSERT(tails.Has("b")); - UNIT_ASSERT(tails.Has("bcd")); - UNIT_ASSERT(!tails.Has("ab")); - UNIT_ASSERT(!set.Has("")); - - TCompactTrieSet<char> empty; - UNIT_ASSERT(set.FindTails("abcd", empty)); - UNIT_ASSERT(!empty.Has("a")); - UNIT_ASSERT(!empty.Has("b")); - UNIT_ASSERT(!empty.Has("c")); - UNIT_ASSERT(!empty.Has("d")); - UNIT_ASSERT(!empty.Has("d")); - +void TCompactTrieTest::TestTrieSet() { + TBuffer buffer; + { + TCompactTrieSet<char>::TBuilder builder; + UNIT_ASSERT(builder.Add("a", 0)); + UNIT_ASSERT(builder.Add("ab", 1)); + UNIT_ASSERT(builder.Add("abc", 1)); + UNIT_ASSERT(builder.Add("abcd", 0)); + UNIT_ASSERT(!builder.Add("abcd", 1)); + + TBufferStream stream(buffer); + builder.Save(stream); + } + + TCompactTrieSet<char> set(TBlob::FromBuffer(buffer)); + UNIT_ASSERT(set.Has("a")); + UNIT_ASSERT(set.Has("ab")); + UNIT_ASSERT(set.Has("abc")); + UNIT_ASSERT(set.Has("abcd")); + UNIT_ASSERT(!set.Has("abcde")); + UNIT_ASSERT(!set.Has("aa")); + UNIT_ASSERT(!set.Has("b")); + UNIT_ASSERT(!set.Has("")); + + TCompactTrieSet<char> tails; + UNIT_ASSERT(set.FindTails("a", tails)); + UNIT_ASSERT(tails.Has("b")); + UNIT_ASSERT(tails.Has("bcd")); + UNIT_ASSERT(!tails.Has("ab")); + UNIT_ASSERT(!set.Has("")); + + TCompactTrieSet<char> empty; + UNIT_ASSERT(set.FindTails("abcd", empty)); + UNIT_ASSERT(!empty.Has("a")); + UNIT_ASSERT(!empty.Has("b")); + UNIT_ASSERT(!empty.Has("c")); + UNIT_ASSERT(!empty.Has("d")); + UNIT_ASSERT(!empty.Has("d")); + UNIT_ASSERT(empty.Has("")); // contains only empty string -} - +} + // Tests for trie with vector (list, set) values TVector<TUtf16String> TCompactTrieTest::GetSampleKeys(size_t nKeys) const { diff --git a/library/cpp/containers/comptrie/set.h b/library/cpp/containers/comptrie/set.h index e165e8650f..acd43338f0 100644 --- a/library/cpp/containers/comptrie/set.h +++ b/library/cpp/containers/comptrie/set.h @@ -1,40 +1,40 @@ #pragma once -#include "comptrie_trie.h" - -template <typename T = char> +#include "comptrie_trie.h" + +template <typename T = char> class TCompactTrieSet: public TCompactTrie<T, ui8, TNullPacker<ui8>> { -public: +public: typedef TCompactTrie<T, ui8, TNullPacker<ui8>> TBase; - + using typename TBase::TBuilder; - using typename TBase::TKey; - using typename TBase::TKeyBuf; + using typename TBase::TKey; + using typename TBase::TKeyBuf; using typename TBase::TSymbol; - + TCompactTrieSet() = default; - - explicit TCompactTrieSet(const TBlob& data) - : TBase(data) - { - } - - template <typename D> + + explicit TCompactTrieSet(const TBlob& data) + : TBase(data) + { + } + + template <typename D> explicit TCompactTrieSet(const TCompactTrie<T, D, TNullPacker<D>>& trie) : TBase(trie.Data()) // should be binary compatible for any D - { - } - - TCompactTrieSet(const char* data, size_t len) - : TBase(data, len) - { - } - - bool Has(const typename TBase::TKeyBuf& key) const { + { + } + + TCompactTrieSet(const char* data, size_t len) + : TBase(data, len) + { + } + + bool Has(const typename TBase::TKeyBuf& key) const { return TBase::Find(key.data(), key.size()); - } - - bool FindTails(const typename TBase::TKeyBuf& key, TCompactTrieSet<T>& res) const { - return TBase::FindTails(key, res); - } -}; + } + + bool FindTails(const typename TBase::TKeyBuf& key, TCompactTrieSet<T>& res) const { + return TBase::FindTails(key, res); + } +}; diff --git a/library/cpp/containers/comptrie/ya.make b/library/cpp/containers/comptrie/ya.make index 7a83c353bd..81352da4b2 100644 --- a/library/cpp/containers/comptrie/ya.make +++ b/library/cpp/containers/comptrie/ya.make @@ -11,7 +11,7 @@ SRCS( first_symbol_iterator.h key_selector.h leaf_skipper.h - set.h + set.h comptrie.cpp comptrie_builder.cpp comptrie_impl.cpp diff --git a/library/cpp/containers/ring_buffer/ring_buffer.h b/library/cpp/containers/ring_buffer/ring_buffer.h index e1f232712c..41220dcf6b 100644 --- a/library/cpp/containers/ring_buffer/ring_buffer.h +++ b/library/cpp/containers/ring_buffer/ring_buffer.h @@ -1,81 +1,81 @@ -#pragma once - -#include <util/generic/vector.h> -#include <util/system/yassert.h> - -template <typename T> -class TSimpleRingBuffer { -public: - TSimpleRingBuffer(size_t maxSize) - : MaxSize(maxSize) - { - Items.reserve(MaxSize); - } - +#pragma once + +#include <util/generic/vector.h> +#include <util/system/yassert.h> + +template <typename T> +class TSimpleRingBuffer { +public: + TSimpleRingBuffer(size_t maxSize) + : MaxSize(maxSize) + { + Items.reserve(MaxSize); + } + TSimpleRingBuffer(const TSimpleRingBuffer&) = default; TSimpleRingBuffer(TSimpleRingBuffer&&) = default; TSimpleRingBuffer& operator=(const TSimpleRingBuffer&) = default; TSimpleRingBuffer& operator=(TSimpleRingBuffer&&) = default; - // First available item - size_t FirstIndex() const { - return Begin; - } - - size_t AvailSize() const { - return Items.size(); - } - - // Total number of items inserted - size_t TotalSize() const { - return FirstIndex() + AvailSize(); - } - - bool IsAvail(size_t index) const { - return index >= FirstIndex() && index < TotalSize(); - } - - const T& operator[](size_t index) const { + // First available item + size_t FirstIndex() const { + return Begin; + } + + size_t AvailSize() const { + return Items.size(); + } + + // Total number of items inserted + size_t TotalSize() const { + return FirstIndex() + AvailSize(); + } + + bool IsAvail(size_t index) const { + return index >= FirstIndex() && index < TotalSize(); + } + + const T& operator[](size_t index) const { Y_ASSERT(IsAvail(index)); - return Items[RealIndex(index)]; - } - - T& operator[](size_t index) { + return Items[RealIndex(index)]; + } + + T& operator[](size_t index) { Y_ASSERT(IsAvail(index)); - return Items[RealIndex(index)]; - } - - void PushBack(const T& t) { - if (Items.size() < MaxSize) { - Items.push_back(t); - } else { - Items[RealIndex(Begin)] = t; - Begin += 1; - } - } - + return Items[RealIndex(index)]; + } + + void PushBack(const T& t) { + if (Items.size() < MaxSize) { + Items.push_back(t); + } else { + Items[RealIndex(Begin)] = t; + Begin += 1; + } + } + void Clear() { Items.clear(); Begin = 0; } -private: - size_t RealIndex(size_t index) const { - return index % MaxSize; - } - -private: - size_t MaxSize; - size_t Begin = 0; +private: + size_t RealIndex(size_t index) const { + return index % MaxSize; + } + +private: + size_t MaxSize; + size_t Begin = 0; TVector<T> Items; -}; - -template <typename T, size_t maxSize> -class TStaticRingBuffer: public TSimpleRingBuffer<T> { -public: - TStaticRingBuffer() - : TSimpleRingBuffer<T>(maxSize) - { - } -}; +}; + +template <typename T, size_t maxSize> +class TStaticRingBuffer: public TSimpleRingBuffer<T> { +public: + TStaticRingBuffer() + : TSimpleRingBuffer<T>(maxSize) + { + } +}; |