aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/containers/comptrie
diff options
context:
space:
mode:
authorAnton Samokhvalov <pg83@yandex.ru>2022-02-10 16:45:17 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:45:17 +0300
commitd3a398281c6fd1d3672036cb2d63f842d2cb28c5 (patch)
treedd4bd3ca0f36b817e96812825ffaf10d645803f2 /library/cpp/containers/comptrie
parent72cb13b4aff9bc9cf22e49251bc8fd143f82538f (diff)
downloadydb-d3a398281c6fd1d3672036cb2d63f842d2cb28c5.tar.gz
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/containers/comptrie')
-rw-r--r--library/cpp/containers/comptrie/array_with_size.h20
-rw-r--r--library/cpp/containers/comptrie/chunked_helpers_trie.h32
-rw-r--r--library/cpp/containers/comptrie/comptrie.cpp16
-rw-r--r--library/cpp/containers/comptrie/comptrie_builder.h10
-rw-r--r--library/cpp/containers/comptrie/comptrie_builder.inl20
-rw-r--r--library/cpp/containers/comptrie/comptrie_impl.cpp52
-rw-r--r--library/cpp/containers/comptrie/comptrie_impl.h34
-rw-r--r--library/cpp/containers/comptrie/comptrie_trie.h70
-rw-r--r--library/cpp/containers/comptrie/comptrie_ut.cpp464
-rw-r--r--library/cpp/containers/comptrie/first_symbol_iterator.h4
-rw-r--r--library/cpp/containers/comptrie/key_selector.h12
-rw-r--r--library/cpp/containers/comptrie/leaf_skipper.h20
-rw-r--r--library/cpp/containers/comptrie/loader/loader.h2
-rw-r--r--library/cpp/containers/comptrie/loader/loader_ut.cpp26
-rw-r--r--library/cpp/containers/comptrie/make_fast_layout.cpp852
-rw-r--r--library/cpp/containers/comptrie/make_fast_layout.h20
-rw-r--r--library/cpp/containers/comptrie/minimize.cpp678
-rw-r--r--library/cpp/containers/comptrie/minimize.h32
-rw-r--r--library/cpp/containers/comptrie/node.cpp124
-rw-r--r--library/cpp/containers/comptrie/node.h128
-rw-r--r--library/cpp/containers/comptrie/opaque_trie_iterator.cpp428
-rw-r--r--library/cpp/containers/comptrie/opaque_trie_iterator.h474
-rw-r--r--library/cpp/containers/comptrie/prefix_iterator.h6
-rw-r--r--library/cpp/containers/comptrie/protopacker.h2
-rw-r--r--library/cpp/containers/comptrie/search_iterator.h4
-rw-r--r--library/cpp/containers/comptrie/set.h16
-rw-r--r--library/cpp/containers/comptrie/write_trie_backwards.cpp188
-rw-r--r--library/cpp/containers/comptrie/writeable_node.cpp156
-rw-r--r--library/cpp/containers/comptrie/writeable_node.h30
-rw-r--r--library/cpp/containers/comptrie/ya.make6
30 files changed, 1963 insertions, 1963 deletions
diff --git a/library/cpp/containers/comptrie/array_with_size.h b/library/cpp/containers/comptrie/array_with_size.h
index 148200fe6a..36e61c7410 100644
--- a/library/cpp/containers/comptrie/array_with_size.h
+++ b/library/cpp/containers/comptrie/array_with_size.h
@@ -3,19 +3,19 @@
#include <util/generic/ptr.h>
#include <util/generic/noncopyable.h>
#include <util/generic/utility.h>
-#include <util/system/sys_alloc.h>
+#include <util/system/sys_alloc.h>
template <typename T>
-class TArrayWithSizeHolder : TNonCopyable {
+class TArrayWithSizeHolder : TNonCopyable {
typedef TArrayWithSizeHolder<T> TThis;
T* Data;
public:
- TArrayWithSizeHolder()
- : Data(nullptr)
- {
- }
+ TArrayWithSizeHolder()
+ : Data(nullptr)
+ {
+ }
~TArrayWithSizeHolder() {
if (!Data)
@@ -26,7 +26,7 @@ public:
} catch (...) {
}
}
- y_deallocate(((size_t*)Data) - 1);
+ y_deallocate(((size_t*)Data) - 1);
}
void Swap(TThis& copy) {
@@ -37,7 +37,7 @@ public:
if (newSize == Size())
return;
TThis copy;
- copy.Data = (T*)(((size_t*)y_allocate(sizeof(size_t) + sizeof(T) * newSize)) + 1);
+ copy.Data = (T*)(((size_t*)y_allocate(sizeof(size_t) + sizeof(T) * newSize)) + 1);
// does not handle constructor exceptions properly
for (size_t i = 0; i < Min(Size(), newSize); ++i) {
new (copy.Data + i) T(Data[i]);
@@ -45,12 +45,12 @@ public:
for (size_t i = Min(Size(), newSize); i < newSize; ++i) {
new (copy.Data + i) T;
}
- ((size_t*)copy.Data)[-1] = newSize;
+ ((size_t*)copy.Data)[-1] = newSize;
Swap(copy);
}
size_t Size() const {
- return Data ? ((size_t*)Data)[-1] : 0;
+ return Data ? ((size_t*)Data)[-1] : 0;
}
bool Empty() const {
diff --git a/library/cpp/containers/comptrie/chunked_helpers_trie.h b/library/cpp/containers/comptrie/chunked_helpers_trie.h
index 6efe55b5f0..cfa35f5ba2 100644
--- a/library/cpp/containers/comptrie/chunked_helpers_trie.h
+++ b/library/cpp/containers/comptrie/chunked_helpers_trie.h
@@ -13,7 +13,7 @@ public:
: Trie(blob)
{
}
-
+
bool Has(const char* key) const {
return Trie.Find(key, strlen(key));
}
@@ -23,7 +23,7 @@ public:
}
};
-template <bool sorted = false>
+template <bool sorted = false>
class TTrieSetWriter {
private:
TCompactTrieBuilder<char> Builder;
@@ -57,24 +57,24 @@ public:
}
};
-template <bool isWriter, bool sorted = false>
+template <bool isWriter, bool sorted = false>
struct TTrieSetG;
-template <bool sorted>
+template <bool sorted>
struct TTrieSetG<false, sorted> {
typedef TTrieSet T;
};
-template <bool sorted>
+template <bool sorted>
struct TTrieSetG<true, sorted> {
typedef TTrieSetWriter<sorted> T;
};
-template <typename T>
+template <typename T>
class TTrieMap {
private:
TCompactTrie<char> Trie;
- static_assert(sizeof(T) <= sizeof(ui64), "expect sizeof(T) <= sizeof(ui64)");
+ static_assert(sizeof(T) <= sizeof(ui64), "expect sizeof(T) <= sizeof(ui64)");
public:
TTrieMap(const TBlob& blob)
@@ -101,17 +101,17 @@ public:
}
}
- const TCompactTrie<char>& GetTrie() const {
+ const TCompactTrie<char>& GetTrie() const {
return Trie;
}
};
-template <typename T, bool sorted = false>
+template <typename T, bool sorted = false>
class TTrieMapWriter {
private:
typedef TCompactTrieBuilder<char> TBuilder;
TBuilder Builder;
- static_assert(sizeof(T) <= sizeof(ui64), "expect sizeof(T) <= sizeof(ui64)");
+ static_assert(sizeof(T) <= sizeof(ui64), "expect sizeof(T) <= sizeof(ui64)");
#ifndef NDEBUG
bool IsSorted;
#endif
@@ -130,12 +130,12 @@ public:
memcpy(&intValue, &value, sizeof(T));
Builder.Add(key, strlen(key), intValue);
#ifndef NDEBUG
- /*
+ /*
if (!IsSorted) {
T test;
assert(Get(key, &test) && value == test);
}
- */
+ */
#endif
}
@@ -177,7 +177,7 @@ public:
}
};
-template <typename T>
+template <typename T>
class TTrieSortedMapWriter {
private:
typedef std::pair<TString, T> TValue;
@@ -204,15 +204,15 @@ public:
}
};
-template <typename X, bool isWriter, bool sorted = false>
+template <typename X, bool isWriter, bool sorted = false>
struct TTrieMapG;
-template <typename X, bool sorted>
+template <typename X, bool sorted>
struct TTrieMapG<X, false, sorted> {
typedef TTrieMap<X> T;
};
-template <typename X, bool sorted>
+template <typename X, bool sorted>
struct TTrieMapG<X, true, sorted> {
typedef TTrieMapWriter<X, sorted> T;
};
diff --git a/library/cpp/containers/comptrie/comptrie.cpp b/library/cpp/containers/comptrie/comptrie.cpp
index 86a006a231..4556e5b571 100644
--- a/library/cpp/containers/comptrie/comptrie.cpp
+++ b/library/cpp/containers/comptrie/comptrie.cpp
@@ -1,8 +1,8 @@
-#include "comptrie_impl.h"
-#include "comptrie.h"
-#include "array_with_size.h"
-#include "comptrie_trie.h"
-#include "comptrie_builder.h"
-#include "protopacker.h"
-#include "set.h"
-#include "chunked_helpers_trie.h"
+#include "comptrie_impl.h"
+#include "comptrie.h"
+#include "array_with_size.h"
+#include "comptrie_trie.h"
+#include "comptrie_builder.h"
+#include "protopacker.h"
+#include "set.h"
+#include "chunked_helpers_trie.h"
diff --git a/library/cpp/containers/comptrie/comptrie_builder.h b/library/cpp/containers/comptrie/comptrie_builder.h
index e0364eef9f..cf7d2e39a3 100644
--- a/library/cpp/containers/comptrie/comptrie_builder.h
+++ b/library/cpp/containers/comptrie/comptrie_builder.h
@@ -26,18 +26,18 @@ enum ECompactTrieBuilderFlags {
using TCompactTrieBuilderFlags = ECompactTrieBuilderFlags;
-inline TCompactTrieBuilderFlags operator|(TCompactTrieBuilderFlags first, TCompactTrieBuilderFlags second) {
+inline TCompactTrieBuilderFlags operator|(TCompactTrieBuilderFlags first, TCompactTrieBuilderFlags second) {
return static_cast<TCompactTrieBuilderFlags>(static_cast<int>(first) | second);
}
-inline TCompactTrieBuilderFlags& operator|=(TCompactTrieBuilderFlags& first, TCompactTrieBuilderFlags second) {
+inline TCompactTrieBuilderFlags& operator|=(TCompactTrieBuilderFlags& first, TCompactTrieBuilderFlags second) {
return first = first | second;
}
template <typename T>
class TArrayWithSizeHolder;
-template <class T = char, class D = ui64, class S = TCompactTriePacker<D>>
+template <class T = char, class D = ui64, class S = TCompactTriePacker<D>>
class TCompactTrieBuilder {
public:
typedef T TSymbol;
@@ -92,12 +92,12 @@ public:
size_t GetEntryCount() const;
size_t GetNodeCount() const;
-
+
// Exact output file size in bytes.
size_t MeasureByteSize() const {
return Impl->MeasureByteSize();
}
-
+
protected:
class TCompactTrieBuilderImpl;
THolder<TCompactTrieBuilderImpl> Impl;
diff --git a/library/cpp/containers/comptrie/comptrie_builder.inl b/library/cpp/containers/comptrie/comptrie_builder.inl
index e19f005d7c..f273fa6571 100644
--- a/library/cpp/containers/comptrie/comptrie_builder.inl
+++ b/library/cpp/containers/comptrie/comptrie_builder.inl
@@ -27,7 +27,7 @@
template <class T, class D, class S>
class TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl {
protected:
- TMemoryPool Pool;
+ TMemoryPool Pool;
size_t PayloadSize;
THolder<TFixedSizeAllocator> NodeAllocator;
class TNode;
@@ -138,7 +138,7 @@ public:
Y_ASSERT(reinterpret_cast<ISubtree*>(this) == static_cast<void*>(this)); // This assumption is used in TNode::Subtree()
}
- iterator Find(char ch);
+ iterator Find(char ch);
const_iterator Find(char ch) const;
void Add(const TBlob& s, TNode* node);
@@ -422,8 +422,8 @@ public:
template <class T, class D, class S>
TCompactTrieBuilder<T, D, S>::TCompactTrieBuilder(TCompactTrieBuilderFlags flags, TPacker packer, IAllocator* alloc)
: Impl(new TCompactTrieBuilderImpl(flags, packer, alloc))
-{
-}
+{
+}
template <class T, class D, class S>
bool TCompactTrieBuilder<T, D, S>::Add(const TSymbol* key, size_t keylen, const TData& value) {
@@ -742,7 +742,7 @@ bool TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::FindLongestPrefixImp
template <class T, class D, class S>
void TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::Clear() {
DestroyNode(Root);
- Pool.Clear();
+ Pool.Clear();
NodeAllocator.Reset(new TFixedSizeAllocator(sizeof(TNode) + PayloadSize, TDefaultAllocator::Instance()));
Root = new (*NodeAllocator) TNode;
EntryCount = 0;
@@ -980,11 +980,11 @@ ui64 TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::ArcSaveAndDestroy(co
template <class T, class D, class S>
typename TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TNode::TArcSet::iterator
TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TNode::TArcSet::Find(char ch) {
- using namespace NCompTriePrivate;
- iterator it = LowerBound(this->begin(), this->end(), ch, TCmp());
-
+ using namespace NCompTriePrivate;
+ iterator it = LowerBound(this->begin(), this->end(), ch, TCmp());
+
if (it != this->end() && it->Label[0] == (unsigned char)ch) {
- return it;
+ return it;
}
return this->end();
@@ -1005,7 +1005,7 @@ typename TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TNode::TArcSet::
template <class T, class D, class S>
void TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TNode::TArcSet::Add(const TBlob& s, TNode* node) {
- using namespace NCompTriePrivate;
+ using namespace NCompTriePrivate;
this->insert(LowerBound(this->begin(), this->end(), s[0], TCmp()), TArc(s, node));
}
diff --git a/library/cpp/containers/comptrie/comptrie_impl.cpp b/library/cpp/containers/comptrie/comptrie_impl.cpp
index a293b45d1c..a116ab6d1e 100644
--- a/library/cpp/containers/comptrie/comptrie_impl.cpp
+++ b/library/cpp/containers/comptrie/comptrie_impl.cpp
@@ -6,34 +6,34 @@
// Unpack the leaf value. The algorithm can store up to 8 full bytes in leafs.
namespace NCompactTrie {
- size_t MeasureOffset(size_t offset) {
- int n = 0;
-
- while (offset) {
- offset >>= 8;
- ++n;
- }
-
- return n;
+ size_t MeasureOffset(size_t offset) {
+ int n = 0;
+
+ while (offset) {
+ offset >>= 8;
+ ++n;
+ }
+
+ return n;
}
-
- size_t PackOffset(char* buffer, size_t offset) {
- size_t len = MeasureOffset(offset);
- size_t i = len;
-
- while (i--) {
- buffer[i] = (char)(offset & 0xFF);
- offset >>= 8;
- }
-
- return len;
+
+ size_t PackOffset(char* buffer, size_t offset) {
+ size_t len = MeasureOffset(offset);
+ size_t i = len;
+
+ while (i--) {
+ buffer[i] = (char)(offset & 0xFF);
+ offset >>= 8;
+ }
+
+ return len;
}
-
- void ShowProgress(size_t n) {
- if (n % 1000000 == 0)
+
+ void ShowProgress(size_t n) {
+ if (n % 1000000 == 0)
Cerr << n << ", RSS=" << (TRusage::Get().MaxRss >> 20) << "mb" << Endl;
- else if (n % 20000 == 0)
- Cerr << ".";
- }
+ else if (n % 20000 == 0)
+ Cerr << ".";
+ }
}
diff --git a/library/cpp/containers/comptrie/comptrie_impl.h b/library/cpp/containers/comptrie/comptrie_impl.h
index f4b0bf072e..f41c38311a 100644
--- a/library/cpp/containers/comptrie/comptrie_impl.h
+++ b/library/cpp/containers/comptrie/comptrie_impl.h
@@ -9,10 +9,10 @@
// NCompactTrie
namespace NCompactTrie {
- const char MT_FINAL = '\x80';
- const char MT_NEXT = '\x40';
+ const char MT_FINAL = '\x80';
+ const char MT_NEXT = '\x40';
const char MT_SIZEMASK = '\x07';
- const size_t MT_LEFTSHIFT = 3;
+ const size_t MT_LEFTSHIFT = 3;
const size_t MT_RIGHTSHIFT = 0;
Y_FORCE_INLINE size_t UnpackOffset(const char* p, size_t len);
@@ -72,22 +72,22 @@ namespace NCompTriePrivate {
typedef TUtf32String TResult;
};
-}
+}
-namespace NCompTriePrivate {
- struct TCmp {
- template <class T>
- inline bool operator()(const T& l, const T& r) {
+namespace NCompTriePrivate {
+ struct TCmp {
+ template <class T>
+ inline bool operator()(const T& l, const T& r) {
return (unsigned char)(l.Label[0]) < (unsigned char)(r.Label[0]);
- }
-
- template <class T>
- inline bool operator()(const T& l, char r) {
+ }
+
+ template <class T>
+ inline bool operator()(const T& l, char r) {
return (unsigned char)(l.Label[0]) < (unsigned char)r;
- }
- };
-}
-
+ }
+ };
+}
+
namespace NCompactTrie {
static inline ui64 ArcSaveOffset(size_t offset, IOutputStream& os) {
using namespace NCompactTrie;
@@ -184,7 +184,7 @@ namespace NCompactTrie {
// Returns true if the symbol was succesfully found in the trie, false otherwise.
template <typename TSymbol, class TPacker>
Y_FORCE_INLINE bool Advance(const char*& datapos, const char* const dataend, const char*& value,
- TSymbol label, TPacker packer) {
+ TSymbol label, TPacker packer) {
Y_ASSERT(datapos < dataend);
char flags = MT_NEXT;
for (int i = (int)ExtraBits<TSymbol>(); i >= 0; i -= 8) {
diff --git a/library/cpp/containers/comptrie/comptrie_trie.h b/library/cpp/containers/comptrie/comptrie_trie.h
index 1fa8794897..40ec1e52b3 100644
--- a/library/cpp/containers/comptrie/comptrie_trie.h
+++ b/library/cpp/containers/comptrie/comptrie_trie.h
@@ -29,7 +29,7 @@ template <class TTrie>
class TPrefixIterator;
// in case of <char> specialization cannot distinguish between "" and "\0" keys
-template <class T = char, class D = ui64, class S = TCompactTriePacker<D>>
+template <class T = char, class D = ui64, class S = TCompactTriePacker<D>>
class TCompactTrie {
public:
typedef T TSymbol;
@@ -56,12 +56,12 @@ public:
TCompactTrie(const char* d, size_t len, TPacker packer);
TCompactTrie(const char* d, size_t len)
- : TCompactTrie{d, len, TPacker{}} {
+ : TCompactTrie{d, len, TPacker{}} {
}
TCompactTrie(const TBlob& data, TPacker packer);
explicit TCompactTrie(const TBlob& data)
- : TCompactTrie{data, TPacker{}} {
+ : TCompactTrie{data, TPacker{}} {
}
// Skipper should be initialized with &Packer, not with &other.Packer, so you have to redefine these.
@@ -126,7 +126,7 @@ public:
bool FindLongestPrefix(const TKeyBuf& key, size_t* prefixLen, TData* value = nullptr, bool* hasNext = nullptr) const {
return FindLongestPrefix(key.data(), key.size(), prefixLen, value, hasNext);
}
-
+
// Return trie, containing all tails for the given key
inline TCompactTrie<T, D, S> FindTails(const TSymbol* key, size_t keylen) const;
TCompactTrie<T, D, S> FindTails(const TKeyBuf& key) const {
@@ -146,17 +146,17 @@ public:
typedef NCompactTrie::TOpaqueTrieIterator TOpaqueTrieIterator;
typedef NCompactTrie::TOpaqueTrie TOpaqueTrie;
friend class TCompactTrie;
- TConstIterator(const TOpaqueTrie& trie, const char* emptyValue, bool atend, TPacker packer); // only usable from Begin() and End() methods
+ TConstIterator(const TOpaqueTrie& trie, const char* emptyValue, bool atend, TPacker packer); // only usable from Begin() and End() methods
TConstIterator(const TOpaqueTrie& trie, const char* emptyValue, const TKeyBuf& key, TPacker packer); // only usable from UpperBound() method
public:
TConstIterator() = default;
- bool IsEmpty() const {
- return !Impl;
- } // Almost no other method can be called.
+ bool IsEmpty() const {
+ return !Impl;
+ } // Almost no other method can be called.
- bool operator==(const TConstIterator& other) const;
- bool operator!=(const TConstIterator& other) const;
+ bool operator==(const TConstIterator& other) const;
+ bool operator!=(const TConstIterator& other) const;
TConstIterator& operator++();
TConstIterator operator++(int /*unused*/);
TConstIterator& operator--();
@@ -205,8 +205,8 @@ protected:
void LookupPhrases(const char* datapos, size_t len, const TSymbol* key, size_t keylen, TVector<TPhraseMatch>& matches, TSymbol separator) const;
};
-template <class T = char, class D = ui64, class S = TCompactTriePacker<D>>
-class TCompactTrieHolder: public TCompactTrie<T, D, S>, NNonCopyable::TNonCopyable {
+template <class T = char, class D = ui64, class S = TCompactTriePacker<D>>
+class TCompactTrieHolder: public TCompactTrie<T, D, S>, NNonCopyable::TNonCopyable {
private:
typedef TCompactTrie<T, D, S> TBase;
TArrayHolder<char> Storage;
@@ -239,35 +239,35 @@ TCompactTrie<T, D, S>::TCompactTrie(const char* d, size_t len, TPacker packer)
template <class T, class D, class S>
TCompactTrie<T, D, S>::TCompactTrie(const char* emptyValue)
: EmptyValue(emptyValue)
-{
-}
+{
+}
template <class T, class D, class S>
TCompactTrie<T, D, S>::TCompactTrie(const TBlob& data, const char* emptyValue, TPacker packer)
: DataHolder(data)
, EmptyValue(emptyValue)
, Packer(packer)
-{
-}
+{
+}
template <class T, class D, class S>
TCompactTrie<T, D, S>::TCompactTrie(const TCompactTrie& other)
- : DataHolder(other.DataHolder)
- , EmptyValue(other.EmptyValue)
- , Packer(other.Packer)
-{
-}
+ : DataHolder(other.DataHolder)
+ , EmptyValue(other.EmptyValue)
+ , Packer(other.Packer)
+{
+}
template <class T, class D, class S>
TCompactTrie<T, D, S>::TCompactTrie(TCompactTrie&& other) noexcept
- : DataHolder(std::move(other.DataHolder))
- , EmptyValue(std::move(other.EmptyValue))
- , Packer(std::move(other.Packer))
-{
-}
+ : DataHolder(std::move(other.DataHolder))
+ , EmptyValue(std::move(other.EmptyValue))
+ , Packer(std::move(other.Packer))
+{
+}
template <class T, class D, class S>
-TCompactTrie<T, D, S>& TCompactTrie<T, D, S>::operator=(const TCompactTrie& other) {
+TCompactTrie<T, D, S>& TCompactTrie<T, D, S>::operator=(const TCompactTrie& other) {
if (this != &other) {
DataHolder = other.DataHolder;
EmptyValue = other.EmptyValue;
@@ -529,7 +529,7 @@ bool TCompactTrie<T, D, S>::LookupLongestPrefix(const TSymbol* key, size_t keyle
template <class T, class D, class S>
void TCompactTrie<T, D, S>::LookupPhrases(
const char* datapos, size_t len, const TSymbol* key, size_t keylen,
- TVector<TPhraseMatch>& matches, TSymbol separator) const {
+ TVector<TPhraseMatch>& matches, TSymbol separator) const {
using namespace NCompactTrie;
matches.clear();
@@ -572,10 +572,10 @@ TCompactTrieHolder<T, D, S>::TCompactTrieHolder(IInputStream& is, size_t len)
template <class T, class D, class S>
TCompactTrie<T, D, S>::TConstIterator::TConstIterator(const TOpaqueTrie& trie, const char* emptyValue, bool atend, TPacker packer)
- : Packer(packer)
- , Impl(new TOpaqueTrieIterator(trie, emptyValue, atend))
-{
-}
+ : Packer(packer)
+ , Impl(new TOpaqueTrieIterator(trie, emptyValue, atend))
+{
+}
template <class T, class D, class S>
TCompactTrie<T, D, S>::TConstIterator::TConstIterator(const TOpaqueTrie& trie, const char* emptyValue, const TKeyBuf& key, TPacker packer)
@@ -586,7 +586,7 @@ TCompactTrie<T, D, S>::TConstIterator::TConstIterator(const TOpaqueTrie& trie, c
}
template <class T, class D, class S>
-bool TCompactTrie<T, D, S>::TConstIterator::operator==(const TConstIterator& other) const {
+bool TCompactTrie<T, D, S>::TConstIterator::operator==(const TConstIterator& other) const {
if (!Impl)
return !other.Impl;
if (!other.Impl)
@@ -595,8 +595,8 @@ bool TCompactTrie<T, D, S>::TConstIterator::operator==(const TConstIterator& oth
}
template <class T, class D, class S>
-bool TCompactTrie<T, D, S>::TConstIterator::operator!=(const TConstIterator& other) const {
- return !operator==(other);
+bool TCompactTrie<T, D, S>::TConstIterator::operator!=(const TConstIterator& other) const {
+ return !operator==(other);
}
template <class T, class D, class S>
diff --git a/library/cpp/containers/comptrie/comptrie_ut.cpp b/library/cpp/containers/comptrie/comptrie_ut.cpp
index b4f84b3e4b..74bee09b5d 100644
--- a/library/cpp/containers/comptrie/comptrie_ut.cpp
+++ b/library/cpp/containers/comptrie/comptrie_ut.cpp
@@ -1,12 +1,12 @@
-#include <util/random/shuffle.h>
+#include <util/random/shuffle.h>
#include <library/cpp/testing/unittest/registar.h>
-
+
#include <util/stream/output.h>
#include <utility>
-
-#include <util/charset/wide.h>
+
+#include <util/charset/wide.h>
#include <util/generic/algorithm.h>
-#include <util/generic/buffer.h>
+#include <util/generic/buffer.h>
#include <util/generic/map.h>
#include <util/generic/vector.h>
#include <util/generic/ptr.h>
@@ -14,100 +14,100 @@
#include <util/folder/dirut.h>
-#include <util/random/random.h>
+#include <util/random/random.h>
#include <util/random/fast.h>
-
+
#include <util/string/hex.h>
#include <util/string/cast.h>
-#include "comptrie.h"
+#include "comptrie.h"
#include "set.h"
#include "first_symbol_iterator.h"
#include "search_iterator.h"
#include "pattern_searcher.h"
-
+
#include <array>
#include <iterator>
-class TCompactTrieTest: public TTestBase {
+class TCompactTrieTest: public TTestBase {
private:
UNIT_TEST_SUITE(TCompactTrieTest);
- UNIT_TEST(TestTrie8);
- UNIT_TEST(TestTrie16);
- UNIT_TEST(TestTrie32);
-
- UNIT_TEST(TestFastTrie8);
- UNIT_TEST(TestFastTrie16);
- UNIT_TEST(TestFastTrie32);
-
- UNIT_TEST(TestMinimizedTrie8);
- UNIT_TEST(TestMinimizedTrie16);
- UNIT_TEST(TestMinimizedTrie32);
-
- UNIT_TEST(TestFastMinimizedTrie8);
- UNIT_TEST(TestFastMinimizedTrie16);
- UNIT_TEST(TestFastMinimizedTrie32);
-
- UNIT_TEST(TestTrieIterator8);
- UNIT_TEST(TestTrieIterator16);
- UNIT_TEST(TestTrieIterator32);
-
- UNIT_TEST(TestMinimizedTrieIterator8);
- UNIT_TEST(TestMinimizedTrieIterator16);
- UNIT_TEST(TestMinimizedTrieIterator32);
-
- UNIT_TEST(TestPhraseSearch);
- UNIT_TEST(TestAddGet);
- UNIT_TEST(TestEmpty);
- UNIT_TEST(TestUninitializedNonEmpty);
- UNIT_TEST(TestRandom);
- UNIT_TEST(TestFindTails);
- UNIT_TEST(TestPrefixGrouped);
- UNIT_TEST(CrashTestPrefixGrouped);
+ UNIT_TEST(TestTrie8);
+ UNIT_TEST(TestTrie16);
+ UNIT_TEST(TestTrie32);
+
+ UNIT_TEST(TestFastTrie8);
+ UNIT_TEST(TestFastTrie16);
+ UNIT_TEST(TestFastTrie32);
+
+ UNIT_TEST(TestMinimizedTrie8);
+ UNIT_TEST(TestMinimizedTrie16);
+ UNIT_TEST(TestMinimizedTrie32);
+
+ UNIT_TEST(TestFastMinimizedTrie8);
+ UNIT_TEST(TestFastMinimizedTrie16);
+ UNIT_TEST(TestFastMinimizedTrie32);
+
+ UNIT_TEST(TestTrieIterator8);
+ UNIT_TEST(TestTrieIterator16);
+ UNIT_TEST(TestTrieIterator32);
+
+ UNIT_TEST(TestMinimizedTrieIterator8);
+ UNIT_TEST(TestMinimizedTrieIterator16);
+ UNIT_TEST(TestMinimizedTrieIterator32);
+
+ UNIT_TEST(TestPhraseSearch);
+ UNIT_TEST(TestAddGet);
+ UNIT_TEST(TestEmpty);
+ UNIT_TEST(TestUninitializedNonEmpty);
+ UNIT_TEST(TestRandom);
+ UNIT_TEST(TestFindTails);
+ UNIT_TEST(TestPrefixGrouped);
+ UNIT_TEST(CrashTestPrefixGrouped);
UNIT_TEST(TestMergeFromFile);
UNIT_TEST(TestMergeFromBuffer);
- UNIT_TEST(TestUnique);
- UNIT_TEST(TestAddRetValue);
- UNIT_TEST(TestClear);
+ UNIT_TEST(TestUnique);
+ UNIT_TEST(TestAddRetValue);
+ UNIT_TEST(TestClear);
- UNIT_TEST(TestIterateEmptyKey);
+ UNIT_TEST(TestIterateEmptyKey);
- UNIT_TEST(TestTrieSet);
+ UNIT_TEST(TestTrieSet);
- UNIT_TEST(TestTrieForVectorInt64);
- UNIT_TEST(TestTrieForListInt64);
- UNIT_TEST(TestTrieForSetInt64);
+ UNIT_TEST(TestTrieForVectorInt64);
+ UNIT_TEST(TestTrieForListInt64);
+ UNIT_TEST(TestTrieForSetInt64);
- UNIT_TEST(TestTrieForVectorStroka);
- UNIT_TEST(TestTrieForListStroka);
- UNIT_TEST(TestTrieForSetStroka);
+ UNIT_TEST(TestTrieForVectorStroka);
+ UNIT_TEST(TestTrieForListStroka);
+ UNIT_TEST(TestTrieForSetStroka);
- UNIT_TEST(TestTrieForVectorWtroka);
- UNIT_TEST(TestTrieForVectorFloat);
- UNIT_TEST(TestTrieForVectorDouble);
+ UNIT_TEST(TestTrieForVectorWtroka);
+ UNIT_TEST(TestTrieForVectorFloat);
+ UNIT_TEST(TestTrieForVectorDouble);
- UNIT_TEST(TestTrieForListVectorInt64);
- UNIT_TEST(TestTrieForPairWtrokaVectorInt64);
+ UNIT_TEST(TestTrieForListVectorInt64);
+ UNIT_TEST(TestTrieForPairWtrokaVectorInt64);
- UNIT_TEST(TestEmptyValueOutOfOrder);
- UNIT_TEST(TestFindLongestPrefixWithEmptyValue);
+ UNIT_TEST(TestEmptyValueOutOfOrder);
+ UNIT_TEST(TestFindLongestPrefixWithEmptyValue);
- UNIT_TEST(TestSearchIterChar);
- UNIT_TEST(TestSearchIterWchar);
+ UNIT_TEST(TestSearchIterChar);
+ UNIT_TEST(TestSearchIterWchar);
UNIT_TEST(TestSearchIterWchar32)
- UNIT_TEST(TestCopyAndAssignment);
+ UNIT_TEST(TestCopyAndAssignment);
- UNIT_TEST(TestFirstSymbolIterator8);
- UNIT_TEST(TestFirstSymbolIterator16);
- UNIT_TEST(TestFirstSymbolIterator32);
+ UNIT_TEST(TestFirstSymbolIterator8);
+ UNIT_TEST(TestFirstSymbolIterator16);
+ UNIT_TEST(TestFirstSymbolIterator32);
UNIT_TEST(TestFirstSymbolIteratorChar32);
- UNIT_TEST(TestArrayPacker);
+ UNIT_TEST(TestArrayPacker);
- UNIT_TEST(TestBuilderFindLongestPrefix);
- UNIT_TEST(TestBuilderFindLongestPrefixWithEmptyValue);
+ UNIT_TEST(TestBuilderFindLongestPrefix);
+ UNIT_TEST(TestBuilderFindLongestPrefixWithEmptyValue);
UNIT_TEST(TestPatternSearcherEmpty);
UNIT_TEST(TestPatternSearcherSimple);
@@ -147,7 +147,7 @@ private:
TVector<TContainer> GetSampleVectorData(size_t nValues);
template <class TContainer>
TVector<TContainer> GetSampleTextVectorData(size_t nValues);
- template <class T>
+ template <class T>
void CheckEquality(const T& value1, const T& value2) const;
template <class TContainer>
void TestTrieWithContainers(const TVector<TUtf16String>& keys, const TVector<TContainer>& sampleData, TString methodName);
@@ -265,38 +265,38 @@ public:
UNIT_TEST_SUITE_REGISTRATION(TCompactTrieTest);
-const char* TCompactTrieTest::SampleData[] = {
- "",
- "a", "b", "c", "d",
- "aa", "ab", "ac", "ad",
- "aaa", "aab", "aac", "aad",
- "aba", "abb", "abc", "abd",
- "fba", "fbb", "fbc", "fbd",
- "fbbaa",
- "c\x85\xA4\xBF" // Just something outside ASCII.
+const char* TCompactTrieTest::SampleData[] = {
+ "",
+ "a", "b", "c", "d",
+ "aa", "ab", "ac", "ad",
+ "aaa", "aab", "aac", "aad",
+ "aba", "abb", "abc", "abd",
+ "fba", "fbb", "fbc", "fbd",
+ "fbbaa",
+ "c\x85\xA4\xBF" // Just something outside ASCII.
};
-template <class T>
+template <class T>
typename TCompactTrie<T>::TKey MakeWideKey(const char* str, size_t len) {
typename TCompactTrie<T>::TKey buffer;
for (size_t i = 0; i < len; i++) {
unsigned int ch = (str[i] & 0xFF);
- buffer.push_back((T)(ch | ch << 8 | ch << 16 | ch << 24));
+ buffer.push_back((T)(ch | ch << 8 | ch << 16 | ch << 24));
}
return buffer;
}
-template <class T>
+template <class T>
typename TCompactTrie<T>::TKey MakeWideKey(const TString& str) {
return MakeWideKey<T>(str.c_str(), str.length());
}
-template <class T>
+template <class T>
typename TCompactTrie<T>::TKey MakeWideKey(const TStringBuf& buf) {
return MakeWideKey<T>(buf.data(), buf.length());
}
-template <class T>
+template <class T>
void TCompactTrieTest::CreateTrie(IOutputStream& out, bool minimize, bool useFastLayout) {
TCompactTrieBuilder<T> builder;
@@ -305,18 +305,18 @@ void TCompactTrieTest::CreateTrie(IOutputStream& out, bool minimize, bool useFas
builder.Add(MakeWideKey<T>(i, len), len * 2);
}
-
+
TBufferOutput tmp2;
IOutputStream& currentOutput = useFastLayout ? tmp2 : out;
if (minimize) {
TBufferOutput buftmp;
builder.Save(buftmp);
- CompactTrieMinimize<TCompactTriePacker<ui64>>(currentOutput, buftmp.Buffer().Data(), buftmp.Buffer().Size(), false);
+ CompactTrieMinimize<TCompactTriePacker<ui64>>(currentOutput, buftmp.Buffer().Data(), buftmp.Buffer().Size(), false);
} else {
builder.Save(currentOutput);
}
if (useFastLayout) {
- CompactTrieMakeFastLayout<TCompactTriePacker<T>>(out, tmp2.Buffer().Data(), tmp2.Buffer().Size(), false);
+ CompactTrieMakeFastLayout<TCompactTriePacker<T>>(out, tmp2.Buffer().Data(), tmp2.Buffer().Size(), false);
}
}
@@ -337,7 +337,7 @@ static bool LexicographicStep(TString& s) {
return true;
}
-template <class T>
+template <class T>
void TCompactTrieTest::CheckUpperBound(const char* data, size_t datalen) {
TCompactTrie<T> trie(data, datalen);
typedef typename TCompactTrie<T>::TKey TKey;
@@ -357,7 +357,7 @@ void TCompactTrieTest::CheckUpperBound(const char* data, size_t datalen) {
} while (LexicographicStep(key));
}
-template <class T>
+template <class T>
void TCompactTrieTest::CheckData(const char* data, size_t datalen) {
TCompactTrie<T> trie(data, datalen);
@@ -413,13 +413,13 @@ void TCompactTrieTest::CheckData(const char* data, size_t datalen) {
UNIT_ASSERT(trie.FindLongestPrefix(key, &prefixLen, &value));
UNIT_ASSERT_EQUAL(prefixLen, 3);
UNIT_ASSERT_EQUAL(6, value);
-
+
value = 12345678;
UNIT_ASSERT(!trie.Find(key, &value));
- UNIT_ASSERT_EQUAL(12345678, value); //Failed Find() should not change value
+ UNIT_ASSERT_EQUAL(12345678, value); //Failed Find() should not change value
}
-template <class T>
+template <class T>
void TCompactTrieTest::CheckIterator(const char* data, size_t datalen) {
typedef typename TCompactTrie<T>::TKey TKey;
typedef typename TCompactTrie<T>::TValueType TValue;
@@ -441,10 +441,10 @@ void TCompactTrieTest::CheckIterator(const char* data, size_t datalen) {
received.insert(*it);
items.push_back(*it);
entry_count++;
- it++;
+ it++;
}
TMap<TKey, ui64> received2;
- for (std::pair<TKey, ui64> x : trie) {
+ for (std::pair<TKey, ui64> x : trie) {
received2.insert(x);
}
UNIT_ASSERT(entry_count == stored.size());
@@ -485,7 +485,7 @@ void TCompactTrieTest::CheckIterator(const char* data, size_t datalen) {
UNIT_ASSERT(revIt != trie.End());
}
-template <class T>
+template <class T>
void TCompactTrieTest::TestTrie(bool minimize, bool useFastLayout) {
TBufferOutput bufout;
CreateTrie<T>(bufout, minimize, useFastLayout);
@@ -493,75 +493,75 @@ void TCompactTrieTest::TestTrie(bool minimize, bool useFastLayout) {
CheckUpperBound<T>(bufout.Buffer().Data(), bufout.Buffer().Size());
}
-template <class T>
+template <class T>
void TCompactTrieTest::TestTrieIterator(bool minimize) {
TBufferOutput bufout;
CreateTrie<T>(bufout, minimize, false);
CheckIterator<T>(bufout.Buffer().Data(), bufout.Buffer().Size());
}
-void TCompactTrieTest::TestTrie8() {
- TestTrie<char>(false, false);
-}
-void TCompactTrieTest::TestTrie16() {
- TestTrie<wchar16>(false, false);
-}
-void TCompactTrieTest::TestTrie32() {
- TestTrie<wchar32>(false, false);
-}
-
-void TCompactTrieTest::TestFastTrie8() {
- TestTrie<char>(false, true);
-}
-void TCompactTrieTest::TestFastTrie16() {
- TestTrie<wchar16>(false, true);
-}
-void TCompactTrieTest::TestFastTrie32() {
- TestTrie<wchar32>(false, true);
-}
-
-void TCompactTrieTest::TestMinimizedTrie8() {
- TestTrie<char>(true, false);
-}
-void TCompactTrieTest::TestMinimizedTrie16() {
- TestTrie<wchar16>(true, false);
-}
-void TCompactTrieTest::TestMinimizedTrie32() {
- TestTrie<wchar32>(true, false);
-}
-
-void TCompactTrieTest::TestFastMinimizedTrie8() {
- TestTrie<char>(true, true);
-}
-void TCompactTrieTest::TestFastMinimizedTrie16() {
- TestTrie<wchar16>(true, true);
-}
-void TCompactTrieTest::TestFastMinimizedTrie32() {
- TestTrie<wchar32>(true, true);
-}
-
-void TCompactTrieTest::TestTrieIterator8() {
- TestTrieIterator<char>(false);
-}
-void TCompactTrieTest::TestTrieIterator16() {
- TestTrieIterator<wchar16>(false);
-}
-void TCompactTrieTest::TestTrieIterator32() {
- TestTrieIterator<wchar32>(false);
-}
-
-void TCompactTrieTest::TestMinimizedTrieIterator8() {
- TestTrieIterator<char>(true);
-}
-void TCompactTrieTest::TestMinimizedTrieIterator16() {
- TestTrieIterator<wchar16>(true);
-}
-void TCompactTrieTest::TestMinimizedTrieIterator32() {
- TestTrieIterator<wchar32>(true);
-}
+void TCompactTrieTest::TestTrie8() {
+ TestTrie<char>(false, false);
+}
+void TCompactTrieTest::TestTrie16() {
+ TestTrie<wchar16>(false, false);
+}
+void TCompactTrieTest::TestTrie32() {
+ TestTrie<wchar32>(false, false);
+}
+
+void TCompactTrieTest::TestFastTrie8() {
+ TestTrie<char>(false, true);
+}
+void TCompactTrieTest::TestFastTrie16() {
+ TestTrie<wchar16>(false, true);
+}
+void TCompactTrieTest::TestFastTrie32() {
+ TestTrie<wchar32>(false, true);
+}
+
+void TCompactTrieTest::TestMinimizedTrie8() {
+ TestTrie<char>(true, false);
+}
+void TCompactTrieTest::TestMinimizedTrie16() {
+ TestTrie<wchar16>(true, false);
+}
+void TCompactTrieTest::TestMinimizedTrie32() {
+ TestTrie<wchar32>(true, false);
+}
+
+void TCompactTrieTest::TestFastMinimizedTrie8() {
+ TestTrie<char>(true, true);
+}
+void TCompactTrieTest::TestFastMinimizedTrie16() {
+ TestTrie<wchar16>(true, true);
+}
+void TCompactTrieTest::TestFastMinimizedTrie32() {
+ TestTrie<wchar32>(true, true);
+}
+
+void TCompactTrieTest::TestTrieIterator8() {
+ TestTrieIterator<char>(false);
+}
+void TCompactTrieTest::TestTrieIterator16() {
+ TestTrieIterator<wchar16>(false);
+}
+void TCompactTrieTest::TestTrieIterator32() {
+ TestTrieIterator<wchar32>(false);
+}
+
+void TCompactTrieTest::TestMinimizedTrieIterator8() {
+ TestTrieIterator<char>(true);
+}
+void TCompactTrieTest::TestMinimizedTrieIterator16() {
+ TestTrieIterator<wchar16>(true);
+}
+void TCompactTrieTest::TestMinimizedTrieIterator32() {
+ TestTrieIterator<wchar32>(true);
+}
void TCompactTrieTest::TestPhraseSearch() {
- static const char* phrases[] = {"ab", "ab cd", "ab cd ef"};
+ static const char* phrases[] = {"ab", "ab cd", "ab cd ef"};
static const char* const goodphrase = "ab cd ef gh";
static const char* const badphrase = "cd ef gh ab";
TBufferOutput bufout;
@@ -651,7 +651,7 @@ static char RandChar() {
}
static TString RandStr(const size_t max) {
- size_t len = RandomNumber<size_t>() % max;
+ size_t len = RandomNumber<size_t>() % max;
TString key;
for (size_t j = 0; j < len; ++j)
key += RandChar();
@@ -664,7 +664,7 @@ void TCompactTrieTest::TestRandom(const size_t n, const size_t maxKeySize) {
TCompactTrieBuilder<char, typename T::TData, T> builder;
typedef TMap<TString, typename T::TData> TKeys;
TKeys keys;
-
+
typename T::TData dummy;
for (size_t i = 0; i < n; ++i) {
const TString key = RandStr(maxKeySize);
@@ -677,7 +677,7 @@ void TCompactTrieTest::TestRandom(const size_t n, const size_t maxKeySize) {
UNIT_ASSERT(dummy == val);
}
}
-
+
TBufferStream stream;
size_t len = builder.Save(stream);
TCompactTrie<char, typename T::TData, T> trie(stream.Buffer().Data(), len);
@@ -721,12 +721,12 @@ void TCompactTrieTest::TestRandom(const size_t n, const size_t maxKeySize) {
}
void TCompactTrieTest::TestRandom() {
- TestRandom<TIntPacker<ui64>, true>(1000, 1000);
- TestRandom<TIntPacker<int>, true>(100, 100);
- TestRandom<TDummyPacker<ui64>, true>(0, 0);
- TestRandom<TDummyPacker<ui64>, true>(100, 3);
- TestRandom<TDummyPacker<ui64>, true>(100, 100);
- TestRandom<TStrokaPacker, true>(100, 100);
+ TestRandom<TIntPacker<ui64>, true>(1000, 1000);
+ TestRandom<TIntPacker<int>, true>(100, 100);
+ TestRandom<TDummyPacker<ui64>, true>(0, 0);
+ TestRandom<TDummyPacker<ui64>, true>(100, 3);
+ TestRandom<TDummyPacker<ui64>, true>(100, 100);
+ TestRandom<TStrokaPacker, true>(100, 100);
}
void TCompactTrieTest::TestFindTailsImpl(const TString& prefix) {
@@ -779,14 +779,14 @@ void TCompactTrieTest::TestPrefixGrouped() {
TBuffer b1b;
TCompactTrieBuilder<char, ui32> b1(CTBF_PREFIX_GROUPED);
const char* data[] = {
- "Kazan",
- "Moscow",
- "Monino",
- "Murmansk",
- "Fryanovo",
- "Fryazino",
- "Fryazevo",
- "Tumen",
+ "Kazan",
+ "Moscow",
+ "Monino",
+ "Murmansk",
+ "Fryanovo",
+ "Fryazino",
+ "Fryazevo",
+ "Tumen",
};
for (size_t i = 0; i < Y_ARRAY_SIZE(data); ++i) {
@@ -937,15 +937,15 @@ void TCompactTrieTest::TestUnique() {
void TCompactTrieTest::TestUniqueImpl(bool isPrefixGrouped) {
TCompactTrieBuilder<char, ui32> builder(CTBF_UNIQUE | (isPrefixGrouped ? CTBF_PREFIX_GROUPED : CTBF_NONE));
const char* data[] = {
- "Kazan",
- "Moscow",
- "Monino",
- "Murmansk",
- "Fryanovo",
- "Fryazino",
- "Fryazevo",
- "Fry",
- "Tumen",
+ "Kazan",
+ "Moscow",
+ "Monino",
+ "Murmansk",
+ "Fryanovo",
+ "Fryazino",
+ "Fryazevo",
+ "Fry",
+ "Tumen",
};
for (size_t i = 0; i < Y_ARRAY_SIZE(data); ++i) {
UNIT_ASSERT_C(builder.Add(data[i], strlen(data[i]), i + 1), i);
@@ -963,15 +963,15 @@ void TCompactTrieTest::TestUniqueImpl(bool isPrefixGrouped) {
void TCompactTrieTest::TestAddRetValue() {
TCompactTrieBuilder<char, ui32> builder;
const char* data[] = {
- "Kazan",
- "Moscow",
- "Monino",
- "Murmansk",
- "Fryanovo",
- "Fryazino",
- "Fryazevo",
- "Fry",
- "Tumen",
+ "Kazan",
+ "Moscow",
+ "Monino",
+ "Murmansk",
+ "Fryanovo",
+ "Fryazino",
+ "Fryazevo",
+ "Fry",
+ "Tumen",
};
for (size_t i = 0; i < Y_ARRAY_SIZE(data); ++i) {
UNIT_ASSERT(builder.Add(data[i], strlen(data[i]), i + 1));
@@ -982,7 +982,7 @@ void TCompactTrieTest::TestAddRetValue() {
}
}
-void TCompactTrieTest::TestClear() {
+void TCompactTrieTest::TestClear() {
TCompactTrieBuilder<char, ui32> builder;
const char* data[] = {
"Kazan",
@@ -1008,14 +1008,14 @@ void TCompactTrieTest::TestFindTails() {
TestFindTailsImpl("aa");
TestFindTailsImpl("bb");
TestFindTailsImpl("fb");
- TestFindTailsImpl("fbc");
- TestFindTailsImpl("fbbaa");
+ TestFindTailsImpl("fbc");
+ TestFindTailsImpl("fbbaa");
}
template <class T>
-class TCompactTrieTest::TDummyPacker: public TNullPacker<T> {
+class TCompactTrieTest::TDummyPacker: public TNullPacker<T> {
public:
- static T Data(const TString&) {
+ static T Data(const TString&) {
T data;
TNullPacker<T>().UnpackLeaf(nullptr, data);
return data;
@@ -1024,21 +1024,21 @@ public:
typedef T TData;
};
-class TCompactTrieTest::TStrokaPacker: public TCompactTriePacker<TString> {
+class TCompactTrieTest::TStrokaPacker: public TCompactTriePacker<TString> {
public:
typedef TString TData;
- static TString Data(const TString& str) {
+ static TString Data(const TString& str) {
return str;
}
};
template <class T>
-class TCompactTrieTest::TIntPacker: public TCompactTriePacker<T> {
+class TCompactTrieTest::TIntPacker: public TCompactTriePacker<T> {
public:
typedef T TData;
- static TData Data(const TString&) {
+ static TData Data(const TString&) {
return RandomNumber<std::make_unsigned_t<T>>();
}
};
@@ -1099,7 +1099,7 @@ void TCompactTrieTest::TestTrieSet() {
UNIT_ASSERT(!empty.Has("d"));
UNIT_ASSERT(!empty.Has("d"));
- UNIT_ASSERT(empty.Has("")); // contains only empty string
+ UNIT_ASSERT(empty.Has("")); // contains only empty string
}
// Tests for trie with vector (list, set) values
@@ -1119,7 +1119,7 @@ TVector<TContainer> TCompactTrieTest::GetSampleVectorData(size_t nValues) {
for (size_t i = 0; i < nValues; i++) {
data.push_back(TContainer());
for (size_t j = 0; j < i; j++)
- data[i].insert(data[i].end(), (typename TContainer::value_type)((j == 3) ? 0 : (1 << (j * 5))));
+ data[i].insert(data[i].end(), (typename TContainer::value_type)((j == 3) ? 0 : (1 << (j * 5))));
}
return data;
}
@@ -1135,16 +1135,16 @@ TVector<TContainer> TCompactTrieTest::GetSampleTextVectorData(size_t nValues) {
return data;
}
-template <class T>
+template <class T>
void TCompactTrieTest::CheckEquality(const T& value1, const T& value2) const {
UNIT_ASSERT_VALUES_EQUAL(value1, value2);
}
-template <>
-void TCompactTrieTest::CheckEquality<TVector<i64>>(const TVector<i64>& value1, const TVector<i64>& value2) const {
- UNIT_ASSERT_VALUES_EQUAL(value1.size(), value2.size());
- for (size_t i = 0; i < value1.size(); i++)
- UNIT_ASSERT_VALUES_EQUAL(value1[i], value2[i]);
+template <>
+void TCompactTrieTest::CheckEquality<TVector<i64>>(const TVector<i64>& value1, const TVector<i64>& value2) const {
+ UNIT_ASSERT_VALUES_EQUAL(value1.size(), value2.size());
+ for (size_t i = 0; i < value1.size(); i++)
+ UNIT_ASSERT_VALUES_EQUAL(value1[i], value2[i]);
}
template <class TContainer>
@@ -1171,9 +1171,9 @@ void TCompactTrieTest::TestTrieWithContainers(const TVector<TUtf16String>& keys,
unlink(fileName.data());
}
-template <>
-void TCompactTrieTest::TestTrieWithContainers<std::pair<TUtf16String, TVector<i64>>>(const TVector<TUtf16String>& keys, const TVector<std::pair<TUtf16String, TVector<i64>>>& sampleData, TString methodName) {
- typedef std::pair<TUtf16String, TVector<i64>> TContainer;
+template <>
+void TCompactTrieTest::TestTrieWithContainers<std::pair<TUtf16String, TVector<i64>>>(const TVector<TUtf16String>& keys, const TVector<std::pair<TUtf16String, TVector<i64>>>& sampleData, TString methodName) {
+ typedef std::pair<TUtf16String, TVector<i64>> TContainer;
TString fileName = GetSystemTempDir() + "/TCompactTrieTest-TestTrieWithContainers-" + methodName;
TCompactTrieBuilder<wchar16, TContainer> b;
@@ -1194,63 +1194,63 @@ void TCompactTrieTest::TestTrieWithContainers<std::pair<TUtf16String, TVector<i6
}
void TCompactTrieTest::TestTrieForVectorInt64() {
- TestTrieWithContainers<TVector<i64>>(GetSampleKeys(10), GetSampleVectorData<TVector<i64>>(10), "v-i64");
+ TestTrieWithContainers<TVector<i64>>(GetSampleKeys(10), GetSampleVectorData<TVector<i64>>(10), "v-i64");
}
void TCompactTrieTest::TestTrieForListInt64() {
- TestTrieWithContainers<TList<i64>>(GetSampleKeys(10), GetSampleVectorData<TList<i64>>(10), "l-i64");
+ TestTrieWithContainers<TList<i64>>(GetSampleKeys(10), GetSampleVectorData<TList<i64>>(10), "l-i64");
}
void TCompactTrieTest::TestTrieForSetInt64() {
- TestTrieWithContainers<TSet<i64>>(GetSampleKeys(10), GetSampleVectorData<TSet<i64>>(10), "s-i64");
+ TestTrieWithContainers<TSet<i64>>(GetSampleKeys(10), GetSampleVectorData<TSet<i64>>(10), "s-i64");
}
void TCompactTrieTest::TestTrieForVectorStroka() {
- TestTrieWithContainers<TVector<TString>>(GetSampleKeys(10), GetSampleTextVectorData<TVector<TString>>(10), "v-str");
+ TestTrieWithContainers<TVector<TString>>(GetSampleKeys(10), GetSampleTextVectorData<TVector<TString>>(10), "v-str");
}
void TCompactTrieTest::TestTrieForListStroka() {
- TestTrieWithContainers<TList<TString>>(GetSampleKeys(10), GetSampleTextVectorData<TList<TString>>(10), "l-str");
+ TestTrieWithContainers<TList<TString>>(GetSampleKeys(10), GetSampleTextVectorData<TList<TString>>(10), "l-str");
}
void TCompactTrieTest::TestTrieForSetStroka() {
- TestTrieWithContainers<TSet<TString>>(GetSampleKeys(10), GetSampleTextVectorData<TSet<TString>>(10), "s-str");
+ TestTrieWithContainers<TSet<TString>>(GetSampleKeys(10), GetSampleTextVectorData<TSet<TString>>(10), "s-str");
}
void TCompactTrieTest::TestTrieForVectorWtroka() {
- TVector<TVector<TString>> data = GetSampleTextVectorData<TVector<TString>>(10);
- TVector<TVector<TUtf16String>> wData;
+ TVector<TVector<TString>> data = GetSampleTextVectorData<TVector<TString>>(10);
+ TVector<TVector<TUtf16String>> wData;
for (size_t i = 0; i < data.size(); i++) {
wData.push_back(TVector<TUtf16String>());
for (size_t j = 0; j < data[i].size(); j++)
- wData[i].push_back(UTF8ToWide(data[i][j]));
+ wData[i].push_back(UTF8ToWide(data[i][j]));
}
- TestTrieWithContainers<TVector<TUtf16String>>(GetSampleKeys(10), wData, "v-wtr");
+ TestTrieWithContainers<TVector<TUtf16String>>(GetSampleKeys(10), wData, "v-wtr");
}
void TCompactTrieTest::TestTrieForVectorFloat() {
- TestTrieWithContainers<TVector<float>>(GetSampleKeys(10), GetSampleVectorData<TVector<float>>(10), "v-float");
+ TestTrieWithContainers<TVector<float>>(GetSampleKeys(10), GetSampleVectorData<TVector<float>>(10), "v-float");
}
void TCompactTrieTest::TestTrieForVectorDouble() {
- TestTrieWithContainers<TVector<double>>(GetSampleKeys(10), GetSampleVectorData<TVector<double>>(10), "v-double");
+ TestTrieWithContainers<TVector<double>>(GetSampleKeys(10), GetSampleVectorData<TVector<double>>(10), "v-double");
}
void TCompactTrieTest::TestTrieForListVectorInt64() {
TVector<i64> tmp;
- tmp.push_back(0);
- TList<TVector<i64>> dataElement(5, tmp);
- TVector<TList<TVector<i64>>> data(10, dataElement);
- TestTrieWithContainers<TList<TVector<i64>>>(GetSampleKeys(10), data, "l-v-i64");
+ tmp.push_back(0);
+ TList<TVector<i64>> dataElement(5, tmp);
+ TVector<TList<TVector<i64>>> data(10, dataElement);
+ TestTrieWithContainers<TList<TVector<i64>>>(GetSampleKeys(10), data, "l-v-i64");
}
void TCompactTrieTest::TestTrieForPairWtrokaVectorInt64() {
TVector<TUtf16String> keys = GetSampleKeys(10);
- TVector<TVector<i64>> values = GetSampleVectorData<TVector<i64>>(10);
- TVector<std::pair<TUtf16String, TVector<i64>>> data;
+ TVector<TVector<i64>> values = GetSampleVectorData<TVector<i64>>(10);
+ TVector<std::pair<TUtf16String, TVector<i64>>> data;
for (size_t i = 0; i < 10; i++)
data.push_back(std::pair<TUtf16String, TVector<i64>>(keys[i] + u"_v", values[i]));
- TestTrieWithContainers<std::pair<TUtf16String, TVector<i64>>>(keys, data, "pair-str-v-i64");
+ TestTrieWithContainers<std::pair<TUtf16String, TVector<i64>>>(keys, data, "pair-str-v-i64");
}
void TCompactTrieTest::TestEmptyValueOutOfOrder() {
@@ -1468,7 +1468,7 @@ void TCompactTrieTest::TestArrayPacker() {
void TCompactTrieTest::TestBuilderFindLongestPrefix() {
const size_t sizes[] = {10, 100};
- const double branchProbabilities[] = {0.01, 0.1, 0.5, 0.9, 0.99};
+ const double branchProbabilities[] = {0.01, 0.1, 0.5, 0.9, 0.99};
for (size_t size : sizes) {
for (double branchProbability : branchProbabilities) {
TestBuilderFindLongestPrefix(size, branchProbability, false, false);
@@ -1498,7 +1498,7 @@ void TCompactTrieTest::TestBuilderFindLongestPrefix(size_t keysCount, double bra
if (isPrefixGrouped)
Sort(keys.begin(), keys.end());
else
- Shuffle(keys.begin(), keys.end());
+ Shuffle(keys.begin(), keys.end());
TCompactTrieBuilder<char, TString> builder(isPrefixGrouped ? CTBF_PREFIX_GROUPED : CTBF_NONE);
const TString EMPTY_VALUE = "empty";
diff --git a/library/cpp/containers/comptrie/first_symbol_iterator.h b/library/cpp/containers/comptrie/first_symbol_iterator.h
index d3c521e7e6..d06135f06f 100644
--- a/library/cpp/containers/comptrie/first_symbol_iterator.h
+++ b/library/cpp/containers/comptrie/first_symbol_iterator.h
@@ -6,7 +6,7 @@
namespace NCompactTrie {
// Iterates over possible first symbols in a trie.
// Allows one to get the symbol and the subtrie starting from it.
- template <class TTrie>
+ template <class TTrie>
class TFirstSymbolIterator {
public:
using TSymbol = typename TTrie::TSymbol;
@@ -58,4 +58,4 @@ namespace NCompactTrie {
TCopyPtr<TOpaqueTrieIterator> Impl;
};
-}
+}
diff --git a/library/cpp/containers/comptrie/key_selector.h b/library/cpp/containers/comptrie/key_selector.h
index 80191df744..60466cef71 100644
--- a/library/cpp/containers/comptrie/key_selector.h
+++ b/library/cpp/containers/comptrie/key_selector.h
@@ -4,24 +4,24 @@
#include <util/generic/string.h>
#include <util/generic/strbuf.h>
-template <class T>
+template <class T>
struct TCompactTrieKeySelector {
typedef TVector<T> TKey;
typedef TVector<T> TKeyBuf;
};
-template <class TChar>
+template <class TChar>
struct TCompactTrieCharKeySelector {
typedef TBasicString<TChar> TKey;
typedef TBasicStringBuf<TChar> TKeyBuf;
};
-template <>
-struct TCompactTrieKeySelector<char>: public TCompactTrieCharKeySelector<char> {
+template <>
+struct TCompactTrieKeySelector<char>: public TCompactTrieCharKeySelector<char> {
};
-template <>
-struct TCompactTrieKeySelector<wchar16>: public TCompactTrieCharKeySelector<wchar16> {
+template <>
+struct TCompactTrieKeySelector<wchar16>: public TCompactTrieCharKeySelector<wchar16> {
};
template <>
diff --git a/library/cpp/containers/comptrie/leaf_skipper.h b/library/cpp/containers/comptrie/leaf_skipper.h
index 0bdfa11d63..3959258948 100644
--- a/library/cpp/containers/comptrie/leaf_skipper.h
+++ b/library/cpp/containers/comptrie/leaf_skipper.h
@@ -9,11 +9,11 @@ namespace NCompactTrie {
virtual ~ILeafSkipper() = default;
};
- template <class TPacker>
- class TPackerLeafSkipper: public ILeafSkipper {
+ template <class TPacker>
+ class TPackerLeafSkipper: public ILeafSkipper {
private:
const TPacker* Packer;
-
+
public:
TPackerLeafSkipper(const TPacker* packer)
: Packer(packer)
@@ -25,9 +25,9 @@ namespace NCompactTrie {
}
// For test purposes.
- const TPacker* GetPacker() const {
- return Packer;
- }
+ const TPacker* GetPacker() const {
+ return Packer;
+ }
};
// The data you need to traverse the trie without unpacking the values.
@@ -40,13 +40,13 @@ namespace NCompactTrie {
: Data(data)
, Length(dataLength)
, SkipFunction(skipFunction)
- {
- }
+ {
+ }
bool operator==(const TOpaqueTrie& other) const {
return Data == other.Data &&
- Length == other.Length &&
- &SkipFunction == &other.SkipFunction;
+ Length == other.Length &&
+ &SkipFunction == &other.SkipFunction;
}
bool operator!=(const TOpaqueTrie& other) const {
diff --git a/library/cpp/containers/comptrie/loader/loader.h b/library/cpp/containers/comptrie/loader/loader.h
index d172efa541..ee10e9b451 100644
--- a/library/cpp/containers/comptrie/loader/loader.h
+++ b/library/cpp/containers/comptrie/loader/loader.h
@@ -9,7 +9,7 @@
template <class TrieType, size_t N>
TrieType LoadTrieFromArchive(const TString& key,
const unsigned char (&data)[N],
- bool ignoreErrors = false) {
+ bool ignoreErrors = false) {
TArchiveReader archive(TBlob::NoCopy(data, sizeof(data)));
if (archive.Has(key)) {
TAutoPtr<IInputStream> trie = archive.ObjectByKey(key);
diff --git a/library/cpp/containers/comptrie/loader/loader_ut.cpp b/library/cpp/containers/comptrie/loader/loader_ut.cpp
index 2e771357fe..345063a31e 100644
--- a/library/cpp/containers/comptrie/loader/loader_ut.cpp
+++ b/library/cpp/containers/comptrie/loader/loader_ut.cpp
@@ -6,25 +6,25 @@ using TDummyTrie = TCompactTrie<char, i32>;
namespace {
const unsigned char DATA[] = {
-#include "data.inc"
+#include "data.inc"
};
-}
+}
Y_UNIT_TEST_SUITE(ArchiveLoaderTests) {
Y_UNIT_TEST(BaseTest) {
- TDummyTrie trie = LoadTrieFromArchive<TDummyTrie>("/dummy.trie", DATA, true);
- UNIT_ASSERT_EQUAL(trie.Size(), 3);
+ TDummyTrie trie = LoadTrieFromArchive<TDummyTrie>("/dummy.trie", DATA, true);
+ UNIT_ASSERT_EQUAL(trie.Size(), 3);
- const TString TrieKyes[3] = {
- "zero", "one", "two"};
- i32 val = -1;
- for (i32 i = 0; i < 3; ++i) {
+ const TString TrieKyes[3] = {
+ "zero", "one", "two"};
+ i32 val = -1;
+ for (i32 i = 0; i < 3; ++i) {
UNIT_ASSERT(trie.Find(TrieKyes[i].data(), TrieKyes[i].size(), &val));
- UNIT_ASSERT_EQUAL(i, val);
- }
+ UNIT_ASSERT_EQUAL(i, val);
+ }
- UNIT_CHECK_GENERATED_EXCEPTION(
- LoadTrieFromArchive<TDummyTrie>("/noname.trie", DATA),
- yexception);
+ UNIT_CHECK_GENERATED_EXCEPTION(
+ LoadTrieFromArchive<TDummyTrie>("/noname.trie", DATA),
+ yexception);
}
}
diff --git a/library/cpp/containers/comptrie/make_fast_layout.cpp b/library/cpp/containers/comptrie/make_fast_layout.cpp
index 693384ee7f..ade78d7899 100644
--- a/library/cpp/containers/comptrie/make_fast_layout.cpp
+++ b/library/cpp/containers/comptrie/make_fast_layout.cpp
@@ -24,444 +24,444 @@
// (there is not much difference between these papers, actually).
//
namespace NCompactTrie {
- static size_t FindSupportingPowerOf2(size_t n) {
- size_t result = 1ull << (8 * sizeof(size_t) - 1);
- while (result > n) {
- result >>= 1;
- }
- return result;
+ static size_t FindSupportingPowerOf2(size_t n) {
+ size_t result = 1ull << (8 * sizeof(size_t) - 1);
+ while (result > n) {
+ result >>= 1;
+ }
+ return result;
}
- namespace {
- class TTrieNodeSet {
- public:
+ namespace {
+ class TTrieNodeSet {
+ public:
TTrieNodeSet() = default;
- explicit TTrieNodeSet(const TOpaqueTrie& trie)
- : Body(trie.Length / (8 * MinNodeSize) + 1, 0)
- {
- }
-
- bool Has(size_t offset) const {
- const size_t reducedOffset = ReducedOffset(offset);
- return OffsetCell(reducedOffset) & OffsetMask(reducedOffset);
- }
-
- void Add(size_t offset) {
- const size_t reducedOffset = ReducedOffset(offset);
- OffsetCell(reducedOffset) |= OffsetMask(reducedOffset);
- }
-
- void Remove(size_t offset) {
- const size_t reducedOffset = ReducedOffset(offset);
- OffsetCell(reducedOffset) &= ~OffsetMask(reducedOffset);
- }
-
- void Swap(TTrieNodeSet& other) {
- Body.swap(other.Body);
- }
-
- private:
- static const size_t MinNodeSize = 2;
- TVector<ui8> Body;
-
- static size_t ReducedOffset(size_t offset) {
- return offset / MinNodeSize;
- }
- static ui8 OffsetMask(size_t reducedOffset) {
- return 1 << (reducedOffset % 8);
- }
- ui8& OffsetCell(size_t reducedOffset) {
- return Body.at(reducedOffset / 8);
- }
- const ui8& OffsetCell(size_t reducedOffset) const {
- return Body.at(reducedOffset / 8);
- }
- };
-
- //---------------------------------------------------------------------
-
- class TTrieNodeCounts {
- public:
+ explicit TTrieNodeSet(const TOpaqueTrie& trie)
+ : Body(trie.Length / (8 * MinNodeSize) + 1, 0)
+ {
+ }
+
+ bool Has(size_t offset) const {
+ const size_t reducedOffset = ReducedOffset(offset);
+ return OffsetCell(reducedOffset) & OffsetMask(reducedOffset);
+ }
+
+ void Add(size_t offset) {
+ const size_t reducedOffset = ReducedOffset(offset);
+ OffsetCell(reducedOffset) |= OffsetMask(reducedOffset);
+ }
+
+ void Remove(size_t offset) {
+ const size_t reducedOffset = ReducedOffset(offset);
+ OffsetCell(reducedOffset) &= ~OffsetMask(reducedOffset);
+ }
+
+ void Swap(TTrieNodeSet& other) {
+ Body.swap(other.Body);
+ }
+
+ private:
+ static const size_t MinNodeSize = 2;
+ TVector<ui8> Body;
+
+ static size_t ReducedOffset(size_t offset) {
+ return offset / MinNodeSize;
+ }
+ static ui8 OffsetMask(size_t reducedOffset) {
+ return 1 << (reducedOffset % 8);
+ }
+ ui8& OffsetCell(size_t reducedOffset) {
+ return Body.at(reducedOffset / 8);
+ }
+ const ui8& OffsetCell(size_t reducedOffset) const {
+ return Body.at(reducedOffset / 8);
+ }
+ };
+
+ //---------------------------------------------------------------------
+
+ class TTrieNodeCounts {
+ public:
TTrieNodeCounts() = default;
- explicit TTrieNodeCounts(const TOpaqueTrie& trie)
- : Body(trie.Length / MinNodeSize, 0)
- , IsTree(false)
- {
- }
-
- size_t Get(size_t offset) const {
- return IsTree ? 1 : Body.at(offset / MinNodeSize);
- }
-
- void Inc(size_t offset) {
- if (IsTree) {
- return;
- }
- ui8& count = Body.at(offset / MinNodeSize);
- if (count != MaxCount) {
- ++count;
- }
- }
-
- size_t Dec(size_t offset) {
- if (IsTree) {
- return 0;
- }
- ui8& count = Body.at(offset / MinNodeSize);
- Y_ASSERT(count > 0);
- if (count != MaxCount) {
- --count;
- }
- return count;
- }
-
- void Swap(TTrieNodeCounts& other) {
- Body.swap(other.Body);
- ::DoSwap(IsTree, other.IsTree);
- }
-
- void SetTreeMode() {
- IsTree = true;
- Body = TVector<ui8>();
- }
-
- private:
- static const size_t MinNodeSize = 2;
- static const ui8 MaxCount = 255;
-
- TVector<ui8> Body;
- bool IsTree = false;
- };
-
- //----------------------------------------------------------
-
- class TOffsetIndex {
- public:
- // In all methods:
- // Key --- offset from the beginning of the old trie.
- // Value --- offset from the end of the new trie.
-
- explicit TOffsetIndex(TTrieNodeCounts& counts) {
- ParentCounts.Swap(counts);
- }
-
- void Add(size_t key, size_t value) {
- Data[key] = value;
- }
-
- size_t Size() const {
- return Data.size();
- }
-
- size_t Get(size_t key) {
- auto pos = Data.find(key);
- if (pos == Data.end()) {
- ythrow yexception() << "Bad node walking order: trying to get node offset too early or too many times!";
- }
- size_t result = pos->second;
- if (ParentCounts.Dec(key) == 0) {
- // We don't need this offset any more.
- Data.erase(pos);
- }
- return result;
- }
-
- private:
- TTrieNodeCounts ParentCounts;
- THashMap<size_t, size_t> Data;
- };
-
- //---------------------------------------------------------------------------------------
-
- class TTrieMeasurer {
- public:
- TTrieMeasurer(const TOpaqueTrie& trie, bool verbose);
- void Measure();
-
- size_t GetDepth() const {
- return Depth;
- }
-
- size_t GetNodeCount() const {
- return NodeCount;
- }
-
- size_t GetUnminimizedNodeCount() const {
- return UnminimizedNodeCount;
- }
-
- bool IsTree() const {
- return NodeCount == UnminimizedNodeCount;
- }
-
- TTrieNodeCounts& GetParentCounts() {
- return ParentCounts;
- }
-
- const TOpaqueTrie& GetTrie() const {
- return Trie;
- }
-
- private:
- const TOpaqueTrie& Trie;
- size_t Depth;
- TTrieNodeCounts ParentCounts;
- size_t NodeCount;
- size_t UnminimizedNodeCount;
- const bool Verbose;
-
- // returns depth, increments NodeCount.
- size_t MeasureSubtrie(size_t rootOffset, bool isNewPath);
- };
-
- TTrieMeasurer::TTrieMeasurer(const TOpaqueTrie& trie, bool verbose)
- : Trie(trie)
- , Depth(0)
- , ParentCounts(trie)
- , NodeCount(0)
- , UnminimizedNodeCount(0)
- , Verbose(verbose)
- {
- Y_ASSERT(Trie.Data);
- }
-
- void TTrieMeasurer::Measure() {
- if (Verbose) {
- Cerr << "Measuring the trie..." << Endl;
- }
- NodeCount = 0;
- UnminimizedNodeCount = 0;
- Depth = MeasureSubtrie(0, true);
- if (IsTree()) {
- ParentCounts.SetTreeMode();
- }
- if (Verbose) {
- Cerr << "Unminimized node count: " << UnminimizedNodeCount << Endl;
- Cerr << "Trie depth: " << Depth << Endl;
- Cerr << "Node count: " << NodeCount << Endl;
- }
- }
-
- // A chain of nodes linked by forward links
- // is considered one node with many left and right children
- // for depth measuring here and in
- // TVanEmdeBoasReverseNodeEnumerator::FindDescendants.
- size_t TTrieMeasurer::MeasureSubtrie(size_t rootOffset, bool isNewPath) {
- Y_ASSERT(rootOffset < Trie.Length);
- TNode node(Trie.Data, rootOffset, Trie.SkipFunction);
- size_t depth = 0;
- for (;;) {
- ++UnminimizedNodeCount;
- if (Verbose) {
- ShowProgress(UnminimizedNodeCount);
- }
- if (isNewPath) {
- if (ParentCounts.Get(node.GetOffset()) > 0) {
- isNewPath = false;
- } else {
- ++NodeCount;
- }
- ParentCounts.Inc(node.GetOffset());
- }
- if (node.GetLeftOffset()) {
- depth = Max(depth, 1 + MeasureSubtrie(node.GetLeftOffset(), isNewPath));
- }
- if (node.GetRightOffset()) {
- depth = Max(depth, 1 + MeasureSubtrie(node.GetRightOffset(), isNewPath));
- }
- if (node.GetForwardOffset()) {
- node = TNode(Trie.Data, node.GetForwardOffset(), Trie.SkipFunction);
- } else {
- break;
- }
- }
- return depth;
+ explicit TTrieNodeCounts(const TOpaqueTrie& trie)
+ : Body(trie.Length / MinNodeSize, 0)
+ , IsTree(false)
+ {
+ }
+
+ size_t Get(size_t offset) const {
+ return IsTree ? 1 : Body.at(offset / MinNodeSize);
+ }
+
+ void Inc(size_t offset) {
+ if (IsTree) {
+ return;
+ }
+ ui8& count = Body.at(offset / MinNodeSize);
+ if (count != MaxCount) {
+ ++count;
+ }
+ }
+
+ size_t Dec(size_t offset) {
+ if (IsTree) {
+ return 0;
+ }
+ ui8& count = Body.at(offset / MinNodeSize);
+ Y_ASSERT(count > 0);
+ if (count != MaxCount) {
+ --count;
+ }
+ return count;
+ }
+
+ void Swap(TTrieNodeCounts& other) {
+ Body.swap(other.Body);
+ ::DoSwap(IsTree, other.IsTree);
+ }
+
+ void SetTreeMode() {
+ IsTree = true;
+ Body = TVector<ui8>();
+ }
+
+ private:
+ static const size_t MinNodeSize = 2;
+ static const ui8 MaxCount = 255;
+
+ TVector<ui8> Body;
+ bool IsTree = false;
+ };
+
+ //----------------------------------------------------------
+
+ class TOffsetIndex {
+ public:
+ // In all methods:
+ // Key --- offset from the beginning of the old trie.
+ // Value --- offset from the end of the new trie.
+
+ explicit TOffsetIndex(TTrieNodeCounts& counts) {
+ ParentCounts.Swap(counts);
+ }
+
+ void Add(size_t key, size_t value) {
+ Data[key] = value;
+ }
+
+ size_t Size() const {
+ return Data.size();
+ }
+
+ size_t Get(size_t key) {
+ auto pos = Data.find(key);
+ if (pos == Data.end()) {
+ ythrow yexception() << "Bad node walking order: trying to get node offset too early or too many times!";
+ }
+ size_t result = pos->second;
+ if (ParentCounts.Dec(key) == 0) {
+ // We don't need this offset any more.
+ Data.erase(pos);
+ }
+ return result;
+ }
+
+ private:
+ TTrieNodeCounts ParentCounts;
+ THashMap<size_t, size_t> Data;
+ };
+
+ //---------------------------------------------------------------------------------------
+
+ class TTrieMeasurer {
+ public:
+ TTrieMeasurer(const TOpaqueTrie& trie, bool verbose);
+ void Measure();
+
+ size_t GetDepth() const {
+ return Depth;
+ }
+
+ size_t GetNodeCount() const {
+ return NodeCount;
+ }
+
+ size_t GetUnminimizedNodeCount() const {
+ return UnminimizedNodeCount;
+ }
+
+ bool IsTree() const {
+ return NodeCount == UnminimizedNodeCount;
+ }
+
+ TTrieNodeCounts& GetParentCounts() {
+ return ParentCounts;
+ }
+
+ const TOpaqueTrie& GetTrie() const {
+ return Trie;
+ }
+
+ private:
+ const TOpaqueTrie& Trie;
+ size_t Depth;
+ TTrieNodeCounts ParentCounts;
+ size_t NodeCount;
+ size_t UnminimizedNodeCount;
+ const bool Verbose;
+
+ // returns depth, increments NodeCount.
+ size_t MeasureSubtrie(size_t rootOffset, bool isNewPath);
+ };
+
+ TTrieMeasurer::TTrieMeasurer(const TOpaqueTrie& trie, bool verbose)
+ : Trie(trie)
+ , Depth(0)
+ , ParentCounts(trie)
+ , NodeCount(0)
+ , UnminimizedNodeCount(0)
+ , Verbose(verbose)
+ {
+ Y_ASSERT(Trie.Data);
}
-
- //--------------------------------------------------------------------------------------
-
- using TLevelNodes = TVector<size_t>;
-
- struct TLevel {
- size_t Depth;
- TLevelNodes Nodes;
-
- explicit TLevel(size_t depth)
- : Depth(depth)
- {
- }
- };
-
- //----------------------------------------------------------------------------------------
-
- class TVanEmdeBoasReverseNodeEnumerator: public TReverseNodeEnumerator {
- public:
- TVanEmdeBoasReverseNodeEnumerator(TTrieMeasurer& measurer, bool verbose)
- : Fresh(true)
- , Trie(measurer.GetTrie())
- , Depth(measurer.GetDepth())
- , MaxIndexSize(0)
- , BackIndex(measurer.GetParentCounts())
- , ProcessedNodes(measurer.GetTrie())
- , Verbose(verbose)
- {
- }
-
- bool Move() override {
- if (!Fresh) {
- CentralWord.pop_back();
- }
- if (CentralWord.empty()) {
- return MoveCentralWordStart();
- }
- return true;
- }
-
- const TNode& Get() const {
- return CentralWord.back();
- }
-
- size_t GetLeafLength() const override {
- return Get().GetLeafLength();
- }
-
- // Returns recalculated offset from the end of the current node.
- size_t PrepareOffset(size_t absoffset, size_t resultLength) {
- if (!absoffset)
- return NPOS;
- return resultLength - BackIndex.Get(absoffset);
- }
-
- size_t RecreateNode(char* buffer, size_t resultLength) override {
- TWriteableNode newNode(Get(), Trie.Data);
- newNode.ForwardOffset = PrepareOffset(Get().GetForwardOffset(), resultLength);
- newNode.LeftOffset = PrepareOffset(Get().GetLeftOffset(), resultLength);
- newNode.RightOffset = PrepareOffset(Get().GetRightOffset(), resultLength);
-
- const size_t len = newNode.Pack(buffer);
- ProcessedNodes.Add(Get().GetOffset());
- BackIndex.Add(Get().GetOffset(), resultLength + len);
- MaxIndexSize = Max(MaxIndexSize, BackIndex.Size());
- return len;
- }
-
- private:
- bool Fresh;
- TOpaqueTrie Trie;
- size_t Depth;
- size_t MaxIndexSize;
-
- TVector<TLevel> Trace;
- TOffsetIndex BackIndex;
- TVector<TNode> CentralWord;
- TTrieNodeSet ProcessedNodes;
-
- const bool Verbose;
-
- private:
- bool IsVisited(size_t offset) const {
- return ProcessedNodes.Has(offset);
- }
-
- void AddCascade(size_t step) {
- Y_ASSERT(!(step & (step - 1))); // Should be a power of 2.
- while (step > 0) {
- size_t root = Trace.back().Nodes.back();
- TLevel level(Trace.back().Depth + step);
- Trace.push_back(level);
- size_t actualStep = FindSupportingPowerOf2(FindDescendants(root, step, Trace.back().Nodes));
- if (actualStep != step) {
- // Retry with a smaller step.
- Y_ASSERT(actualStep < step);
- step = actualStep;
- Trace.pop_back();
- } else {
- step /= 2;
- }
- }
- }
-
- void FillCentralWord() {
- CentralWord.clear();
- CentralWord.push_back(TNode(Trie.Data, Trace.back().Nodes.back(), Trie.SkipFunction));
- // Do not check for epsilon links, as the traversal order now is different.
- while (CentralWord.back().GetForwardOffset() && !IsVisited(CentralWord.back().GetForwardOffset())) {
- CentralWord.push_back(TNode(Trie.Data, CentralWord.back().GetForwardOffset(), Trie.SkipFunction));
- }
- }
-
- bool MoveCentralWordStart() {
- do {
- if (Fresh) {
- TLevel root(0);
- Trace.push_back(root);
- Trace.back().Nodes.push_back(0);
- const size_t sectionDepth = FindSupportingPowerOf2(Depth);
- AddCascade(sectionDepth);
- Fresh = false;
- } else {
- Trace.back().Nodes.pop_back();
- if (Trace.back().Nodes.empty() && Trace.size() == 1) {
- if (Verbose) {
- Cerr << "Max index size: " << MaxIndexSize << Endl;
- Cerr << "Current index size: " << BackIndex.Size() << Endl;
- }
- // We just popped the root.
- return false;
- }
- size_t lastStep = Trace.back().Depth - Trace[Trace.size() - 2].Depth;
- if (Trace.back().Nodes.empty()) {
- Trace.pop_back();
- }
- AddCascade(lastStep / 2);
- }
- } while (IsVisited(Trace.back().Nodes.back()));
- FillCentralWord();
- return true;
- }
-
- // Returns the maximal depth it has reached while searching.
- // This is a method because it needs OffsetIndex to skip processed nodes.
- size_t FindDescendants(size_t rootOffset, size_t depth, TLevelNodes& result) const {
- if (depth == 0) {
- result.push_back(rootOffset);
- return 0;
- }
- size_t actualDepth = 0;
- TNode node(Trie.Data, rootOffset, Trie.SkipFunction);
- for (;;) {
- if (node.GetLeftOffset() && !IsVisited(node.GetLeftOffset())) {
- actualDepth = Max(actualDepth,
- 1 + FindDescendants(node.GetLeftOffset(), depth - 1, result));
+
+ void TTrieMeasurer::Measure() {
+ if (Verbose) {
+ Cerr << "Measuring the trie..." << Endl;
+ }
+ NodeCount = 0;
+ UnminimizedNodeCount = 0;
+ Depth = MeasureSubtrie(0, true);
+ if (IsTree()) {
+ ParentCounts.SetTreeMode();
+ }
+ if (Verbose) {
+ Cerr << "Unminimized node count: " << UnminimizedNodeCount << Endl;
+ Cerr << "Trie depth: " << Depth << Endl;
+ Cerr << "Node count: " << NodeCount << Endl;
+ }
+ }
+
+ // A chain of nodes linked by forward links
+ // is considered one node with many left and right children
+ // for depth measuring here and in
+ // TVanEmdeBoasReverseNodeEnumerator::FindDescendants.
+ size_t TTrieMeasurer::MeasureSubtrie(size_t rootOffset, bool isNewPath) {
+ Y_ASSERT(rootOffset < Trie.Length);
+ TNode node(Trie.Data, rootOffset, Trie.SkipFunction);
+ size_t depth = 0;
+ for (;;) {
+ ++UnminimizedNodeCount;
+ if (Verbose) {
+ ShowProgress(UnminimizedNodeCount);
+ }
+ if (isNewPath) {
+ if (ParentCounts.Get(node.GetOffset()) > 0) {
+ isNewPath = false;
+ } else {
+ ++NodeCount;
+ }
+ ParentCounts.Inc(node.GetOffset());
+ }
+ if (node.GetLeftOffset()) {
+ depth = Max(depth, 1 + MeasureSubtrie(node.GetLeftOffset(), isNewPath));
+ }
+ if (node.GetRightOffset()) {
+ depth = Max(depth, 1 + MeasureSubtrie(node.GetRightOffset(), isNewPath));
+ }
+ if (node.GetForwardOffset()) {
+ node = TNode(Trie.Data, node.GetForwardOffset(), Trie.SkipFunction);
+ } else {
+ break;
+ }
+ }
+ return depth;
+ }
+
+ //--------------------------------------------------------------------------------------
+
+ using TLevelNodes = TVector<size_t>;
+
+ struct TLevel {
+ size_t Depth;
+ TLevelNodes Nodes;
+
+ explicit TLevel(size_t depth)
+ : Depth(depth)
+ {
+ }
+ };
+
+ //----------------------------------------------------------------------------------------
+
+ class TVanEmdeBoasReverseNodeEnumerator: public TReverseNodeEnumerator {
+ public:
+ TVanEmdeBoasReverseNodeEnumerator(TTrieMeasurer& measurer, bool verbose)
+ : Fresh(true)
+ , Trie(measurer.GetTrie())
+ , Depth(measurer.GetDepth())
+ , MaxIndexSize(0)
+ , BackIndex(measurer.GetParentCounts())
+ , ProcessedNodes(measurer.GetTrie())
+ , Verbose(verbose)
+ {
+ }
+
+ bool Move() override {
+ if (!Fresh) {
+ CentralWord.pop_back();
+ }
+ if (CentralWord.empty()) {
+ return MoveCentralWordStart();
+ }
+ return true;
+ }
+
+ const TNode& Get() const {
+ return CentralWord.back();
+ }
+
+ size_t GetLeafLength() const override {
+ return Get().GetLeafLength();
+ }
+
+ // Returns recalculated offset from the end of the current node.
+ size_t PrepareOffset(size_t absoffset, size_t resultLength) {
+ if (!absoffset)
+ return NPOS;
+ return resultLength - BackIndex.Get(absoffset);
+ }
+
+ size_t RecreateNode(char* buffer, size_t resultLength) override {
+ TWriteableNode newNode(Get(), Trie.Data);
+ newNode.ForwardOffset = PrepareOffset(Get().GetForwardOffset(), resultLength);
+ newNode.LeftOffset = PrepareOffset(Get().GetLeftOffset(), resultLength);
+ newNode.RightOffset = PrepareOffset(Get().GetRightOffset(), resultLength);
+
+ const size_t len = newNode.Pack(buffer);
+ ProcessedNodes.Add(Get().GetOffset());
+ BackIndex.Add(Get().GetOffset(), resultLength + len);
+ MaxIndexSize = Max(MaxIndexSize, BackIndex.Size());
+ return len;
+ }
+
+ private:
+ bool Fresh;
+ TOpaqueTrie Trie;
+ size_t Depth;
+ size_t MaxIndexSize;
+
+ TVector<TLevel> Trace;
+ TOffsetIndex BackIndex;
+ TVector<TNode> CentralWord;
+ TTrieNodeSet ProcessedNodes;
+
+ const bool Verbose;
+
+ private:
+ bool IsVisited(size_t offset) const {
+ return ProcessedNodes.Has(offset);
+ }
+
+ void AddCascade(size_t step) {
+ Y_ASSERT(!(step & (step - 1))); // Should be a power of 2.
+ while (step > 0) {
+ size_t root = Trace.back().Nodes.back();
+ TLevel level(Trace.back().Depth + step);
+ Trace.push_back(level);
+ size_t actualStep = FindSupportingPowerOf2(FindDescendants(root, step, Trace.back().Nodes));
+ if (actualStep != step) {
+ // Retry with a smaller step.
+ Y_ASSERT(actualStep < step);
+ step = actualStep;
+ Trace.pop_back();
+ } else {
+ step /= 2;
}
- if (node.GetRightOffset() && !IsVisited(node.GetRightOffset())) {
- actualDepth = Max(actualDepth,
- 1 + FindDescendants(node.GetRightOffset(), depth - 1, result));
- }
- if (node.GetForwardOffset() && !IsVisited(node.GetForwardOffset())) {
- node = TNode(Trie.Data, node.GetForwardOffset(), Trie.SkipFunction);
- } else {
- break;
- }
}
- return actualDepth;
}
- };
- } // Anonymous namespace.
-
- //-----------------------------------------------------------------------------------
-
- size_t RawCompactTrieFastLayoutImpl(IOutputStream& os, const TOpaqueTrie& trie, bool verbose) {
- if (!trie.Data || !trie.Length) {
+ void FillCentralWord() {
+ CentralWord.clear();
+ CentralWord.push_back(TNode(Trie.Data, Trace.back().Nodes.back(), Trie.SkipFunction));
+ // Do not check for epsilon links, as the traversal order now is different.
+ while (CentralWord.back().GetForwardOffset() && !IsVisited(CentralWord.back().GetForwardOffset())) {
+ CentralWord.push_back(TNode(Trie.Data, CentralWord.back().GetForwardOffset(), Trie.SkipFunction));
+ }
+ }
+
+ bool MoveCentralWordStart() {
+ do {
+ if (Fresh) {
+ TLevel root(0);
+ Trace.push_back(root);
+ Trace.back().Nodes.push_back(0);
+ const size_t sectionDepth = FindSupportingPowerOf2(Depth);
+ AddCascade(sectionDepth);
+ Fresh = false;
+ } else {
+ Trace.back().Nodes.pop_back();
+ if (Trace.back().Nodes.empty() && Trace.size() == 1) {
+ if (Verbose) {
+ Cerr << "Max index size: " << MaxIndexSize << Endl;
+ Cerr << "Current index size: " << BackIndex.Size() << Endl;
+ }
+ // We just popped the root.
+ return false;
+ }
+ size_t lastStep = Trace.back().Depth - Trace[Trace.size() - 2].Depth;
+ if (Trace.back().Nodes.empty()) {
+ Trace.pop_back();
+ }
+ AddCascade(lastStep / 2);
+ }
+ } while (IsVisited(Trace.back().Nodes.back()));
+ FillCentralWord();
+ return true;
+ }
+
+ // Returns the maximal depth it has reached while searching.
+ // This is a method because it needs OffsetIndex to skip processed nodes.
+ size_t FindDescendants(size_t rootOffset, size_t depth, TLevelNodes& result) const {
+ if (depth == 0) {
+ result.push_back(rootOffset);
+ return 0;
+ }
+ size_t actualDepth = 0;
+ TNode node(Trie.Data, rootOffset, Trie.SkipFunction);
+ for (;;) {
+ if (node.GetLeftOffset() && !IsVisited(node.GetLeftOffset())) {
+ actualDepth = Max(actualDepth,
+ 1 + FindDescendants(node.GetLeftOffset(), depth - 1, result));
+ }
+ if (node.GetRightOffset() && !IsVisited(node.GetRightOffset())) {
+ actualDepth = Max(actualDepth,
+ 1 + FindDescendants(node.GetRightOffset(), depth - 1, result));
+ }
+ if (node.GetForwardOffset() && !IsVisited(node.GetForwardOffset())) {
+ node = TNode(Trie.Data, node.GetForwardOffset(), Trie.SkipFunction);
+ } else {
+ break;
+ }
+ }
+ return actualDepth;
+ }
+ };
+
+ } // Anonymous namespace.
+
+ //-----------------------------------------------------------------------------------
+
+ size_t RawCompactTrieFastLayoutImpl(IOutputStream& os, const TOpaqueTrie& trie, bool verbose) {
+ if (!trie.Data || !trie.Length) {
return 0;
}
- TTrieMeasurer mes(trie, verbose);
- mes.Measure();
- TVanEmdeBoasReverseNodeEnumerator enumerator(mes, verbose);
- return WriteTrieBackwards(os, enumerator, verbose);
+ TTrieMeasurer mes(trie, verbose);
+ mes.Measure();
+ TVanEmdeBoasReverseNodeEnumerator enumerator(mes, verbose);
+ return WriteTrieBackwards(os, enumerator, verbose);
}
-
+
}
diff --git a/library/cpp/containers/comptrie/make_fast_layout.h b/library/cpp/containers/comptrie/make_fast_layout.h
index cb488ffaa5..b8fab5d65b 100644
--- a/library/cpp/containers/comptrie/make_fast_layout.h
+++ b/library/cpp/containers/comptrie/make_fast_layout.h
@@ -6,15 +6,15 @@
class IOutputStream;
namespace NCompactTrie {
- // Return value: size of the resulting trie.
- size_t RawCompactTrieFastLayoutImpl(IOutputStream& os, const NCompactTrie::TOpaqueTrie& trie, bool verbose);
+ // Return value: size of the resulting trie.
+ size_t RawCompactTrieFastLayoutImpl(IOutputStream& os, const NCompactTrie::TOpaqueTrie& trie, bool verbose);
+
+ // Return value: size of the resulting trie.
+ template <class TPacker>
+ size_t CompactTrieMakeFastLayoutImpl(IOutputStream& os, const char* data, size_t datalength, bool verbose, const TPacker* packer) {
+ TPackerLeafSkipper<TPacker> skipper(packer);
+ TOpaqueTrie trie(data, datalength, skipper);
+ return RawCompactTrieFastLayoutImpl(os, trie, verbose);
+ }
- // Return value: size of the resulting trie.
- template <class TPacker>
- size_t CompactTrieMakeFastLayoutImpl(IOutputStream& os, const char* data, size_t datalength, bool verbose, const TPacker* packer) {
- TPackerLeafSkipper<TPacker> skipper(packer);
- TOpaqueTrie trie(data, datalength, skipper);
- return RawCompactTrieFastLayoutImpl(os, trie, verbose);
- }
-
}
diff --git a/library/cpp/containers/comptrie/minimize.cpp b/library/cpp/containers/comptrie/minimize.cpp
index f53511428f..80d0b25217 100644
--- a/library/cpp/containers/comptrie/minimize.cpp
+++ b/library/cpp/containers/comptrie/minimize.cpp
@@ -8,352 +8,352 @@
#include <util/generic/algorithm.h>
namespace NCompactTrie {
- // Minimize the trie. The result is equivalent to the original
- // trie, except that it takes less space (and has marginally lower
- // performance, because of eventual epsilon links).
- // The algorithm is as follows: starting from the largest pieces, we find
- // nodes that have identical continuations (Daciuk's right language),
- // and repack the trie. Repacking is done in-place, so memory is less
- // of an issue; however, it may take considerable time.
-
- // IMPORTANT: never try to reminimize an already minimized trie or a trie with fast layout.
- // Because of non-local structure and epsilon links, it won't work
- // as you expect it to, and can destroy the trie in the making.
-
- namespace {
- using TOffsetList = TVector<size_t>;
- using TPieceIndex = THashMap<size_t, TOffsetList>;
-
- using TSizePair = std::pair<size_t, size_t>;
- using TSizePairVector = TVector<TSizePair>;
- using TSizePairVectorVector = TVector<TSizePairVector>;
-
- class TOffsetMap {
- protected:
- TSizePairVectorVector Data;
-
- public:
- TOffsetMap() {
- Data.reserve(0x10000);
- }
-
- void Add(size_t key, size_t value) {
- size_t hikey = key & 0xFFFF;
-
- if (Data.size() <= hikey)
- Data.resize(hikey + 1);
-
- TSizePairVector& sublist = Data[hikey];
-
- for (auto& it : sublist) {
- if (it.first == key) {
- it.second = value;
-
- return;
- }
- }
-
- sublist.push_back(TSizePair(key, value));
- }
-
- bool Contains(size_t key) const {
- return (Get(key) != 0);
+ // Minimize the trie. The result is equivalent to the original
+ // trie, except that it takes less space (and has marginally lower
+ // performance, because of eventual epsilon links).
+ // The algorithm is as follows: starting from the largest pieces, we find
+ // nodes that have identical continuations (Daciuk's right language),
+ // and repack the trie. Repacking is done in-place, so memory is less
+ // of an issue; however, it may take considerable time.
+
+ // IMPORTANT: never try to reminimize an already minimized trie or a trie with fast layout.
+ // Because of non-local structure and epsilon links, it won't work
+ // as you expect it to, and can destroy the trie in the making.
+
+ namespace {
+ using TOffsetList = TVector<size_t>;
+ using TPieceIndex = THashMap<size_t, TOffsetList>;
+
+ using TSizePair = std::pair<size_t, size_t>;
+ using TSizePairVector = TVector<TSizePair>;
+ using TSizePairVectorVector = TVector<TSizePairVector>;
+
+ class TOffsetMap {
+ protected:
+ TSizePairVectorVector Data;
+
+ public:
+ TOffsetMap() {
+ Data.reserve(0x10000);
}
-
- size_t Get(size_t key) const {
- size_t hikey = key & 0xFFFF;
-
- if (Data.size() <= hikey)
- return 0;
-
- const TSizePairVector& sublist = Data[hikey];
-
- for (const auto& it : sublist) {
- if (it.first == key)
- return it.second;
- }
-
- return 0;
- }
- };
-
- class TOffsetDeltas {
- protected:
- TSizePairVector Data;
-
- public:
- void Add(size_t key, size_t value) {
- if (Data.empty()) {
- if (key == value)
- return; // no offset
- } else {
- TSizePair last = Data.back();
-
- if (key <= last.first) {
- Cerr << "Trouble: elements to offset delta list added in wrong order" << Endl;
-
- return;
- }
-
- if (last.first + value == last.second + key)
- return; // same offset
- }
-
- Data.push_back(TSizePair(key, value));
+
+ void Add(size_t key, size_t value) {
+ size_t hikey = key & 0xFFFF;
+
+ if (Data.size() <= hikey)
+ Data.resize(hikey + 1);
+
+ TSizePairVector& sublist = Data[hikey];
+
+ for (auto& it : sublist) {
+ if (it.first == key) {
+ it.second = value;
+
+ return;
+ }
+ }
+
+ sublist.push_back(TSizePair(key, value));
+ }
+
+ bool Contains(size_t key) const {
+ return (Get(key) != 0);
+ }
+
+ size_t Get(size_t key) const {
+ size_t hikey = key & 0xFFFF;
+
+ if (Data.size() <= hikey)
+ return 0;
+
+ const TSizePairVector& sublist = Data[hikey];
+
+ for (const auto& it : sublist) {
+ if (it.first == key)
+ return it.second;
+ }
+
+ return 0;
+ }
+ };
+
+ class TOffsetDeltas {
+ protected:
+ TSizePairVector Data;
+
+ public:
+ void Add(size_t key, size_t value) {
+ if (Data.empty()) {
+ if (key == value)
+ return; // no offset
+ } else {
+ TSizePair last = Data.back();
+
+ if (key <= last.first) {
+ Cerr << "Trouble: elements to offset delta list added in wrong order" << Endl;
+
+ return;
+ }
+
+ if (last.first + value == last.second + key)
+ return; // same offset
+ }
+
+ Data.push_back(TSizePair(key, value));
+ }
+
+ size_t Get(size_t key) const {
+ if (Data.empty())
+ return key; // difference is zero;
+
+ if (key < Data.front().first)
+ return key;
+
+ // Binary search for the highest entry in the list that does not exceed the key
+ size_t from = 0;
+ size_t to = Data.size() - 1;
+
+ while (from < to) {
+ size_t midpoint = (from + to + 1) / 2;
+
+ if (key < Data[midpoint].first)
+ to = midpoint - 1;
+ else
+ from = midpoint;
+ }
+
+ TSizePair entry = Data[from];
+
+ return key - entry.first + entry.second;
+ }
+ };
+
+ class TPieceComparer {
+ private:
+ const char* Data;
+ const size_t Length;
+
+ public:
+ TPieceComparer(const char* buf, size_t len)
+ : Data(buf)
+ , Length(len)
+ {
}
-
- size_t Get(size_t key) const {
- if (Data.empty())
- return key; // difference is zero;
-
- if (key < Data.front().first)
- return key;
-
- // Binary search for the highest entry in the list that does not exceed the key
- size_t from = 0;
- size_t to = Data.size() - 1;
-
- while (from < to) {
- size_t midpoint = (from + to + 1) / 2;
-
- if (key < Data[midpoint].first)
- to = midpoint - 1;
- else
- from = midpoint;
- }
-
- TSizePair entry = Data[from];
-
- return key - entry.first + entry.second;
- }
- };
-
- class TPieceComparer {
- private:
- const char* Data;
- const size_t Length;
-
- public:
- TPieceComparer(const char* buf, size_t len)
- : Data(buf)
- , Length(len)
- {
- }
-
- bool operator()(size_t p1, const size_t p2) {
- int res = memcmp(Data + p1, Data + p2, Length);
-
- if (res)
- return (res > 0);
-
- return (p1 > p2); // the pieces are sorted in the reverse order of appearance
- }
- };
-
- struct TBranchPoint {
- TNode Node;
- int Selector;
-
- public:
- TBranchPoint()
- : Selector(0)
- {
- }
-
- TBranchPoint(const char* data, size_t offset, const ILeafSkipper& skipFunction)
- : Node(data, offset, skipFunction)
- , Selector(0)
- {
- }
-
- bool IsFinal() const {
- return Node.IsFinal();
- }
-
- // NextNode returns child nodes, starting from the last node: Right, then Left, then Forward
- size_t NextNode(const TOffsetMap& mergedNodes) {
- while (Selector < 3) {
- size_t nextOffset = 0;
-
- switch (++Selector) {
- case 1:
- if (Node.GetRightOffset())
- nextOffset = Node.GetRightOffset();
- break;
-
- case 2:
- if (Node.GetLeftOffset())
- nextOffset = Node.GetLeftOffset();
- break;
-
- case 3:
- if (Node.GetForwardOffset())
- nextOffset = Node.GetForwardOffset();
- break;
-
- default:
- break;
- }
-
- if (nextOffset && !mergedNodes.Contains(nextOffset))
- return nextOffset;
- }
- return 0;
+
+ bool operator()(size_t p1, const size_t p2) {
+ int res = memcmp(Data + p1, Data + p2, Length);
+
+ if (res)
+ return (res > 0);
+
+ return (p1 > p2); // the pieces are sorted in the reverse order of appearance
+ }
+ };
+
+ struct TBranchPoint {
+ TNode Node;
+ int Selector;
+
+ public:
+ TBranchPoint()
+ : Selector(0)
+ {
}
- };
-
- class TMergingReverseNodeEnumerator: public TReverseNodeEnumerator {
- private:
- bool Fresh;
- TOpaqueTrie Trie;
- const TOffsetMap& MergeMap;
- TVector<TBranchPoint> Trace;
- TOffsetDeltas OffsetIndex;
-
- public:
- TMergingReverseNodeEnumerator(const TOpaqueTrie& trie, const TOffsetMap& mergers)
- : Fresh(true)
- , Trie(trie)
- , MergeMap(mergers)
- {
- }
-
- bool Move() override {
- if (Fresh) {
- Trace.push_back(TBranchPoint(Trie.Data, 0, Trie.SkipFunction));
- Fresh = false;
- } else {
- if (Trace.empty())
- return false;
-
- Trace.pop_back();
-
- if (Trace.empty())
- return false;
- }
-
- size_t nextnode = Trace.back().NextNode(MergeMap);
-
- while (nextnode) {
- Trace.push_back(TBranchPoint(Trie.Data, nextnode, Trie.SkipFunction));
- nextnode = Trace.back().NextNode(MergeMap);
- }
-
- return (!Trace.empty());
- }
-
- const TNode& Get() const {
- return Trace.back().Node;
- }
- size_t GetLeafLength() const override {
- return Get().GetLeafLength();
- }
-
- // Returns recalculated offset from the end of the current node
- size_t PrepareOffset(size_t absoffset, size_t minilength) {
- if (!absoffset)
- return NPOS;
-
- if (MergeMap.Contains(absoffset))
- absoffset = MergeMap.Get(absoffset);
- return minilength - OffsetIndex.Get(Trie.Length - absoffset);
- }
-
- size_t RecreateNode(char* buffer, size_t resultLength) override {
- TWriteableNode newNode(Get(), Trie.Data);
- newNode.ForwardOffset = PrepareOffset(Get().GetForwardOffset(), resultLength);
- newNode.LeftOffset = PrepareOffset(Get().GetLeftOffset(), resultLength);
- newNode.RightOffset = PrepareOffset(Get().GetRightOffset(), resultLength);
-
- if (!buffer)
- return newNode.Measure();
-
- const size_t len = newNode.Pack(buffer);
- OffsetIndex.Add(Trie.Length - Get().GetOffset(), resultLength + len);
-
- return len;
- }
- };
-
+
+ TBranchPoint(const char* data, size_t offset, const ILeafSkipper& skipFunction)
+ : Node(data, offset, skipFunction)
+ , Selector(0)
+ {
+ }
+
+ bool IsFinal() const {
+ return Node.IsFinal();
+ }
+
+ // NextNode returns child nodes, starting from the last node: Right, then Left, then Forward
+ size_t NextNode(const TOffsetMap& mergedNodes) {
+ while (Selector < 3) {
+ size_t nextOffset = 0;
+
+ switch (++Selector) {
+ case 1:
+ if (Node.GetRightOffset())
+ nextOffset = Node.GetRightOffset();
+ break;
+
+ case 2:
+ if (Node.GetLeftOffset())
+ nextOffset = Node.GetLeftOffset();
+ break;
+
+ case 3:
+ if (Node.GetForwardOffset())
+ nextOffset = Node.GetForwardOffset();
+ break;
+
+ default:
+ break;
+ }
+
+ if (nextOffset && !mergedNodes.Contains(nextOffset))
+ return nextOffset;
+ }
+ return 0;
+ }
+ };
+
+ class TMergingReverseNodeEnumerator: public TReverseNodeEnumerator {
+ private:
+ bool Fresh;
+ TOpaqueTrie Trie;
+ const TOffsetMap& MergeMap;
+ TVector<TBranchPoint> Trace;
+ TOffsetDeltas OffsetIndex;
+
+ public:
+ TMergingReverseNodeEnumerator(const TOpaqueTrie& trie, const TOffsetMap& mergers)
+ : Fresh(true)
+ , Trie(trie)
+ , MergeMap(mergers)
+ {
+ }
+
+ bool Move() override {
+ if (Fresh) {
+ Trace.push_back(TBranchPoint(Trie.Data, 0, Trie.SkipFunction));
+ Fresh = false;
+ } else {
+ if (Trace.empty())
+ return false;
+
+ Trace.pop_back();
+
+ if (Trace.empty())
+ return false;
+ }
+
+ size_t nextnode = Trace.back().NextNode(MergeMap);
+
+ while (nextnode) {
+ Trace.push_back(TBranchPoint(Trie.Data, nextnode, Trie.SkipFunction));
+ nextnode = Trace.back().NextNode(MergeMap);
+ }
+
+ return (!Trace.empty());
+ }
+
+ const TNode& Get() const {
+ return Trace.back().Node;
+ }
+ size_t GetLeafLength() const override {
+ return Get().GetLeafLength();
+ }
+
+ // Returns recalculated offset from the end of the current node
+ size_t PrepareOffset(size_t absoffset, size_t minilength) {
+ if (!absoffset)
+ return NPOS;
+
+ if (MergeMap.Contains(absoffset))
+ absoffset = MergeMap.Get(absoffset);
+ return minilength - OffsetIndex.Get(Trie.Length - absoffset);
+ }
+
+ size_t RecreateNode(char* buffer, size_t resultLength) override {
+ TWriteableNode newNode(Get(), Trie.Data);
+ newNode.ForwardOffset = PrepareOffset(Get().GetForwardOffset(), resultLength);
+ newNode.LeftOffset = PrepareOffset(Get().GetLeftOffset(), resultLength);
+ newNode.RightOffset = PrepareOffset(Get().GetRightOffset(), resultLength);
+
+ if (!buffer)
+ return newNode.Measure();
+
+ const size_t len = newNode.Pack(buffer);
+ OffsetIndex.Add(Trie.Length - Get().GetOffset(), resultLength + len);
+
+ return len;
+ }
+ };
+
}
- static void AddPiece(TPieceIndex& index, size_t offset, size_t len) {
- index[len].push_back(offset);
- }
-
- static TOffsetMap FindEquivalentSubtries(const TOpaqueTrie& trie, bool verbose, size_t minMergeSize) {
- // Tree nodes, arranged by span length.
- // When all nodes of a given size are considered, they pop off the queue.
- TPieceIndex subtries;
- TOffsetMap merger;
- // Start walking the trie from head.
- AddPiece(subtries, 0, trie.Length);
-
- size_t counter = 0;
- // Now consider all nodes with sizeable continuations
- for (size_t curlen = trie.Length; curlen >= minMergeSize && !subtries.empty(); curlen--) {
- TPieceIndex::iterator iit = subtries.find(curlen);
-
- if (iit == subtries.end())
- continue; // fast forward to the next available length value
-
- TOffsetList& batch = iit->second;
- TPieceComparer comparer(trie.Data, curlen);
- Sort(batch.begin(), batch.end(), comparer);
-
- TOffsetList::iterator it = batch.begin();
- while (it != batch.end()) {
- if (verbose)
- ShowProgress(++counter);
-
- size_t offset = *it;
-
- // Fill the array with the subnodes of the element
- TNode node(trie.Data, offset, trie.SkipFunction);
- size_t end = offset + curlen;
- if (size_t rightOffset = node.GetRightOffset()) {
- AddPiece(subtries, rightOffset, end - rightOffset);
- end = rightOffset;
- }
- if (size_t leftOffset = node.GetLeftOffset()) {
- AddPiece(subtries, leftOffset, end - leftOffset);
- end = leftOffset;
- }
- if (size_t forwardOffset = node.GetForwardOffset()) {
- AddPiece(subtries, forwardOffset, end - forwardOffset);
- }
-
- while (++it != batch.end()) {
- // Find next different; until then, just add the offsets to the list of merged nodes.
- size_t nextoffset = *it;
-
- if (memcmp(trie.Data + offset, trie.Data + nextoffset, curlen))
- break;
-
- merger.Add(nextoffset, offset);
- }
- }
-
- subtries.erase(curlen);
+ static void AddPiece(TPieceIndex& index, size_t offset, size_t len) {
+ index[len].push_back(offset);
+ }
+
+ static TOffsetMap FindEquivalentSubtries(const TOpaqueTrie& trie, bool verbose, size_t minMergeSize) {
+ // Tree nodes, arranged by span length.
+ // When all nodes of a given size are considered, they pop off the queue.
+ TPieceIndex subtries;
+ TOffsetMap merger;
+ // Start walking the trie from head.
+ AddPiece(subtries, 0, trie.Length);
+
+ size_t counter = 0;
+ // Now consider all nodes with sizeable continuations
+ for (size_t curlen = trie.Length; curlen >= minMergeSize && !subtries.empty(); curlen--) {
+ TPieceIndex::iterator iit = subtries.find(curlen);
+
+ if (iit == subtries.end())
+ continue; // fast forward to the next available length value
+
+ TOffsetList& batch = iit->second;
+ TPieceComparer comparer(trie.Data, curlen);
+ Sort(batch.begin(), batch.end(), comparer);
+
+ TOffsetList::iterator it = batch.begin();
+ while (it != batch.end()) {
+ if (verbose)
+ ShowProgress(++counter);
+
+ size_t offset = *it;
+
+ // Fill the array with the subnodes of the element
+ TNode node(trie.Data, offset, trie.SkipFunction);
+ size_t end = offset + curlen;
+ if (size_t rightOffset = node.GetRightOffset()) {
+ AddPiece(subtries, rightOffset, end - rightOffset);
+ end = rightOffset;
+ }
+ if (size_t leftOffset = node.GetLeftOffset()) {
+ AddPiece(subtries, leftOffset, end - leftOffset);
+ end = leftOffset;
+ }
+ if (size_t forwardOffset = node.GetForwardOffset()) {
+ AddPiece(subtries, forwardOffset, end - forwardOffset);
+ }
+
+ while (++it != batch.end()) {
+ // Find next different; until then, just add the offsets to the list of merged nodes.
+ size_t nextoffset = *it;
+
+ if (memcmp(trie.Data + offset, trie.Data + nextoffset, curlen))
+ break;
+
+ merger.Add(nextoffset, offset);
+ }
+ }
+
+ subtries.erase(curlen);
+ }
+ if (verbose) {
+ Cerr << counter << Endl;
}
- if (verbose) {
- Cerr << counter << Endl;
- }
- return merger;
+ return merger;
}
-
- size_t RawCompactTrieMinimizeImpl(IOutputStream& os, TOpaqueTrie& trie, bool verbose, size_t minMergeSize, EMinimizeMode mode) {
- if (!trie.Data || !trie.Length) {
- return 0;
- }
-
- TOffsetMap merger = FindEquivalentSubtries(trie, verbose, minMergeSize);
- TMergingReverseNodeEnumerator enumerator(trie, merger);
-
- if (mode == MM_DEFAULT)
- return WriteTrieBackwards(os, enumerator, verbose);
- else
- return WriteTrieBackwardsNoAlloc(os, enumerator, trie, mode);
+
+ size_t RawCompactTrieMinimizeImpl(IOutputStream& os, TOpaqueTrie& trie, bool verbose, size_t minMergeSize, EMinimizeMode mode) {
+ if (!trie.Data || !trie.Length) {
+ return 0;
+ }
+
+ TOffsetMap merger = FindEquivalentSubtries(trie, verbose, minMergeSize);
+ TMergingReverseNodeEnumerator enumerator(trie, merger);
+
+ if (mode == MM_DEFAULT)
+ return WriteTrieBackwards(os, enumerator, verbose);
+ else
+ return WriteTrieBackwardsNoAlloc(os, enumerator, trie, mode);
}
}
diff --git a/library/cpp/containers/comptrie/minimize.h b/library/cpp/containers/comptrie/minimize.h
index 2464571437..baaa431d04 100644
--- a/library/cpp/containers/comptrie/minimize.h
+++ b/library/cpp/containers/comptrie/minimize.h
@@ -6,24 +6,24 @@
class IOutputStream;
namespace NCompactTrie {
- size_t MeasureOffset(size_t offset);
+ size_t MeasureOffset(size_t offset);
- enum EMinimizeMode {
- MM_DEFAULT, // alollocate new memory for minimized tree
- MM_NOALLOC, // minimize tree in the same buffer
- MM_INPLACE // do not write tree to the stream, but move to the buffer beginning
- };
+ enum EMinimizeMode {
+ MM_DEFAULT, // alollocate new memory for minimized tree
+ MM_NOALLOC, // minimize tree in the same buffer
+ MM_INPLACE // do not write tree to the stream, but move to the buffer beginning
+ };
- // Return value: size of the minimized trie.
- size_t RawCompactTrieMinimizeImpl(IOutputStream& os, TOpaqueTrie& trie, bool verbose, size_t minMergeSize, EMinimizeMode mode);
+ // Return value: size of the minimized trie.
+ size_t RawCompactTrieMinimizeImpl(IOutputStream& os, TOpaqueTrie& trie, bool verbose, size_t minMergeSize, EMinimizeMode mode);
- // Return value: size of the minimized trie.
- template <class TPacker>
- size_t CompactTrieMinimizeImpl(IOutputStream& os, const char* data, size_t datalength, bool verbose, const TPacker* packer, EMinimizeMode mode) {
- TPackerLeafSkipper<TPacker> skipper(packer);
- size_t minmerge = MeasureOffset(datalength);
- TOpaqueTrie trie(data, datalength, skipper);
- return RawCompactTrieMinimizeImpl(os, trie, verbose, minmerge, mode);
- }
+ // Return value: size of the minimized trie.
+ template <class TPacker>
+ size_t CompactTrieMinimizeImpl(IOutputStream& os, const char* data, size_t datalength, bool verbose, const TPacker* packer, EMinimizeMode mode) {
+ TPackerLeafSkipper<TPacker> skipper(packer);
+ size_t minmerge = MeasureOffset(datalength);
+ TOpaqueTrie trie(data, datalength, skipper);
+ return RawCompactTrieMinimizeImpl(os, trie, verbose, minmerge, mode);
+ }
}
diff --git a/library/cpp/containers/comptrie/node.cpp b/library/cpp/containers/comptrie/node.cpp
index cd98e6b4d6..5fd22f15ec 100644
--- a/library/cpp/containers/comptrie/node.cpp
+++ b/library/cpp/containers/comptrie/node.cpp
@@ -6,74 +6,74 @@
#include <util/generic/yexception.h>
namespace NCompactTrie {
- TNode::TNode()
- : Offset(0)
- , LeafLength(0)
- , CoreLength(0)
- , Label(0)
- {
- for (auto& offset : Offsets) {
- offset = 0;
- }
+ TNode::TNode()
+ : Offset(0)
+ , LeafLength(0)
+ , CoreLength(0)
+ , Label(0)
+ {
+ for (auto& offset : Offsets) {
+ offset = 0;
+ }
}
- // We believe that epsilon links are found only on the forward-position and that afer jumping an epsilon link you come to an ordinary node.
-
- TNode::TNode(const char* data, size_t offset, const ILeafSkipper& skipFunction)
- : Offset(offset)
- , LeafLength(0)
- , CoreLength(0)
- , Label(0)
- {
- for (auto& anOffset : Offsets) {
- anOffset = 0;
- }
- if (!data)
- return; // empty constructor
-
- const char* datapos = data + offset;
- char flags = *(datapos++);
- Y_ASSERT(!IsEpsilonLink(flags));
- Label = *(datapos++);
+ // We believe that epsilon links are found only on the forward-position and that afer jumping an epsilon link you come to an ordinary node.
- size_t leftsize = LeftOffsetLen(flags);
- size_t& leftOffset = Offsets[D_LEFT];
- leftOffset = UnpackOffset(datapos, leftsize);
- if (leftOffset) {
- leftOffset += Offset;
- }
- datapos += leftsize;
+ TNode::TNode(const char* data, size_t offset, const ILeafSkipper& skipFunction)
+ : Offset(offset)
+ , LeafLength(0)
+ , CoreLength(0)
+ , Label(0)
+ {
+ for (auto& anOffset : Offsets) {
+ anOffset = 0;
+ }
+ if (!data)
+ return; // empty constructor
- size_t rightsize = RightOffsetLen(flags);
- size_t& rightOffset = Offsets[D_RIGHT];
- rightOffset = UnpackOffset(datapos, rightsize);
- if (rightOffset) {
- rightOffset += Offset;
- }
- datapos += rightsize;
-
- if (flags & MT_FINAL) {
- Offsets[D_FINAL] = datapos - data;
- LeafLength = skipFunction.SkipLeaf(datapos);
- }
+ const char* datapos = data + offset;
+ char flags = *(datapos++);
+ Y_ASSERT(!IsEpsilonLink(flags));
+ Label = *(datapos++);
- CoreLength = 2 + leftsize + rightsize + LeafLength;
- if (flags & MT_NEXT) {
- size_t& forwardOffset = Offsets[D_NEXT];
- forwardOffset = Offset + CoreLength;
- // There might be an epsilon link at the forward position.
- // If so, set the ForwardOffset to the value that points to the link's end.
- const char* forwardPos = data + forwardOffset;
- const char forwardFlags = *forwardPos;
- if (IsEpsilonLink(forwardFlags)) {
- // Jump through the epsilon link.
- size_t epsilonOffset = UnpackOffset(forwardPos + 1, forwardFlags & MT_SIZEMASK);
- if (!epsilonOffset) {
- ythrow yexception() << "Corrupted epsilon link";
- }
- forwardOffset += epsilonOffset;
+ size_t leftsize = LeftOffsetLen(flags);
+ size_t& leftOffset = Offsets[D_LEFT];
+ leftOffset = UnpackOffset(datapos, leftsize);
+ if (leftOffset) {
+ leftOffset += Offset;
+ }
+ datapos += leftsize;
+
+ size_t rightsize = RightOffsetLen(flags);
+ size_t& rightOffset = Offsets[D_RIGHT];
+ rightOffset = UnpackOffset(datapos, rightsize);
+ if (rightOffset) {
+ rightOffset += Offset;
+ }
+ datapos += rightsize;
+
+ if (flags & MT_FINAL) {
+ Offsets[D_FINAL] = datapos - data;
+ LeafLength = skipFunction.SkipLeaf(datapos);
+ }
+
+ CoreLength = 2 + leftsize + rightsize + LeafLength;
+ if (flags & MT_NEXT) {
+ size_t& forwardOffset = Offsets[D_NEXT];
+ forwardOffset = Offset + CoreLength;
+ // There might be an epsilon link at the forward position.
+ // If so, set the ForwardOffset to the value that points to the link's end.
+ const char* forwardPos = data + forwardOffset;
+ const char forwardFlags = *forwardPos;
+ if (IsEpsilonLink(forwardFlags)) {
+ // Jump through the epsilon link.
+ size_t epsilonOffset = UnpackOffset(forwardPos + 1, forwardFlags & MT_SIZEMASK);
+ if (!epsilonOffset) {
+ ythrow yexception() << "Corrupted epsilon link";
+ }
+ forwardOffset += epsilonOffset;
}
}
}
-
+
}
diff --git a/library/cpp/containers/comptrie/node.h b/library/cpp/containers/comptrie/node.h
index 9a936998b5..d6f4317db0 100644
--- a/library/cpp/containers/comptrie/node.h
+++ b/library/cpp/containers/comptrie/node.h
@@ -3,78 +3,78 @@
#include <cstddef>
namespace NCompactTrie {
- class ILeafSkipper;
+ class ILeafSkipper;
- enum TDirection {
- D_LEFT,
- D_FINAL,
- D_NEXT,
- D_RIGHT,
- D_MAX
- };
-
- inline TDirection& operator++(TDirection& direction) {
- direction = static_cast<TDirection>(direction + 1);
- return direction;
- }
+ enum TDirection {
+ D_LEFT,
+ D_FINAL,
+ D_NEXT,
+ D_RIGHT,
+ D_MAX
+ };
- inline TDirection& operator--(TDirection& direction) {
- direction = static_cast<TDirection>(direction - 1);
- return direction;
- }
+ inline TDirection& operator++(TDirection& direction) {
+ direction = static_cast<TDirection>(direction + 1);
+ return direction;
+ }
- class TNode {
- public:
- TNode();
- // Processes epsilon links and sets ForwardOffset to correct value. Assumes an epsilon link doesn't point to an epsilon link.
- TNode(const char* data, size_t offset, const ILeafSkipper& skipFunction);
+ inline TDirection& operator--(TDirection& direction) {
+ direction = static_cast<TDirection>(direction - 1);
+ return direction;
+ }
- size_t GetOffset() const {
- return Offset;
- }
+ class TNode {
+ public:
+ TNode();
+ // Processes epsilon links and sets ForwardOffset to correct value. Assumes an epsilon link doesn't point to an epsilon link.
+ TNode(const char* data, size_t offset, const ILeafSkipper& skipFunction);
- size_t GetLeafOffset() const {
- return Offsets[D_FINAL];
- }
- size_t GetLeafLength() const {
- return LeafLength;
- }
- size_t GetCoreLength() const {
- return CoreLength;
- }
+ size_t GetOffset() const {
+ return Offset;
+ }
- size_t GetOffsetByDirection(TDirection direction) const {
- return Offsets[direction];
- }
+ size_t GetLeafOffset() const {
+ return Offsets[D_FINAL];
+ }
+ size_t GetLeafLength() const {
+ return LeafLength;
+ }
+ size_t GetCoreLength() const {
+ return CoreLength;
+ }
- size_t GetForwardOffset() const {
- return Offsets[D_NEXT];
- }
- size_t GetLeftOffset() const {
- return Offsets[D_LEFT];
- }
- size_t GetRightOffset() const {
- return Offsets[D_RIGHT];
- }
- char GetLabel() const {
- return Label;
- }
+ size_t GetOffsetByDirection(TDirection direction) const {
+ return Offsets[direction];
+ }
- bool IsFinal() const {
- return GetLeafOffset() != 0;
- }
+ size_t GetForwardOffset() const {
+ return Offsets[D_NEXT];
+ }
+ size_t GetLeftOffset() const {
+ return Offsets[D_LEFT];
+ }
+ size_t GetRightOffset() const {
+ return Offsets[D_RIGHT];
+ }
+ char GetLabel() const {
+ return Label;
+ }
- bool HasEpsilonLinkForward() const {
- return GetForwardOffset() > Offset + CoreLength;
- }
-
- private:
- size_t Offsets[D_MAX];
- size_t Offset;
- size_t LeafLength;
- size_t CoreLength;
+ bool IsFinal() const {
+ return GetLeafOffset() != 0;
+ }
- char Label;
- };
+ bool HasEpsilonLinkForward() const {
+ return GetForwardOffset() > Offset + CoreLength;
+ }
-}
+ private:
+ size_t Offsets[D_MAX];
+ size_t Offset;
+ size_t LeafLength;
+ size_t CoreLength;
+
+ char Label;
+ };
+
+}
diff --git a/library/cpp/containers/comptrie/opaque_trie_iterator.cpp b/library/cpp/containers/comptrie/opaque_trie_iterator.cpp
index d1916358aa..5fd3914be6 100644
--- a/library/cpp/containers/comptrie/opaque_trie_iterator.cpp
+++ b/library/cpp/containers/comptrie/opaque_trie_iterator.cpp
@@ -3,229 +3,229 @@
#include "node.h"
namespace NCompactTrie {
- TOpaqueTrieIterator::TOpaqueTrieIterator(const TOpaqueTrie& trie, const char* emptyValue, bool atend,
- size_t maxKeyLength)
- : Trie(trie)
- , EmptyValue(emptyValue)
- , AtEmptyValue(emptyValue && !atend)
- , MaxKeyLength(maxKeyLength)
- {
- if (!AtEmptyValue && !atend)
- Forward();
- }
-
- bool TOpaqueTrieIterator::operator==(const TOpaqueTrieIterator& rhs) const {
- return (Trie == rhs.Trie &&
- Forks == rhs.Forks &&
- EmptyValue == rhs.EmptyValue &&
- AtEmptyValue == rhs.AtEmptyValue &&
- MaxKeyLength == rhs.MaxKeyLength);
- }
-
- bool TOpaqueTrieIterator::HasMaxKeyLength() const {
- return MaxKeyLength != size_t(-1) && MeasureNarrowKey() == MaxKeyLength;
- }
-
- bool TOpaqueTrieIterator::Forward() {
- if (AtEmptyValue) {
- AtEmptyValue = false;
- bool res = Forward(); // TODO delete this after format change
- if (res && MeasureNarrowKey() != 0) {
- return res; // there was not "\0" key
- }
- // otherwise we are skipping "\0" key
- }
-
- if (!Trie.Length)
- return false;
-
- if (Forks.Empty()) {
- TFork fork(Trie.Data, 0, Trie.Length, Trie.SkipFunction);
- Forks.Push(fork);
- } else {
- TFork* topFork = &Forks.Top();
- while (!topFork->NextDirection()) {
- if (topFork->Node.GetOffset() >= Trie.Length)
- return false;
- Forks.Pop();
- if (Forks.Empty())
- return false;
- topFork = &Forks.Top();
- }
- }
-
- Y_ASSERT(!Forks.Empty());
- while (Forks.Top().CurrentDirection != D_FINAL && !HasMaxKeyLength()) {
- TFork nextFork = Forks.Top().NextFork(Trie.SkipFunction);
- Forks.Push(nextFork);
- }
- TFork& top = Forks.Top();
- static_assert(D_FINAL < D_NEXT, "relative order of NEXT and FINAL directions has changed");
- if (HasMaxKeyLength() && top.CurrentDirection == D_FINAL && top.HasDirection(D_NEXT)) {
- top.NextDirection();
- }
- return true;
- }
-
- bool TOpaqueTrieIterator::Backward() {
- if (AtEmptyValue)
+ TOpaqueTrieIterator::TOpaqueTrieIterator(const TOpaqueTrie& trie, const char* emptyValue, bool atend,
+ size_t maxKeyLength)
+ : Trie(trie)
+ , EmptyValue(emptyValue)
+ , AtEmptyValue(emptyValue && !atend)
+ , MaxKeyLength(maxKeyLength)
+ {
+ if (!AtEmptyValue && !atend)
+ Forward();
+ }
+
+ bool TOpaqueTrieIterator::operator==(const TOpaqueTrieIterator& rhs) const {
+ return (Trie == rhs.Trie &&
+ Forks == rhs.Forks &&
+ EmptyValue == rhs.EmptyValue &&
+ AtEmptyValue == rhs.AtEmptyValue &&
+ MaxKeyLength == rhs.MaxKeyLength);
+ }
+
+ bool TOpaqueTrieIterator::HasMaxKeyLength() const {
+ return MaxKeyLength != size_t(-1) && MeasureNarrowKey() == MaxKeyLength;
+ }
+
+ bool TOpaqueTrieIterator::Forward() {
+ if (AtEmptyValue) {
+ AtEmptyValue = false;
+ bool res = Forward(); // TODO delete this after format change
+ if (res && MeasureNarrowKey() != 0) {
+ return res; // there was not "\0" key
+ }
+ // otherwise we are skipping "\0" key
+ }
+
+ if (!Trie.Length)
return false;
- if (!Trie.Length) {
- if (EmptyValue) {
- // A trie that has only the empty value;
- // we are not at the empty value, so move to it.
+ if (Forks.Empty()) {
+ TFork fork(Trie.Data, 0, Trie.Length, Trie.SkipFunction);
+ Forks.Push(fork);
+ } else {
+ TFork* topFork = &Forks.Top();
+ while (!topFork->NextDirection()) {
+ if (topFork->Node.GetOffset() >= Trie.Length)
+ return false;
+ Forks.Pop();
+ if (Forks.Empty())
+ return false;
+ topFork = &Forks.Top();
+ }
+ }
+
+ Y_ASSERT(!Forks.Empty());
+ while (Forks.Top().CurrentDirection != D_FINAL && !HasMaxKeyLength()) {
+ TFork nextFork = Forks.Top().NextFork(Trie.SkipFunction);
+ Forks.Push(nextFork);
+ }
+ TFork& top = Forks.Top();
+ static_assert(D_FINAL < D_NEXT, "relative order of NEXT and FINAL directions has changed");
+ if (HasMaxKeyLength() && top.CurrentDirection == D_FINAL && top.HasDirection(D_NEXT)) {
+ top.NextDirection();
+ }
+ return true;
+ }
+
+ bool TOpaqueTrieIterator::Backward() {
+ if (AtEmptyValue)
+ return false;
+
+ if (!Trie.Length) {
+ if (EmptyValue) {
+ // A trie that has only the empty value;
+ // we are not at the empty value, so move to it.
AtEmptyValue = true;
return true;
- } else {
- // Empty trie.
- return false;
+ } else {
+ // Empty trie.
+ return false;
+ }
+ }
+
+ if (Forks.Empty()) {
+ TFork fork(Trie.Data, 0, Trie.Length, Trie.SkipFunction);
+ fork.LastDirection();
+ Forks.Push(fork);
+ } else {
+ TFork* topFork = &Forks.Top();
+ while (!topFork->PrevDirection()) {
+ if (topFork->Node.GetOffset() >= Trie.Length)
+ return false;
+ Forks.Pop();
+ if (!Forks.Empty()) {
+ topFork = &Forks.Top();
+ } else {
+ // When there are no more forks,
+ // we have to iterate over the empty value.
+ if (!EmptyValue)
+ return false;
+ AtEmptyValue = true;
+ return true;
+ }
}
}
- if (Forks.Empty()) {
- TFork fork(Trie.Data, 0, Trie.Length, Trie.SkipFunction);
- fork.LastDirection();
- Forks.Push(fork);
- } else {
- TFork* topFork = &Forks.Top();
- while (!topFork->PrevDirection()) {
- if (topFork->Node.GetOffset() >= Trie.Length)
- return false;
- Forks.Pop();
- if (!Forks.Empty()) {
- topFork = &Forks.Top();
- } else {
- // When there are no more forks,
- // we have to iterate over the empty value.
- if (!EmptyValue)
- return false;
- AtEmptyValue = true;
- return true;
- }
- }
- }
-
- Y_ASSERT(!Forks.Empty());
- while (Forks.Top().CurrentDirection != D_FINAL && !HasMaxKeyLength()) {
- TFork nextFork = Forks.Top().NextFork(Trie.SkipFunction);
- nextFork.LastDirection();
- Forks.Push(nextFork);
- }
- TFork& top = Forks.Top();
- static_assert(D_FINAL < D_NEXT, "relative order of NEXT and FINAL directions has changed");
- if (HasMaxKeyLength() && top.CurrentDirection == D_NEXT && top.HasDirection(D_FINAL)) {
- top.PrevDirection();
- }
- if (MeasureNarrowKey() == 0) {
- // This is the '\0' key, skip it and get to the EmptyValue.
- AtEmptyValue = true;
- Forks.Clear();
- }
- return true;
- }
-
- const char* TOpaqueTrieIterator::GetValuePtr() const {
- if (!Forks.Empty()) {
- const TFork& lastFork = Forks.Top();
- Y_ASSERT(lastFork.Node.IsFinal() && lastFork.CurrentDirection == D_FINAL);
- return Trie.Data + lastFork.GetValueOffset();
- }
- Y_ASSERT(AtEmptyValue);
- return EmptyValue;
- }
-
- //-------------------------------------------------------------------------
-
- TString TForkStack::GetKey() const {
- if (HasEmptyKey()) {
- return TString();
- }
-
- TString result(Key);
- if (TopHasLabelInKey()) {
- result.append(Top().GetLabel());
- }
- return result;
- }
-
- bool TForkStack::HasEmptyKey() const {
- // Special case: if we get a single zero label, treat it as an empty key
- // TODO delete this after format change
- if (TopHasLabelInKey()) {
- return Key.size() == 0 && Top().GetLabel() == '\0';
- } else {
- return Key.size() == 1 && Key[0] == '\0';
- }
- }
-
- size_t TForkStack::MeasureKey() const {
- size_t result = Key.size() + (TopHasLabelInKey() ? 1 : 0);
- if (result == 1 && HasEmptyKey()) {
- return 0;
- }
- return result;
- }
-
- //-------------------------------------------------------------------------
-
- TFork::TFork(const char* data, size_t offset, size_t limit, const ILeafSkipper& skipper)
- : Node(data, offset, skipper)
- , Data(data)
- , Limit(limit)
- , CurrentDirection(TDirection(0))
- {
+ Y_ASSERT(!Forks.Empty());
+ while (Forks.Top().CurrentDirection != D_FINAL && !HasMaxKeyLength()) {
+ TFork nextFork = Forks.Top().NextFork(Trie.SkipFunction);
+ nextFork.LastDirection();
+ Forks.Push(nextFork);
+ }
+ TFork& top = Forks.Top();
+ static_assert(D_FINAL < D_NEXT, "relative order of NEXT and FINAL directions has changed");
+ if (HasMaxKeyLength() && top.CurrentDirection == D_NEXT && top.HasDirection(D_FINAL)) {
+ top.PrevDirection();
+ }
+ if (MeasureNarrowKey() == 0) {
+ // This is the '\0' key, skip it and get to the EmptyValue.
+ AtEmptyValue = true;
+ Forks.Clear();
+ }
+ return true;
+ }
+
+ const char* TOpaqueTrieIterator::GetValuePtr() const {
+ if (!Forks.Empty()) {
+ const TFork& lastFork = Forks.Top();
+ Y_ASSERT(lastFork.Node.IsFinal() && lastFork.CurrentDirection == D_FINAL);
+ return Trie.Data + lastFork.GetValueOffset();
+ }
+ Y_ASSERT(AtEmptyValue);
+ return EmptyValue;
+ }
+
+ //-------------------------------------------------------------------------
+
+ TString TForkStack::GetKey() const {
+ if (HasEmptyKey()) {
+ return TString();
+ }
+
+ TString result(Key);
+ if (TopHasLabelInKey()) {
+ result.append(Top().GetLabel());
+ }
+ return result;
+ }
+
+ bool TForkStack::HasEmptyKey() const {
+ // Special case: if we get a single zero label, treat it as an empty key
+ // TODO delete this after format change
+ if (TopHasLabelInKey()) {
+ return Key.size() == 0 && Top().GetLabel() == '\0';
+ } else {
+ return Key.size() == 1 && Key[0] == '\0';
+ }
+ }
+
+ size_t TForkStack::MeasureKey() const {
+ size_t result = Key.size() + (TopHasLabelInKey() ? 1 : 0);
+ if (result == 1 && HasEmptyKey()) {
+ return 0;
+ }
+ return result;
+ }
+
+ //-------------------------------------------------------------------------
+
+ TFork::TFork(const char* data, size_t offset, size_t limit, const ILeafSkipper& skipper)
+ : Node(data, offset, skipper)
+ , Data(data)
+ , Limit(limit)
+ , CurrentDirection(TDirection(0))
+ {
#if COMPTRIE_DATA_CHECK
- if (Node.GetOffset() >= Limit - 1)
- ythrow yexception() << "gone beyond the limit, data is corrupted";
+ if (Node.GetOffset() >= Limit - 1)
+ ythrow yexception() << "gone beyond the limit, data is corrupted";
#endif
- while (CurrentDirection < D_MAX && !HasDirection(CurrentDirection)) {
- ++CurrentDirection;
- }
- }
-
- bool TFork::operator==(const TFork& rhs) const {
- return (Data == rhs.Data &&
- Node.GetOffset() == rhs.Node.GetOffset() &&
- CurrentDirection == rhs.CurrentDirection);
- }
-
- inline bool TFork::NextDirection() {
- do {
- ++CurrentDirection;
- } while (CurrentDirection < D_MAX && !HasDirection(CurrentDirection));
- return CurrentDirection < D_MAX;
- }
-
- inline bool TFork::PrevDirection() {
- if (CurrentDirection == TDirection(0)) {
- return false;
- }
- do {
- --CurrentDirection;
- } while (CurrentDirection > 0 && !HasDirection(CurrentDirection));
- return HasDirection(CurrentDirection);
- }
-
- void TFork::LastDirection() {
- CurrentDirection = D_MAX;
- PrevDirection();
- }
-
- bool TFork::SetDirection(TDirection direction) {
- if (!HasDirection(direction)) {
- return false;
- }
- CurrentDirection = direction;
- return true;
- }
-
- char TFork::GetLabel() const {
- return Node.GetLabel();
- }
-
- size_t TFork::GetValueOffset() const {
- return Node.GetLeafOffset();
+ while (CurrentDirection < D_MAX && !HasDirection(CurrentDirection)) {
+ ++CurrentDirection;
+ }
+ }
+
+ bool TFork::operator==(const TFork& rhs) const {
+ return (Data == rhs.Data &&
+ Node.GetOffset() == rhs.Node.GetOffset() &&
+ CurrentDirection == rhs.CurrentDirection);
+ }
+
+ inline bool TFork::NextDirection() {
+ do {
+ ++CurrentDirection;
+ } while (CurrentDirection < D_MAX && !HasDirection(CurrentDirection));
+ return CurrentDirection < D_MAX;
+ }
+
+ inline bool TFork::PrevDirection() {
+ if (CurrentDirection == TDirection(0)) {
+ return false;
+ }
+ do {
+ --CurrentDirection;
+ } while (CurrentDirection > 0 && !HasDirection(CurrentDirection));
+ return HasDirection(CurrentDirection);
+ }
+
+ void TFork::LastDirection() {
+ CurrentDirection = D_MAX;
+ PrevDirection();
+ }
+
+ bool TFork::SetDirection(TDirection direction) {
+ if (!HasDirection(direction)) {
+ return false;
+ }
+ CurrentDirection = direction;
+ return true;
+ }
+
+ char TFork::GetLabel() const {
+ return Node.GetLabel();
+ }
+
+ size_t TFork::GetValueOffset() const {
+ return Node.GetLeafOffset();
}
}
diff --git a/library/cpp/containers/comptrie/opaque_trie_iterator.h b/library/cpp/containers/comptrie/opaque_trie_iterator.h
index 8e615d7e70..195da3c191 100644
--- a/library/cpp/containers/comptrie/opaque_trie_iterator.h
+++ b/library/cpp/containers/comptrie/opaque_trie_iterator.h
@@ -9,258 +9,258 @@
#include <util/generic/yexception.h>
namespace NCompactTrie {
- class ILeafSkipper;
-
- class TFork { // Auxiliary class for a branching point in the iterator
- public:
- TNode Node;
- const char* Data;
- size_t Limit; // valid data is in range [Data + Node.GetOffset(), Data + Limit)
- TDirection CurrentDirection;
-
- public:
- TFork(const char* data, size_t offset, size_t limit, const ILeafSkipper& skipper);
-
- bool operator==(const TFork& rhs) const;
-
- bool HasLabelInKey() const {
- return CurrentDirection == D_NEXT || CurrentDirection == D_FINAL;
- }
-
- bool NextDirection();
- bool PrevDirection();
- void LastDirection();
-
- bool HasDirection(TDirection direction) const {
- return Node.GetOffsetByDirection(direction);
- }
- // If the fork doesn't have the specified direction,
- // leaves the direction intact and returns false.
- // Otherwise returns true.
- bool SetDirection(TDirection direction);
- TFork NextFork(const ILeafSkipper& skipper) const;
-
- char GetLabel() const;
- size_t GetValueOffset() const;
- };
-
- inline TFork TFork::NextFork(const ILeafSkipper& skipper) const {
- Y_ASSERT(CurrentDirection != D_FINAL);
- size_t offset = Node.GetOffsetByDirection(CurrentDirection);
- return TFork(Data, offset, Limit, skipper);
+ class ILeafSkipper;
+
+ class TFork { // Auxiliary class for a branching point in the iterator
+ public:
+ TNode Node;
+ const char* Data;
+ size_t Limit; // valid data is in range [Data + Node.GetOffset(), Data + Limit)
+ TDirection CurrentDirection;
+
+ public:
+ TFork(const char* data, size_t offset, size_t limit, const ILeafSkipper& skipper);
+
+ bool operator==(const TFork& rhs) const;
+
+ bool HasLabelInKey() const {
+ return CurrentDirection == D_NEXT || CurrentDirection == D_FINAL;
+ }
+
+ bool NextDirection();
+ bool PrevDirection();
+ void LastDirection();
+
+ bool HasDirection(TDirection direction) const {
+ return Node.GetOffsetByDirection(direction);
+ }
+ // If the fork doesn't have the specified direction,
+ // leaves the direction intact and returns false.
+ // Otherwise returns true.
+ bool SetDirection(TDirection direction);
+ TFork NextFork(const ILeafSkipper& skipper) const;
+
+ char GetLabel() const;
+ size_t GetValueOffset() const;
+ };
+
+ inline TFork TFork::NextFork(const ILeafSkipper& skipper) const {
+ Y_ASSERT(CurrentDirection != D_FINAL);
+ size_t offset = Node.GetOffsetByDirection(CurrentDirection);
+ return TFork(Data, offset, Limit, skipper);
}
- //------------------------------------------------------------------------------------------------
- class TForkStack {
- public:
- void Push(const TFork& fork) {
- if (TopHasLabelInKey()) {
- Key.push_back(Top().GetLabel());
- }
- Forks.push_back(fork);
- }
-
- void Pop() {
- Forks.pop_back();
- if (TopHasLabelInKey()) {
- Key.pop_back();
- }
- }
-
- TFork& Top() {
- return Forks.back();
+ //------------------------------------------------------------------------------------------------
+ class TForkStack {
+ public:
+ void Push(const TFork& fork) {
+ if (TopHasLabelInKey()) {
+ Key.push_back(Top().GetLabel());
+ }
+ Forks.push_back(fork);
+ }
+
+ void Pop() {
+ Forks.pop_back();
+ if (TopHasLabelInKey()) {
+ Key.pop_back();
+ }
+ }
+
+ TFork& Top() {
+ return Forks.back();
+ }
+ const TFork& Top() const {
+ return Forks.back();
+ }
+
+ bool Empty() const {
+ return Forks.empty();
+ }
+
+ void Clear() {
+ Forks.clear();
+ Key.clear();
+ }
+
+ bool operator==(const TForkStack& other) const {
+ return Forks == other.Forks;
+ }
+ bool operator!=(const TForkStack& other) const {
+ return !(*this == other);
+ }
+
+ TString GetKey() const;
+ size_t MeasureKey() const;
+
+ private:
+ TVector<TFork> Forks;
+ TString Key;
+
+ private:
+ bool TopHasLabelInKey() const {
+ return !Empty() && Top().HasLabelInKey();
+ }
+ bool HasEmptyKey() const;
+ };
+
+ //------------------------------------------------------------------------------------------------
+
+ template <class TSymbol>
+ struct TConvertRawKey {
+ typedef typename TCompactTrieKeySelector<TSymbol>::TKey TKey;
+ static TKey Get(const TString& rawkey) {
+ TKey result;
+ const size_t sz = rawkey.size();
+ result.reserve(sz / sizeof(TSymbol));
+ for (size_t i = 0; i < sz; i += sizeof(TSymbol)) {
+ TSymbol sym = 0;
+ for (size_t j = 0; j < sizeof(TSymbol); j++) {
+ if (sizeof(TSymbol) <= 1)
+ sym = 0;
+ else
+ sym <<= 8;
+ if (i + j < sz)
+ sym |= TSymbol(0x00FF & rawkey[i + j]);
+ }
+ result.push_back(sym);
+ }
+ return result;
+ }
+
+ static size_t Size(size_t rawsize) {
+ return rawsize / sizeof(TSymbol);
+ }
+ };
+
+ template <>
+ struct TConvertRawKey<char> {
+ static TString Get(const TString& rawkey) {
+ return rawkey;
}
- const TFork& Top() const {
- return Forks.back();
- }
- bool Empty() const {
- return Forks.empty();
- }
+ static size_t Size(size_t rawsize) {
+ return rawsize;
+ }
+ };
+
+ //------------------------------------------------------------------------------------------------
+ class TOpaqueTrieIterator { // Iterator stuff. Stores a stack of visited forks.
+ public:
+ TOpaqueTrieIterator(const TOpaqueTrie& trie, const char* emptyValue, bool atend,
+ size_t maxKeyLength = size_t(-1));
+
+ bool operator==(const TOpaqueTrieIterator& rhs) const;
+ bool operator!=(const TOpaqueTrieIterator& rhs) const {
+ return !(*this == rhs);
+ }
+
+ bool Forward();
+ bool Backward();
+
+ template <class TSymbol>
+ bool UpperBound(const typename TCompactTrieKeySelector<TSymbol>::TKeyBuf& key); // True if matched exactly.
- void Clear() {
- Forks.clear();
- Key.clear();
+ template <class TSymbol>
+ typename TCompactTrieKeySelector<TSymbol>::TKey GetKey() const {
+ return TConvertRawKey<TSymbol>::Get(GetNarrowKey());
}
- bool operator==(const TForkStack& other) const {
- return Forks == other.Forks;
- }
- bool operator!=(const TForkStack& other) const {
- return !(*this == other);
- }
-
- TString GetKey() const;
- size_t MeasureKey() const;
-
- private:
- TVector<TFork> Forks;
- TString Key;
-
- private:
- bool TopHasLabelInKey() const {
- return !Empty() && Top().HasLabelInKey();
- }
- bool HasEmptyKey() const;
- };
-
- //------------------------------------------------------------------------------------------------
-
- template <class TSymbol>
- struct TConvertRawKey {
- typedef typename TCompactTrieKeySelector<TSymbol>::TKey TKey;
- static TKey Get(const TString& rawkey) {
- TKey result;
- const size_t sz = rawkey.size();
- result.reserve(sz / sizeof(TSymbol));
- for (size_t i = 0; i < sz; i += sizeof(TSymbol)) {
- TSymbol sym = 0;
- for (size_t j = 0; j < sizeof(TSymbol); j++) {
- if (sizeof(TSymbol) <= 1)
- sym = 0;
- else
- sym <<= 8;
- if (i + j < sz)
- sym |= TSymbol(0x00FF & rawkey[i + j]);
- }
- result.push_back(sym);
- }
- return result;
- }
-
- static size_t Size(size_t rawsize) {
- return rawsize / sizeof(TSymbol);
- }
- };
-
- template <>
- struct TConvertRawKey<char> {
- static TString Get(const TString& rawkey) {
- return rawkey;
- }
-
- static size_t Size(size_t rawsize) {
- return rawsize;
+ template <class TSymbol>
+ size_t MeasureKey() const {
+ return TConvertRawKey<TSymbol>::Size(MeasureNarrowKey());
}
- };
-
- //------------------------------------------------------------------------------------------------
- class TOpaqueTrieIterator { // Iterator stuff. Stores a stack of visited forks.
- public:
- TOpaqueTrieIterator(const TOpaqueTrie& trie, const char* emptyValue, bool atend,
- size_t maxKeyLength = size_t(-1));
-
- bool operator==(const TOpaqueTrieIterator& rhs) const;
- bool operator!=(const TOpaqueTrieIterator& rhs) const {
- return !(*this == rhs);
- }
-
- bool Forward();
- bool Backward();
-
- template <class TSymbol>
- bool UpperBound(const typename TCompactTrieKeySelector<TSymbol>::TKeyBuf& key); // True if matched exactly.
-
- template <class TSymbol>
- typename TCompactTrieKeySelector<TSymbol>::TKey GetKey() const {
- return TConvertRawKey<TSymbol>::Get(GetNarrowKey());
- }
-
- template <class TSymbol>
- size_t MeasureKey() const {
- return TConvertRawKey<TSymbol>::Size(MeasureNarrowKey());
- }
-
- TString GetNarrowKey() const {
- return Forks.GetKey();
- }
- size_t MeasureNarrowKey() const {
- return Forks.MeasureKey();
- }
-
- const char* GetValuePtr() const; // 0 if none
- const TNode& GetNode() const { // Could be called for non-empty key and not AtEnd.
- return Forks.Top().Node;
- }
- const TOpaqueTrie& GetTrie() const {
- return Trie;
- }
-
- private:
- TOpaqueTrie Trie;
- TForkStack Forks;
- const char* const EmptyValue;
- bool AtEmptyValue;
- const size_t MaxKeyLength;
-
- private:
- bool HasMaxKeyLength() const;
-
- template <class TSymbol>
- int LongestPrefix(const typename TCompactTrieKeySelector<TSymbol>::TKeyBuf& key); // Used in UpperBound.
- };
+
+ TString GetNarrowKey() const {
+ return Forks.GetKey();
+ }
+ size_t MeasureNarrowKey() const {
+ return Forks.MeasureKey();
+ }
+
+ const char* GetValuePtr() const; // 0 if none
+ const TNode& GetNode() const { // Could be called for non-empty key and not AtEnd.
+ return Forks.Top().Node;
+ }
+ const TOpaqueTrie& GetTrie() const {
+ return Trie;
+ }
+
+ private:
+ TOpaqueTrie Trie;
+ TForkStack Forks;
+ const char* const EmptyValue;
+ bool AtEmptyValue;
+ const size_t MaxKeyLength;
+
+ private:
+ bool HasMaxKeyLength() const;
+
+ template <class TSymbol>
+ int LongestPrefix(const typename TCompactTrieKeySelector<TSymbol>::TKeyBuf& key); // Used in UpperBound.
+ };
template <class TSymbol>
- int TOpaqueTrieIterator::LongestPrefix(const typename TCompactTrieKeySelector<TSymbol>::TKeyBuf& key) {
- Forks.Clear();
- TFork next(Trie.Data, 0, Trie.Length, Trie.SkipFunction);
- for (size_t i = 0; i < key.size(); i++) {
- TSymbol symbol = key[i];
- const bool isLastSymbol = (i + 1 == key.size());
- for (i64 shift = (i64)NCompactTrie::ExtraBits<TSymbol>(); shift >= 0; shift -= 8) {
- const unsigned char label = (unsigned char)(symbol >> shift);
- const bool isLastByte = (isLastSymbol && shift == 0);
- do {
- Forks.Push(next);
- TFork& top = Forks.Top();
- if (label < (unsigned char)top.GetLabel()) {
- if (!top.SetDirection(D_LEFT))
- return 1;
- } else if (label > (unsigned char)top.GetLabel()) {
- if (!top.SetDirection(D_RIGHT)) {
- Forks.Pop(); // We don't pass this fork on the way to the upper bound.
- return -1;
- }
- } else if (isLastByte) { // Here and below label == top.GetLabel().
- if (top.SetDirection(D_FINAL)) {
- return 0; // Skip the NextFork() call at the end of the cycle.
- } else {
- top.SetDirection(D_NEXT);
- return 1;
- }
- } else if (!top.SetDirection(D_NEXT)) {
- top.SetDirection(D_FINAL);
+ int TOpaqueTrieIterator::LongestPrefix(const typename TCompactTrieKeySelector<TSymbol>::TKeyBuf& key) {
+ Forks.Clear();
+ TFork next(Trie.Data, 0, Trie.Length, Trie.SkipFunction);
+ for (size_t i = 0; i < key.size(); i++) {
+ TSymbol symbol = key[i];
+ const bool isLastSymbol = (i + 1 == key.size());
+ for (i64 shift = (i64)NCompactTrie::ExtraBits<TSymbol>(); shift >= 0; shift -= 8) {
+ const unsigned char label = (unsigned char)(symbol >> shift);
+ const bool isLastByte = (isLastSymbol && shift == 0);
+ do {
+ Forks.Push(next);
+ TFork& top = Forks.Top();
+ if (label < (unsigned char)top.GetLabel()) {
+ if (!top.SetDirection(D_LEFT))
+ return 1;
+ } else if (label > (unsigned char)top.GetLabel()) {
+ if (!top.SetDirection(D_RIGHT)) {
+ Forks.Pop(); // We don't pass this fork on the way to the upper bound.
+ return -1;
+ }
+ } else if (isLastByte) { // Here and below label == top.GetLabel().
+ if (top.SetDirection(D_FINAL)) {
+ return 0; // Skip the NextFork() call at the end of the cycle.
+ } else {
+ top.SetDirection(D_NEXT);
+ return 1;
+ }
+ } else if (!top.SetDirection(D_NEXT)) {
+ top.SetDirection(D_FINAL);
return -1;
}
- next = top.NextFork(Trie.SkipFunction);
- } while (Forks.Top().CurrentDirection != D_NEXT); // Proceed to the next byte.
- }
+ next = top.NextFork(Trie.SkipFunction);
+ } while (Forks.Top().CurrentDirection != D_NEXT); // Proceed to the next byte.
+ }
}
- // We get here only if the key was empty.
- Forks.Push(next);
- return 1;
+ // We get here only if the key was empty.
+ Forks.Push(next);
+ return 1;
}
- template <class TSymbol>
- bool TOpaqueTrieIterator::UpperBound(const typename TCompactTrieKeySelector<TSymbol>::TKeyBuf& key) {
- Forks.Clear();
- if (key.empty() && EmptyValue) {
- AtEmptyValue = true;
- return true;
- } else {
- AtEmptyValue = false;
+ template <class TSymbol>
+ bool TOpaqueTrieIterator::UpperBound(const typename TCompactTrieKeySelector<TSymbol>::TKeyBuf& key) {
+ Forks.Clear();
+ if (key.empty() && EmptyValue) {
+ AtEmptyValue = true;
+ return true;
+ } else {
+ AtEmptyValue = false;
+ }
+ const int defect = LongestPrefix<TSymbol>(key);
+ if (defect > 0) {
+ // Continue the constructed forks with the smallest key possible.
+ while (Forks.Top().CurrentDirection != D_FINAL) {
+ TFork next = Forks.Top().NextFork(Trie.SkipFunction);
+ Forks.Push(next);
+ }
+ } else if (defect < 0) {
+ Forward();
}
- const int defect = LongestPrefix<TSymbol>(key);
- if (defect > 0) {
- // Continue the constructed forks with the smallest key possible.
- while (Forks.Top().CurrentDirection != D_FINAL) {
- TFork next = Forks.Top().NextFork(Trie.SkipFunction);
- Forks.Push(next);
- }
- } else if (defect < 0) {
- Forward();
- }
- return defect == 0;
+ return defect == 0;
}
-
+
}
diff --git a/library/cpp/containers/comptrie/prefix_iterator.h b/library/cpp/containers/comptrie/prefix_iterator.h
index eb2b2d6878..b369bb4f42 100644
--- a/library/cpp/containers/comptrie/prefix_iterator.h
+++ b/library/cpp/containers/comptrie/prefix_iterator.h
@@ -1,5 +1,5 @@
#pragma once
-
+
#include "comptrie_trie.h"
// Iterates over all prefixes of the given key in the trie.
@@ -39,11 +39,11 @@ public:
result = Next();
}
- operator bool() const {
+ operator bool() const {
return result;
}
- TPrefixIterator& operator++() {
+ TPrefixIterator& operator++() {
result = Next();
return *this;
}
diff --git a/library/cpp/containers/comptrie/protopacker.h b/library/cpp/containers/comptrie/protopacker.h
index 5bedde270d..3e15866dc5 100644
--- a/library/cpp/containers/comptrie/protopacker.h
+++ b/library/cpp/containers/comptrie/protopacker.h
@@ -3,7 +3,7 @@
#include <util/stream/mem.h>
#include <util/ysaveload.h>
-template <class Proto>
+template <class Proto>
class TProtoPacker {
public:
TProtoPacker() = default;
diff --git a/library/cpp/containers/comptrie/search_iterator.h b/library/cpp/containers/comptrie/search_iterator.h
index ad6bcfdf60..247f7e5936 100644
--- a/library/cpp/containers/comptrie/search_iterator.h
+++ b/library/cpp/containers/comptrie/search_iterator.h
@@ -1,5 +1,5 @@
#pragma once
-
+
#include "comptrie_trie.h"
#include "first_symbol_iterator.h"
@@ -74,7 +74,7 @@ private:
const char* ValuePos = nullptr;
};
-template <class TTrie>
+template <class TTrie>
inline TSearchIterator<TTrie> MakeSearchIterator(const TTrie& trie) {
return TSearchIterator<TTrie>(trie);
}
diff --git a/library/cpp/containers/comptrie/set.h b/library/cpp/containers/comptrie/set.h
index ffea92485c..acd43338f0 100644
--- a/library/cpp/containers/comptrie/set.h
+++ b/library/cpp/containers/comptrie/set.h
@@ -1,16 +1,16 @@
-#pragma once
-
+#pragma once
+
#include "comptrie_trie.h"
template <typename T = char>
-class TCompactTrieSet: public TCompactTrie<T, ui8, TNullPacker<ui8>> {
+class TCompactTrieSet: public TCompactTrie<T, ui8, TNullPacker<ui8>> {
public:
- typedef TCompactTrie<T, ui8, TNullPacker<ui8>> TBase;
+ typedef TCompactTrie<T, ui8, TNullPacker<ui8>> TBase;
- using typename TBase::TBuilder;
+ using typename TBase::TBuilder;
using typename TBase::TKey;
using typename TBase::TKeyBuf;
- using typename TBase::TSymbol;
+ using typename TBase::TSymbol;
TCompactTrieSet() = default;
@@ -20,8 +20,8 @@ public:
}
template <typename D>
- explicit TCompactTrieSet(const TCompactTrie<T, D, TNullPacker<D>>& trie)
- : TBase(trie.Data()) // should be binary compatible for any D
+ explicit TCompactTrieSet(const TCompactTrie<T, D, TNullPacker<D>>& trie)
+ : TBase(trie.Data()) // should be binary compatible for any D
{
}
diff --git a/library/cpp/containers/comptrie/write_trie_backwards.cpp b/library/cpp/containers/comptrie/write_trie_backwards.cpp
index f7459b1c49..fd8c28b0ed 100644
--- a/library/cpp/containers/comptrie/write_trie_backwards.cpp
+++ b/library/cpp/containers/comptrie/write_trie_backwards.cpp
@@ -7,104 +7,104 @@
#include <util/generic/vector.h>
namespace NCompactTrie {
- size_t WriteTrieBackwards(IOutputStream& os, TReverseNodeEnumerator& enumerator, bool verbose) {
- if (verbose) {
- Cerr << "Writing down the trie..." << Endl;
- }
-
- // Rewrite everything from the back, removing unused pieces.
- const size_t chunksize = 0x10000;
- TVector<char*> resultData;
-
- resultData.push_back(new char[chunksize]);
- char* chunkend = resultData.back() + chunksize;
-
- size_t resultLength = 0;
- size_t chunkLength = 0;
-
- size_t counter = 0;
- TBuffer bufferHolder;
- while (enumerator.Move()) {
- if (verbose)
- ShowProgress(++counter);
-
- size_t bufferLength = 64 + enumerator.GetLeafLength(); // never know how big leaf data can be
- bufferHolder.Clear();
- bufferHolder.Resize(bufferLength);
- char* buffer = bufferHolder.Data();
-
- size_t nodelength = enumerator.RecreateNode(buffer, resultLength);
- Y_ASSERT(nodelength <= bufferLength);
-
- resultLength += nodelength;
-
- if (chunkLength + nodelength <= chunksize) {
- chunkLength += nodelength;
- memcpy(chunkend - chunkLength, buffer, nodelength);
- } else { // allocate a new chunk
- memcpy(chunkend - chunksize, buffer + nodelength - (chunksize - chunkLength), chunksize - chunkLength);
- chunkLength = chunkLength + nodelength - chunksize;
-
- resultData.push_back(new char[chunksize]);
- chunkend = resultData.back() + chunksize;
-
- while (chunkLength > chunksize) { // allocate a new chunks
- chunkLength -= chunksize;
- memcpy(chunkend - chunksize, buffer + chunkLength, chunksize);
-
- resultData.push_back(new char[chunksize]);
- chunkend = resultData.back() + chunksize;
- }
-
- memcpy(chunkend - chunkLength, buffer, chunkLength);
+ size_t WriteTrieBackwards(IOutputStream& os, TReverseNodeEnumerator& enumerator, bool verbose) {
+ if (verbose) {
+ Cerr << "Writing down the trie..." << Endl;
+ }
+
+ // Rewrite everything from the back, removing unused pieces.
+ const size_t chunksize = 0x10000;
+ TVector<char*> resultData;
+
+ resultData.push_back(new char[chunksize]);
+ char* chunkend = resultData.back() + chunksize;
+
+ size_t resultLength = 0;
+ size_t chunkLength = 0;
+
+ size_t counter = 0;
+ TBuffer bufferHolder;
+ while (enumerator.Move()) {
+ if (verbose)
+ ShowProgress(++counter);
+
+ size_t bufferLength = 64 + enumerator.GetLeafLength(); // never know how big leaf data can be
+ bufferHolder.Clear();
+ bufferHolder.Resize(bufferLength);
+ char* buffer = bufferHolder.Data();
+
+ size_t nodelength = enumerator.RecreateNode(buffer, resultLength);
+ Y_ASSERT(nodelength <= bufferLength);
+
+ resultLength += nodelength;
+
+ if (chunkLength + nodelength <= chunksize) {
+ chunkLength += nodelength;
+ memcpy(chunkend - chunkLength, buffer, nodelength);
+ } else { // allocate a new chunk
+ memcpy(chunkend - chunksize, buffer + nodelength - (chunksize - chunkLength), chunksize - chunkLength);
+ chunkLength = chunkLength + nodelength - chunksize;
+
+ resultData.push_back(new char[chunksize]);
+ chunkend = resultData.back() + chunksize;
+
+ while (chunkLength > chunksize) { // allocate a new chunks
+ chunkLength -= chunksize;
+ memcpy(chunkend - chunksize, buffer + chunkLength, chunksize);
+
+ resultData.push_back(new char[chunksize]);
+ chunkend = resultData.back() + chunksize;
+ }
+
+ memcpy(chunkend - chunkLength, buffer, chunkLength);
}
- }
-
- if (verbose)
- Cerr << counter << Endl;
-
- // Write the whole thing down
- while (!resultData.empty()) {
- char* chunk = resultData.back();
- os.Write(chunk + chunksize - chunkLength, chunkLength);
- chunkLength = chunksize;
- delete[] chunk;
- resultData.pop_back();
}
-
- return resultLength;
+
+ if (verbose)
+ Cerr << counter << Endl;
+
+ // Write the whole thing down
+ while (!resultData.empty()) {
+ char* chunk = resultData.back();
+ os.Write(chunk + chunksize - chunkLength, chunkLength);
+ chunkLength = chunksize;
+ delete[] chunk;
+ resultData.pop_back();
+ }
+
+ return resultLength;
}
- size_t WriteTrieBackwardsNoAlloc(IOutputStream& os, TReverseNodeEnumerator& enumerator, TOpaqueTrie& trie, EMinimizeMode mode) {
- char* data = const_cast<char*>(trie.Data);
- char* end = data + trie.Length;
- char* pos = end;
-
- TVector<char> buf(64);
- while (enumerator.Move()) {
- size_t nodeLength = enumerator.RecreateNode(nullptr, end - pos);
- if (nodeLength > buf.size())
- buf.resize(nodeLength);
-
- size_t realLength = enumerator.RecreateNode(buf.data(), end - pos);
- Y_ASSERT(realLength == nodeLength);
-
- pos -= nodeLength;
- memcpy(pos, buf.data(), nodeLength);
- }
-
- switch (mode) {
- case MM_NOALLOC:
- os.Write(pos, end - pos);
- break;
- case MM_INPLACE:
- memmove(data, pos, end - pos);
- break;
- default:
- Y_VERIFY(false);
- }
-
- return end - pos;
+ size_t WriteTrieBackwardsNoAlloc(IOutputStream& os, TReverseNodeEnumerator& enumerator, TOpaqueTrie& trie, EMinimizeMode mode) {
+ char* data = const_cast<char*>(trie.Data);
+ char* end = data + trie.Length;
+ char* pos = end;
+
+ TVector<char> buf(64);
+ while (enumerator.Move()) {
+ size_t nodeLength = enumerator.RecreateNode(nullptr, end - pos);
+ if (nodeLength > buf.size())
+ buf.resize(nodeLength);
+
+ size_t realLength = enumerator.RecreateNode(buf.data(), end - pos);
+ Y_ASSERT(realLength == nodeLength);
+
+ pos -= nodeLength;
+ memcpy(pos, buf.data(), nodeLength);
+ }
+
+ switch (mode) {
+ case MM_NOALLOC:
+ os.Write(pos, end - pos);
+ break;
+ case MM_INPLACE:
+ memmove(data, pos, end - pos);
+ break;
+ default:
+ Y_VERIFY(false);
+ }
+
+ return end - pos;
}
}
diff --git a/library/cpp/containers/comptrie/writeable_node.cpp b/library/cpp/containers/comptrie/writeable_node.cpp
index d0e986f99a..404003dbbd 100644
--- a/library/cpp/containers/comptrie/writeable_node.cpp
+++ b/library/cpp/containers/comptrie/writeable_node.cpp
@@ -3,94 +3,94 @@
#include "comptrie_impl.h"
namespace NCompactTrie {
- TWriteableNode::TWriteableNode()
- : LeafPos(nullptr)
- , LeafLength(0)
- , ForwardOffset(NPOS)
- , LeftOffset(NPOS)
- , RightOffset(NPOS)
- , Label(0)
- {
- }
+ TWriteableNode::TWriteableNode()
+ : LeafPos(nullptr)
+ , LeafLength(0)
+ , ForwardOffset(NPOS)
+ , LeftOffset(NPOS)
+ , RightOffset(NPOS)
+ , Label(0)
+ {
+ }
+
+ static size_t GetOffsetFromEnd(const TNode& node, size_t absOffset) {
+ return absOffset ? absOffset - node.GetOffset() - node.GetCoreLength() : NPOS;
+ }
- static size_t GetOffsetFromEnd(const TNode& node, size_t absOffset) {
- return absOffset ? absOffset - node.GetOffset() - node.GetCoreLength() : NPOS;
- }
-
- TWriteableNode::TWriteableNode(const TNode& node, const char* data)
- : LeafPos(node.IsFinal() ? data + node.GetLeafOffset() : nullptr)
- , LeafLength(node.GetLeafLength())
- , ForwardOffset(GetOffsetFromEnd(node, node.GetForwardOffset()))
- , LeftOffset(GetOffsetFromEnd(node, node.GetLeftOffset()))
- , RightOffset(GetOffsetFromEnd(node, node.GetRightOffset()))
- , Label(node.GetLabel())
- {
- }
+ TWriteableNode::TWriteableNode(const TNode& node, const char* data)
+ : LeafPos(node.IsFinal() ? data + node.GetLeafOffset() : nullptr)
+ , LeafLength(node.GetLeafLength())
+ , ForwardOffset(GetOffsetFromEnd(node, node.GetForwardOffset()))
+ , LeftOffset(GetOffsetFromEnd(node, node.GetLeftOffset()))
+ , RightOffset(GetOffsetFromEnd(node, node.GetRightOffset()))
+ , Label(node.GetLabel())
+ {
+ }
- size_t TWriteableNode::Measure() const {
- size_t len = 2 + LeafLength;
- size_t fwdLen = 0;
- size_t lastLen = 0;
- size_t lastFwdLen = 0;
- // Now, increase all the offsets by the length and recalculate everything, until it converges
- do {
- lastLen = len;
- lastFwdLen = fwdLen;
+ size_t TWriteableNode::Measure() const {
+ size_t len = 2 + LeafLength;
+ size_t fwdLen = 0;
+ size_t lastLen = 0;
+ size_t lastFwdLen = 0;
+ // Now, increase all the offsets by the length and recalculate everything, until it converges
+ do {
+ lastLen = len;
+ lastFwdLen = fwdLen;
- len = 2 + LeafLength;
- len += MeasureOffset(LeftOffset != NPOS ? LeftOffset + lastLen : 0);
- len += MeasureOffset(RightOffset != NPOS ? RightOffset + lastLen : 0);
-
- // Relative forward offset of 0 means we don't need extra length for an epsilon link.
- // But an epsilon link means we need an extra 1 for the flags and the forward offset is measured
- // from the start of the epsilon link, not from the start of our node.
- if (ForwardOffset != NPOS && ForwardOffset != 0) {
- fwdLen = MeasureOffset(ForwardOffset + lastFwdLen) + 1;
- len += fwdLen;
- }
+ len = 2 + LeafLength;
+ len += MeasureOffset(LeftOffset != NPOS ? LeftOffset + lastLen : 0);
+ len += MeasureOffset(RightOffset != NPOS ? RightOffset + lastLen : 0);
- } while (lastLen != len || lastFwdLen != fwdLen);
+ // Relative forward offset of 0 means we don't need extra length for an epsilon link.
+ // But an epsilon link means we need an extra 1 for the flags and the forward offset is measured
+ // from the start of the epsilon link, not from the start of our node.
+ if (ForwardOffset != NPOS && ForwardOffset != 0) {
+ fwdLen = MeasureOffset(ForwardOffset + lastFwdLen) + 1;
+ len += fwdLen;
+ }
+
+ } while (lastLen != len || lastFwdLen != fwdLen);
+
+ return len;
+ }
- return len;
- }
+ size_t TWriteableNode::Pack(char* buffer) const {
+ const size_t length = Measure();
- size_t TWriteableNode::Pack(char* buffer) const {
- const size_t length = Measure();
+ char flags = 0;
+ if (LeafPos) {
+ flags |= MT_FINAL;
+ }
+ if (ForwardOffset != NPOS) {
+ flags |= MT_NEXT;
+ }
- char flags = 0;
- if (LeafPos) {
- flags |= MT_FINAL;
- }
- if (ForwardOffset != NPOS) {
- flags |= MT_NEXT;
- }
+ const size_t leftOffset = LeftOffset != NPOS ? LeftOffset + length : 0;
+ const size_t rightOffset = RightOffset != NPOS ? RightOffset + length : 0;
+ const size_t leftOffsetSize = MeasureOffset(leftOffset);
+ const size_t rightOffsetSize = MeasureOffset(rightOffset);
+ flags |= (leftOffsetSize << MT_LEFTSHIFT);
+ flags |= (rightOffsetSize << MT_RIGHTSHIFT);
- const size_t leftOffset = LeftOffset != NPOS ? LeftOffset + length : 0;
- const size_t rightOffset = RightOffset != NPOS ? RightOffset + length : 0;
- const size_t leftOffsetSize = MeasureOffset(leftOffset);
- const size_t rightOffsetSize = MeasureOffset(rightOffset);
- flags |= (leftOffsetSize << MT_LEFTSHIFT);
- flags |= (rightOffsetSize << MT_RIGHTSHIFT);
+ buffer[0] = flags;
+ buffer[1] = Label;
+ size_t usedLen = 2;
+ usedLen += PackOffset(buffer + usedLen, leftOffset);
+ usedLen += PackOffset(buffer + usedLen, rightOffset);
- buffer[0] = flags;
- buffer[1] = Label;
- size_t usedLen = 2;
- usedLen += PackOffset(buffer + usedLen, leftOffset);
- usedLen += PackOffset(buffer + usedLen, rightOffset);
+ if (LeafPos && LeafLength) {
+ memcpy(buffer + usedLen, LeafPos, LeafLength);
+ usedLen += LeafLength;
+ }
- if (LeafPos && LeafLength) {
- memcpy(buffer + usedLen, LeafPos, LeafLength);
- usedLen += LeafLength;
- }
-
- if (ForwardOffset != NPOS && ForwardOffset != 0) {
- const size_t fwdOffset = ForwardOffset + length - usedLen;
- size_t fwdOffsetSize = MeasureOffset(fwdOffset);
- buffer[usedLen++] = (char)(fwdOffsetSize & MT_SIZEMASK);
- usedLen += PackOffset(buffer + usedLen, fwdOffset);
- }
- Y_ASSERT(usedLen == length);
- return usedLen;
+ if (ForwardOffset != NPOS && ForwardOffset != 0) {
+ const size_t fwdOffset = ForwardOffset + length - usedLen;
+ size_t fwdOffsetSize = MeasureOffset(fwdOffset);
+ buffer[usedLen++] = (char)(fwdOffsetSize & MT_SIZEMASK);
+ usedLen += PackOffset(buffer + usedLen, fwdOffset);
+ }
+ Y_ASSERT(usedLen == length);
+ return usedLen;
}
}
diff --git a/library/cpp/containers/comptrie/writeable_node.h b/library/cpp/containers/comptrie/writeable_node.h
index 585ef0a5e4..5454e579ef 100644
--- a/library/cpp/containers/comptrie/writeable_node.h
+++ b/library/cpp/containers/comptrie/writeable_node.h
@@ -3,24 +3,24 @@
#include <cstddef>
namespace NCompactTrie {
- class TNode;
+ class TNode;
- class TWriteableNode {
- public:
- const char* LeafPos;
- size_t LeafLength;
+ class TWriteableNode {
+ public:
+ const char* LeafPos;
+ size_t LeafLength;
- size_t ForwardOffset;
- size_t LeftOffset;
- size_t RightOffset;
- char Label;
+ size_t ForwardOffset;
+ size_t LeftOffset;
+ size_t RightOffset;
+ char Label;
- TWriteableNode();
- TWriteableNode(const TNode& node, const char* data);
+ TWriteableNode();
+ TWriteableNode(const TNode& node, const char* data);
- // When you call this, the offsets should be relative to the end of the node. Use NPOS to indicate an absent offset.
- size_t Pack(char* buffer) const;
- size_t Measure() const;
- };
+ // When you call this, the offsets should be relative to the end of the node. Use NPOS to indicate an absent offset.
+ size_t Pack(char* buffer) const;
+ size_t Measure() const;
+ };
}
diff --git a/library/cpp/containers/comptrie/ya.make b/library/cpp/containers/comptrie/ya.make
index 43d6534634..81352da4b2 100644
--- a/library/cpp/containers/comptrie/ya.make
+++ b/library/cpp/containers/comptrie/ya.make
@@ -1,4 +1,4 @@
-LIBRARY()
+LIBRARY()
OWNER(velavokr)
@@ -12,9 +12,9 @@ SRCS(
key_selector.h
leaf_skipper.h
set.h
- comptrie.cpp
+ comptrie.cpp
comptrie_builder.cpp
- comptrie_impl.cpp
+ comptrie_impl.cpp
make_fast_layout.cpp
minimize.cpp
node.cpp