aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/containers
diff options
context:
space:
mode:
authorgrig <grig@yandex-team.ru>2022-02-10 16:50:24 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:50:24 +0300
commitbeb63ece3a6872dfbe113104f524ab6fdbec0adc (patch)
tree5d5cb817648f650d76cf1076100726fd9b8448e8 /library/cpp/containers
parentda383a4f674027527827ad076134241fc5da0cbf (diff)
downloadydb-beb63ece3a6872dfbe113104f524ab6fdbec0adc.tar.gz
Restoring authorship annotation for <grig@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/containers')
-rw-r--r--library/cpp/containers/comptrie/comptrie.h2
-rw-r--r--library/cpp/containers/comptrie/comptrie_builder.h20
-rw-r--r--library/cpp/containers/comptrie/comptrie_builder.inl8
-rw-r--r--library/cpp/containers/comptrie/comptrie_impl.cpp12
-rw-r--r--library/cpp/containers/comptrie/comptrie_impl.h4
-rw-r--r--library/cpp/containers/comptrie/comptrie_trie.h42
-rw-r--r--library/cpp/containers/comptrie/comptrie_ut.cpp190
-rw-r--r--library/cpp/containers/comptrie/leaf_skipper.h4
-rw-r--r--library/cpp/containers/comptrie/make_fast_layout.cpp24
-rw-r--r--library/cpp/containers/comptrie/make_fast_layout.h4
-rw-r--r--library/cpp/containers/comptrie/minimize.cpp86
-rw-r--r--library/cpp/containers/comptrie/minimize.h4
-rw-r--r--library/cpp/containers/comptrie/node.cpp16
-rw-r--r--library/cpp/containers/comptrie/node.h10
-rw-r--r--library/cpp/containers/comptrie/opaque_trie_iterator.cpp2
-rw-r--r--library/cpp/containers/comptrie/opaque_trie_iterator.h6
-rw-r--r--library/cpp/containers/comptrie/write_trie_backwards.cpp24
-rw-r--r--library/cpp/containers/comptrie/writeable_node.cpp22
18 files changed, 240 insertions, 240 deletions
diff --git a/library/cpp/containers/comptrie/comptrie.h b/library/cpp/containers/comptrie/comptrie.h
index a37dcee421..f77024327e 100644
--- a/library/cpp/containers/comptrie/comptrie.h
+++ b/library/cpp/containers/comptrie/comptrie.h
@@ -1,4 +1,4 @@
#pragma once
-
+
#include "comptrie_trie.h"
#include "comptrie_builder.h"
diff --git a/library/cpp/containers/comptrie/comptrie_builder.h b/library/cpp/containers/comptrie/comptrie_builder.h
index 156f93db39..cf7d2e39a3 100644
--- a/library/cpp/containers/comptrie/comptrie_builder.h
+++ b/library/cpp/containers/comptrie/comptrie_builder.h
@@ -1,5 +1,5 @@
#pragma once
-
+
#include "comptrie_packer.h"
#include "minimize.h"
#include "key_selector.h"
@@ -12,7 +12,7 @@
// is created incrementally. It actually helps a lot to have the input data prefix-grouped
// by key; otherwise, memory consumption becomes a tough issue.
// NOTE: building and serializing the automaton may be lengthy, and takes lots of memory.
-
+
// PREFIX_GROUPED means that if we, while constructing a trie, add to the builder two keys with the same prefix,
// then all the keys that we add between these two also have the same prefix.
// Actually in this mode the builder can accept even more freely ordered input,
@@ -45,16 +45,16 @@ public:
typedef S TPacker;
typedef typename TCompactTrieKeySelector<TSymbol>::TKey TKey;
typedef typename TCompactTrieKeySelector<TSymbol>::TKeyBuf TKeyBuf;
-
+
explicit TCompactTrieBuilder(TCompactTrieBuilderFlags flags = CTBF_NONE, TPacker packer = TPacker(), IAllocator* alloc = TDefaultAllocator::Instance());
-
+
// All Add.. methods return true if it was a new key, false if the key already existed.
bool Add(const TSymbol* key, size_t keylen, const TData& value);
bool Add(const TKeyBuf& key, const TData& value) {
return Add(key.data(), key.size(), value);
}
-
+
// add already serialized data
bool AddPtr(const TSymbol* key, size_t keylen, const char* data);
bool AddPtr(const TKeyBuf& key, const char* data) {
@@ -80,14 +80,14 @@ public:
bool FindLongestPrefix(const TKeyBuf& key, size_t* prefixLen, TData* value = nullptr) const {
return FindLongestPrefix(key.data(), key.size(), prefixLen, value);
}
-
+
size_t Save(IOutputStream& os) const;
size_t SaveAndDestroy(IOutputStream& os);
size_t SaveToFile(const TString& fileName) const {
TFixedBufferFileOutput out(fileName);
return Save(out);
}
-
+
void Clear(); // Returns all memory to the system and resets the builder state.
size_t GetEntryCount() const;
@@ -101,8 +101,8 @@ public:
protected:
class TCompactTrieBuilderImpl;
THolder<TCompactTrieBuilderImpl> Impl;
-};
-
+};
+
//----------------------------------------------------------------------------------------------------------------------
// Minimize the trie. The result is equivalent to the original
// trie, except that it takes less space (and has marginally lower
@@ -119,7 +119,7 @@ protected:
template <class TPacker>
size_t CompactTrieMinimize(IOutputStream& os, const char* data, size_t datalength, bool verbose = false, const TPacker& packer = TPacker(), NCompactTrie::EMinimizeMode mode = NCompactTrie::MM_DEFAULT);
-
+
template <class TTrieBuilder>
size_t CompactTrieMinimize(IOutputStream& os, const TTrieBuilder& builder, bool verbose = false);
diff --git a/library/cpp/containers/comptrie/comptrie_builder.inl b/library/cpp/containers/comptrie/comptrie_builder.inl
index a0c1097862..f273fa6571 100644
--- a/library/cpp/containers/comptrie/comptrie_builder.inl
+++ b/library/cpp/containers/comptrie/comptrie_builder.inl
@@ -1,5 +1,5 @@
#pragma once
-
+
#include "comptrie_impl.h"
#include "comptrie_trie.h"
#include "make_fast_layout.h"
@@ -618,7 +618,7 @@ typename TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TNode*
} else {
ConvertSymbolArrayToChar(key, keylen, ckeybuf, ckeylen);
}
-
+
char* ckey = ckeybuf.Data();
TNode* next;
@@ -907,9 +907,9 @@ ui64 TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::ArcMeasure(
size_t leftoffsetsize = leftsize ? MeasureOffset(coresize + treesize) : 0;
size_t rightoffsetsize = rightsize ? MeasureOffset(coresize + treesize + leftsize) : 0;
leftoffsetsize = leftsize ? MeasureOffset(coresize + treesize + leftoffsetsize + rightoffsetsize) : 0;
- rightoffsetsize = rightsize ? MeasureOffset(coresize + treesize + leftsize + leftoffsetsize + rightoffsetsize) : 0;
+ rightoffsetsize = rightsize ? MeasureOffset(coresize + treesize + leftsize + leftoffsetsize + rightoffsetsize) : 0;
leftoffsetsize = leftsize ? MeasureOffset(coresize + treesize + leftoffsetsize + rightoffsetsize) : 0;
- rightoffsetsize = rightsize ? MeasureOffset(coresize + treesize + leftsize + leftoffsetsize + rightoffsetsize) : 0;
+ rightoffsetsize = rightsize ? MeasureOffset(coresize + treesize + leftsize + leftoffsetsize + rightoffsetsize) : 0;
coresize += leftoffsetsize + rightoffsetsize;
thiz->LeftOffset = leftsize ? coresize + treesize : 0;
diff --git a/library/cpp/containers/comptrie/comptrie_impl.cpp b/library/cpp/containers/comptrie/comptrie_impl.cpp
index a8c1f76bfb..a116ab6d1e 100644
--- a/library/cpp/containers/comptrie/comptrie_impl.cpp
+++ b/library/cpp/containers/comptrie/comptrie_impl.cpp
@@ -8,26 +8,26 @@
namespace NCompactTrie {
size_t MeasureOffset(size_t offset) {
int n = 0;
-
+
while (offset) {
offset >>= 8;
++n;
}
return n;
- }
+ }
size_t PackOffset(char* buffer, size_t offset) {
size_t len = MeasureOffset(offset);
size_t i = len;
-
+
while (i--) {
buffer[i] = (char)(offset & 0xFF);
offset >>= 8;
}
return len;
- }
+ }
void ShowProgress(size_t n) {
if (n % 1000000 == 0)
@@ -35,5 +35,5 @@ namespace NCompactTrie {
else if (n % 20000 == 0)
Cerr << ".";
}
-
-}
+
+}
diff --git a/library/cpp/containers/comptrie/comptrie_impl.h b/library/cpp/containers/comptrie/comptrie_impl.h
index 9a3e7f7d90..f41c38311a 100644
--- a/library/cpp/containers/comptrie/comptrie_impl.h
+++ b/library/cpp/containers/comptrie/comptrie_impl.h
@@ -1,7 +1,7 @@
#pragma once
-
+
#include <util/stream/output.h>
-
+
#ifndef COMPTRIE_DATA_CHECK
#define COMPTRIE_DATA_CHECK 1
#endif
diff --git a/library/cpp/containers/comptrie/comptrie_trie.h b/library/cpp/containers/comptrie/comptrie_trie.h
index 6665dfdf4d..40ec1e52b3 100644
--- a/library/cpp/containers/comptrie/comptrie_trie.h
+++ b/library/cpp/containers/comptrie/comptrie_trie.h
@@ -1,11 +1,11 @@
#pragma once
-
+
#include "comptrie_impl.h"
#include "comptrie_packer.h"
#include "opaque_trie_iterator.h"
#include "leaf_skipper.h"
#include "key_selector.h"
-
+
#include <util/generic/buffer.h>
#include <util/generic/ptr.h>
#include <util/generic/vector.h>
@@ -30,12 +30,12 @@ class TPrefixIterator;
// in case of <char> specialization cannot distinguish between "" and "\0" keys
template <class T = char, class D = ui64, class S = TCompactTriePacker<D>>
-class TCompactTrie {
-public:
- typedef T TSymbol;
+class TCompactTrie {
+public:
+ typedef T TSymbol;
typedef D TData;
typedef S TPacker;
-
+
typedef typename TCompactTrieKeySelector<TSymbol>::TKey TKey;
typedef typename TCompactTrieKeySelector<TSymbol>::TKeyBuf TKeyBuf;
@@ -50,8 +50,8 @@ protected:
const char* EmptyValue = nullptr;
TPacker Packer;
NCompactTrie::TPackerLeafSkipper<TPacker> Skipper = &Packer; // This should be true for every constructor.
-
-public:
+
+public:
TCompactTrie() = default;
TCompactTrie(const char* d, size_t len, TPacker packer);
@@ -76,7 +76,7 @@ public:
void Init(const char* d, size_t len, TPacker packer = TPacker());
void Init(const TBlob& data, TPacker packer = TPacker());
-
+
bool IsInitialized() const;
bool IsEmpty() const;
@@ -142,14 +142,14 @@ public:
inline bool FindTails(TSymbol label, TCompactTrie<T, D, S>& res) const;
class TConstIterator {
- private:
+ private:
typedef NCompactTrie::TOpaqueTrieIterator TOpaqueTrieIterator;
typedef NCompactTrie::TOpaqueTrie TOpaqueTrie;
- friend class TCompactTrie;
+ friend class TCompactTrie;
TConstIterator(const TOpaqueTrie& trie, const char* emptyValue, bool atend, TPacker packer); // only usable from Begin() and End() methods
TConstIterator(const TOpaqueTrie& trie, const char* emptyValue, const TKeyBuf& key, TPacker packer); // only usable from UpperBound() method
- public:
+ public:
TConstIterator() = default;
bool IsEmpty() const {
return !Impl;
@@ -162,7 +162,7 @@ public:
TConstIterator& operator--();
TConstIterator operator--(int /*unused*/);
TValueType operator*();
-
+
TKey GetKey() const;
size_t GetKeySize() const;
TData GetValue() const;
@@ -172,13 +172,13 @@ public:
private:
TPacker Packer;
TCopyPtr<TOpaqueTrieIterator> Impl;
- };
-
+ };
+
TConstIterator Begin() const;
TConstIterator begin() const;
TConstIterator End() const;
TConstIterator end() const;
-
+
// Returns an iterator pointing to the smallest key in the trie >= the argument.
// TODO: misleading name. Should be called LowerBound for consistency with stl.
// No. It is the STL that has a misleading name.
@@ -203,22 +203,22 @@ protected:
return LookupLongestPrefix(key, keylen, prefixLen, valuepos, hasNext);
}
void LookupPhrases(const char* datapos, size_t len, const TSymbol* key, size_t keylen, TVector<TPhraseMatch>& matches, TSymbol separator) const;
-};
-
+};
+
template <class T = char, class D = ui64, class S = TCompactTriePacker<D>>
class TCompactTrieHolder: public TCompactTrie<T, D, S>, NNonCopyable::TNonCopyable {
private:
typedef TCompactTrie<T, D, S> TBase;
TArrayHolder<char> Storage;
-
+
public:
TCompactTrieHolder(IInputStream& is, size_t len);
};
-
+
//------------------------//
// Implementation section //
//------------------------//
-
+
// TCompactTrie
template <class T, class D, class S>
diff --git a/library/cpp/containers/comptrie/comptrie_ut.cpp b/library/cpp/containers/comptrie/comptrie_ut.cpp
index 0afb79113f..74bee09b5d 100644
--- a/library/cpp/containers/comptrie/comptrie_ut.cpp
+++ b/library/cpp/containers/comptrie/comptrie_ut.cpp
@@ -116,25 +116,25 @@ private:
UNIT_TEST_SUITE_END();
static const char* SampleData[];
-
- template <class T>
+
+ template <class T>
void CreateTrie(IOutputStream& out, bool minimize, bool useFastLayout);
-
- template <class T>
+
+ template <class T>
void CheckData(const char* src, size_t len);
- template <class T>
+ template <class T>
void CheckUpperBound(const char* src, size_t len);
template <class T>
- void CheckIterator(const char* src, size_t len);
-
- template <class T>
+ void CheckIterator(const char* src, size_t len);
+
+ template <class T>
void TestTrie(bool minimize, bool useFastLayout);
-
- template <class T>
- void TestTrieIterator(bool minimize);
-
+
+ template <class T>
+ void TestTrieIterator(bool minimize);
+
template <class T, bool minimize>
void TestRandom(const size_t n, const size_t maxKeySize);
@@ -167,34 +167,34 @@ private:
class TDummyPacker;
class TStrokaPacker;
-public:
+public:
void TestPackers();
- void TestTrie8();
- void TestTrie16();
- void TestTrie32();
+ void TestTrie8();
+ void TestTrie16();
+ void TestTrie32();
void TestFastTrie8();
void TestFastTrie16();
void TestFastTrie32();
- void TestMinimizedTrie8();
- void TestMinimizedTrie16();
- void TestMinimizedTrie32();
+ void TestMinimizedTrie8();
+ void TestMinimizedTrie16();
+ void TestMinimizedTrie32();
void TestFastMinimizedTrie8();
void TestFastMinimizedTrie16();
void TestFastMinimizedTrie32();
- void TestTrieIterator8();
- void TestTrieIterator16();
- void TestTrieIterator32();
+ void TestTrieIterator8();
+ void TestTrieIterator16();
+ void TestTrieIterator32();
- void TestMinimizedTrieIterator8();
- void TestMinimizedTrieIterator16();
- void TestMinimizedTrieIterator32();
+ void TestMinimizedTrieIterator8();
+ void TestMinimizedTrieIterator16();
+ void TestMinimizedTrieIterator32();
- void TestPhraseSearch();
+ void TestPhraseSearch();
void TestAddGet();
void TestEmpty();
void TestUninitializedNonEmpty();
@@ -279,13 +279,13 @@ const char* TCompactTrieTest::SampleData[] = {
template <class T>
typename TCompactTrie<T>::TKey MakeWideKey(const char* str, size_t len) {
typename TCompactTrie<T>::TKey buffer;
- for (size_t i = 0; i < len; i++) {
- unsigned int ch = (str[i] & 0xFF);
+ for (size_t i = 0; i < len; i++) {
+ unsigned int ch = (str[i] & 0xFF);
buffer.push_back((T)(ch | ch << 8 | ch << 16 | ch << 24));
- }
- return buffer;
-}
-
+ }
+ return buffer;
+}
+
template <class T>
typename TCompactTrie<T>::TKey MakeWideKey(const TString& str) {
return MakeWideKey<T>(str.c_str(), str.length());
@@ -308,13 +308,13 @@ void TCompactTrieTest::CreateTrie(IOutputStream& out, bool minimize, bool useFas
TBufferOutput tmp2;
IOutputStream& currentOutput = useFastLayout ? tmp2 : out;
- if (minimize) {
- TBufferOutput buftmp;
- builder.Save(buftmp);
+ if (minimize) {
+ TBufferOutput buftmp;
+ builder.Save(buftmp);
CompactTrieMinimize<TCompactTriePacker<ui64>>(currentOutput, buftmp.Buffer().Data(), buftmp.Buffer().Size(), false);
- } else {
+ } else {
builder.Save(currentOutput);
- }
+ }
if (useFastLayout) {
CompactTrieMakeFastLayout<TCompactTriePacker<T>>(out, tmp2.Buffer().Data(), tmp2.Buffer().Size(), false);
}
@@ -358,23 +358,23 @@ void TCompactTrieTest::CheckUpperBound(const char* data, size_t datalen) {
}
template <class T>
-void TCompactTrieTest::CheckData(const char* data, size_t datalen) {
- TCompactTrie<T> trie(data, datalen);
+void TCompactTrieTest::CheckData(const char* data, size_t datalen) {
+ TCompactTrie<T> trie(data, datalen);
UNIT_ASSERT_VALUES_EQUAL(Y_ARRAY_SIZE(SampleData), trie.Size());
for (auto& i : SampleData) {
size_t len = strlen(i);
- ui64 value = 0;
+ ui64 value = 0;
size_t prefixLen = 0;
typename TCompactTrie<T>::TKey key = MakeWideKey<T>(i, len);
UNIT_ASSERT(trie.Find(key, &value));
- UNIT_ASSERT_EQUAL(len * 2, value);
+ UNIT_ASSERT_EQUAL(len * 2, value);
UNIT_ASSERT(trie.FindLongestPrefix(key, &prefixLen, &value));
UNIT_ASSERT_EQUAL(len, prefixLen);
UNIT_ASSERT_EQUAL(len * 2, value);
-
+
TString badkey("bb");
badkey += i;
key = MakeWideKey<T>(badkey);
@@ -420,35 +420,35 @@ void TCompactTrieTest::CheckData(const char* data, size_t datalen) {
}
template <class T>
-void TCompactTrieTest::CheckIterator(const char* data, size_t datalen) {
+void TCompactTrieTest::CheckIterator(const char* data, size_t datalen) {
typedef typename TCompactTrie<T>::TKey TKey;
typedef typename TCompactTrie<T>::TValueType TValue;
TMap<TKey, ui64> stored;
-
+
for (auto& i : SampleData) {
size_t len = strlen(i);
-
+
stored[MakeWideKey<T>(i, len)] = len * 2;
- }
-
- TCompactTrie<T> trie(data, datalen);
+ }
+
+ TCompactTrie<T> trie(data, datalen);
TVector<TValue> items;
- typename TCompactTrie<T>::TConstIterator it = trie.Begin();
- size_t entry_count = 0;
+ typename TCompactTrie<T>::TConstIterator it = trie.Begin();
+ size_t entry_count = 0;
TMap<TKey, ui64> received;
- while (it != trie.End()) {
+ while (it != trie.End()) {
UNIT_ASSERT_VALUES_EQUAL(it.GetKeySize(), it.GetKey().size());
- received.insert(*it);
+ received.insert(*it);
items.push_back(*it);
- entry_count++;
+ entry_count++;
it++;
- }
+ }
TMap<TKey, ui64> received2;
for (std::pair<TKey, ui64> x : trie) {
received2.insert(x);
}
- UNIT_ASSERT(entry_count == stored.size());
- UNIT_ASSERT(received == stored);
+ UNIT_ASSERT(entry_count == stored.size());
+ UNIT_ASSERT(received == stored);
UNIT_ASSERT(received2 == stored);
std::reverse(items.begin(), items.end());
@@ -483,21 +483,21 @@ void TCompactTrieTest::CheckIterator(const char* data, size_t datalen) {
UNIT_ASSERT(revIt == emptyIt);
UNIT_ASSERT(revIt.IsEmpty());
UNIT_ASSERT(revIt != trie.End());
-}
-
+}
+
template <class T>
void TCompactTrieTest::TestTrie(bool minimize, bool useFastLayout) {
TBufferOutput bufout;
CreateTrie<T>(bufout, minimize, useFastLayout);
- CheckData<T>(bufout.Buffer().Data(), bufout.Buffer().Size());
+ CheckData<T>(bufout.Buffer().Data(), bufout.Buffer().Size());
CheckUpperBound<T>(bufout.Buffer().Data(), bufout.Buffer().Size());
}
template <class T>
-void TCompactTrieTest::TestTrieIterator(bool minimize) {
+void TCompactTrieTest::TestTrieIterator(bool minimize) {
TBufferOutput bufout;
CreateTrie<T>(bufout, minimize, false);
- CheckIterator<T>(bufout.Buffer().Data(), bufout.Buffer().Size());
+ CheckIterator<T>(bufout.Buffer().Data(), bufout.Buffer().Size());
}
void TCompactTrieTest::TestTrie8() {
@@ -519,7 +519,7 @@ void TCompactTrieTest::TestFastTrie16() {
void TCompactTrieTest::TestFastTrie32() {
TestTrie<wchar32>(false, true);
}
-
+
void TCompactTrieTest::TestMinimizedTrie8() {
TestTrie<char>(true, false);
}
@@ -529,7 +529,7 @@ void TCompactTrieTest::TestMinimizedTrie16() {
void TCompactTrieTest::TestMinimizedTrie32() {
TestTrie<wchar32>(true, false);
}
-
+
void TCompactTrieTest::TestFastMinimizedTrie8() {
TestTrie<char>(true, true);
}
@@ -559,56 +559,56 @@ void TCompactTrieTest::TestMinimizedTrieIterator16() {
void TCompactTrieTest::TestMinimizedTrieIterator32() {
TestTrieIterator<wchar32>(true);
}
-
-void TCompactTrieTest::TestPhraseSearch() {
+
+void TCompactTrieTest::TestPhraseSearch() {
static const char* phrases[] = {"ab", "ab cd", "ab cd ef"};
- static const char* const goodphrase = "ab cd ef gh";
- static const char* const badphrase = "cd ef gh ab";
- TBufferOutput bufout;
-
+ static const char* const goodphrase = "ab cd ef gh";
+ static const char* const badphrase = "cd ef gh ab";
+ TBufferOutput bufout;
+
TCompactTrieBuilder<char> builder;
for (size_t i = 0; i < Y_ARRAY_SIZE(phrases); i++) {
- builder.Add(phrases[i], strlen(phrases[i]), i);
- }
- builder.Save(bufout);
-
- TCompactTrie<char> trie(bufout.Buffer().Data(), bufout.Buffer().Size());
+ builder.Add(phrases[i], strlen(phrases[i]), i);
+ }
+ builder.Save(bufout);
+
+ TCompactTrie<char> trie(bufout.Buffer().Data(), bufout.Buffer().Size());
TVector<TCompactTrie<char>::TPhraseMatch> matches;
- trie.FindPhrases(goodphrase, strlen(goodphrase), matches);
-
+ trie.FindPhrases(goodphrase, strlen(goodphrase), matches);
+
UNIT_ASSERT(matches.size() == Y_ARRAY_SIZE(phrases));
for (size_t i = 0; i < Y_ARRAY_SIZE(phrases); i++) {
- UNIT_ASSERT(matches[i].first == strlen(phrases[i]));
- UNIT_ASSERT(matches[i].second == i);
- }
-
- trie.FindPhrases(badphrase, strlen(badphrase), matches);
- UNIT_ASSERT(matches.size() == 0);
-}
-
+ UNIT_ASSERT(matches[i].first == strlen(phrases[i]));
+ UNIT_ASSERT(matches[i].second == i);
+ }
+
+ trie.FindPhrases(badphrase, strlen(badphrase), matches);
+ UNIT_ASSERT(matches.size() == 0);
+}
+
void TCompactTrieTest::TestAddGet() {
- TCompactTrieBuilder<char> builder;
- builder.Add("abcd", 4, 1);
- builder.Add("acde", 4, 2);
+ TCompactTrieBuilder<char> builder;
+ builder.Add("abcd", 4, 1);
+ builder.Add("acde", 4, 2);
ui64 dummy;
- UNIT_ASSERT(builder.Find("abcd", 4, &dummy));
+ UNIT_ASSERT(builder.Find("abcd", 4, &dummy));
UNIT_ASSERT(1 == dummy);
- UNIT_ASSERT(builder.Find("acde", 4, &dummy));
+ UNIT_ASSERT(builder.Find("acde", 4, &dummy));
UNIT_ASSERT(2 == dummy);
- UNIT_ASSERT(!builder.Find("fgdgfacde", 9, &dummy));
- UNIT_ASSERT(!builder.Find("ab", 2, &dummy));
+ UNIT_ASSERT(!builder.Find("fgdgfacde", 9, &dummy));
+ UNIT_ASSERT(!builder.Find("ab", 2, &dummy));
}
void TCompactTrieTest::TestEmpty() {
- TCompactTrieBuilder<char> builder;
+ TCompactTrieBuilder<char> builder;
ui64 dummy = 12345;
size_t prefixLen;
- UNIT_ASSERT(!builder.Find("abc", 3, &dummy));
+ UNIT_ASSERT(!builder.Find("abc", 3, &dummy));
TBufferOutput bufout;
builder.Save(bufout);
- TCompactTrie<char> trie(bufout.Buffer().Data(), bufout.Buffer().Size());
- UNIT_ASSERT(!trie.Find("abc", 3, &dummy));
+ TCompactTrie<char> trie(bufout.Buffer().Data(), bufout.Buffer().Size());
+ UNIT_ASSERT(!trie.Find("abc", 3, &dummy));
UNIT_ASSERT(!trie.Find("", 0, &dummy));
UNIT_ASSERT(!trie.FindLongestPrefix("abc", 3, &prefixLen, &dummy));
UNIT_ASSERT(!trie.FindLongestPrefix("", 0, &prefixLen, &dummy));
diff --git a/library/cpp/containers/comptrie/leaf_skipper.h b/library/cpp/containers/comptrie/leaf_skipper.h
index 06e93e10dc..3959258948 100644
--- a/library/cpp/containers/comptrie/leaf_skipper.h
+++ b/library/cpp/containers/comptrie/leaf_skipper.h
@@ -1,7 +1,7 @@
#pragma once
-
+
#include <cstddef>
-
+
namespace NCompactTrie {
class ILeafSkipper {
public:
diff --git a/library/cpp/containers/comptrie/make_fast_layout.cpp b/library/cpp/containers/comptrie/make_fast_layout.cpp
index 5de128e0e9..ade78d7899 100644
--- a/library/cpp/containers/comptrie/make_fast_layout.cpp
+++ b/library/cpp/containers/comptrie/make_fast_layout.cpp
@@ -146,7 +146,7 @@ namespace NCompactTrie {
explicit TOffsetIndex(TTrieNodeCounts& counts) {
ParentCounts.Swap(counts);
}
-
+
void Add(size_t key, size_t value) {
Data[key] = value;
}
@@ -154,7 +154,7 @@ namespace NCompactTrie {
size_t Size() const {
return Data.size();
}
-
+
size_t Get(size_t key) {
auto pos = Data.find(key);
if (pos == Data.end()) {
@@ -167,14 +167,14 @@ namespace NCompactTrie {
}
return result;
}
-
+
private:
TTrieNodeCounts ParentCounts;
THashMap<size_t, size_t> Data;
};
//---------------------------------------------------------------------------------------
-
+
class TTrieMeasurer {
public:
TTrieMeasurer(const TOpaqueTrie& trie, bool verbose);
@@ -226,7 +226,7 @@ namespace NCompactTrie {
{
Y_ASSERT(Trie.Data);
}
-
+
void TTrieMeasurer::Measure() {
if (Verbose) {
Cerr << "Measuring the trie..." << Endl;
@@ -308,7 +308,7 @@ namespace NCompactTrie {
, Verbose(verbose)
{
}
-
+
bool Move() override {
if (!Fresh) {
CentralWord.pop_back();
@@ -318,11 +318,11 @@ namespace NCompactTrie {
}
return true;
}
-
+
const TNode& Get() const {
return CentralWord.back();
}
-
+
size_t GetLeafLength() const override {
return Get().GetLeafLength();
}
@@ -333,7 +333,7 @@ namespace NCompactTrie {
return NPOS;
return resultLength - BackIndex.Get(absoffset);
}
-
+
size_t RecreateNode(char* buffer, size_t resultLength) override {
TWriteableNode newNode(Get(), Trie.Data);
newNode.ForwardOffset = PrepareOffset(Get().GetForwardOffset(), resultLength);
@@ -346,7 +346,7 @@ namespace NCompactTrie {
MaxIndexSize = Max(MaxIndexSize, BackIndex.Size());
return len;
}
-
+
private:
bool Fresh;
TOpaqueTrie Trie;
@@ -382,7 +382,7 @@ namespace NCompactTrie {
}
}
}
-
+
void FillCentralWord() {
CentralWord.clear();
CentralWord.push_back(TNode(Trie.Data, Trace.back().Nodes.back(), Trie.SkipFunction));
@@ -462,6 +462,6 @@ namespace NCompactTrie {
mes.Measure();
TVanEmdeBoasReverseNodeEnumerator enumerator(mes, verbose);
return WriteTrieBackwards(os, enumerator, verbose);
- }
+ }
}
diff --git a/library/cpp/containers/comptrie/make_fast_layout.h b/library/cpp/containers/comptrie/make_fast_layout.h
index a55e9a5689..b8fab5d65b 100644
--- a/library/cpp/containers/comptrie/make_fast_layout.h
+++ b/library/cpp/containers/comptrie/make_fast_layout.h
@@ -1,8 +1,8 @@
#pragma once
-
+
#include "leaf_skipper.h"
#include <cstddef>
-
+
class IOutputStream;
namespace NCompactTrie {
diff --git a/library/cpp/containers/comptrie/minimize.cpp b/library/cpp/containers/comptrie/minimize.cpp
index cb25a366f6..80d0b25217 100644
--- a/library/cpp/containers/comptrie/minimize.cpp
+++ b/library/cpp/containers/comptrie/minimize.cpp
@@ -15,7 +15,7 @@ namespace NCompactTrie {
// nodes that have identical continuations (Daciuk's right language),
// and repack the trie. Repacking is done in-place, so memory is less
// of an issue; however, it may take considerable time.
-
+
// IMPORTANT: never try to reminimize an already minimized trie or a trie with fast layout.
// Because of non-local structure and epsilon links, it won't work
// as you expect it to, and can destroy the trie in the making.
@@ -23,7 +23,7 @@ namespace NCompactTrie {
namespace {
using TOffsetList = TVector<size_t>;
using TPieceIndex = THashMap<size_t, TOffsetList>;
-
+
using TSizePair = std::pair<size_t, size_t>;
using TSizePairVector = TVector<TSizePair>;
using TSizePairVectorVector = TVector<TSizePairVector>;
@@ -36,38 +36,38 @@ namespace NCompactTrie {
TOffsetMap() {
Data.reserve(0x10000);
}
-
+
void Add(size_t key, size_t value) {
size_t hikey = key & 0xFFFF;
-
+
if (Data.size() <= hikey)
Data.resize(hikey + 1);
TSizePairVector& sublist = Data[hikey];
-
+
for (auto& it : sublist) {
if (it.first == key) {
it.second = value;
-
+
return;
}
}
-
+
sublist.push_back(TSizePair(key, value));
}
bool Contains(size_t key) const {
return (Get(key) != 0);
- }
+ }
size_t Get(size_t key) const {
size_t hikey = key & 0xFFFF;
-
+
if (Data.size() <= hikey)
return 0;
-
+
const TSizePairVector& sublist = Data[hikey];
-
+
for (const auto& it : sublist) {
if (it.first == key)
return it.second;
@@ -76,7 +76,7 @@ namespace NCompactTrie {
return 0;
}
};
-
+
class TOffsetDeltas {
protected:
TSizePairVector Data;
@@ -88,10 +88,10 @@ namespace NCompactTrie {
return; // no offset
} else {
TSizePair last = Data.back();
-
+
if (key <= last.first) {
Cerr << "Trouble: elements to offset delta list added in wrong order" << Endl;
-
+
return;
}
@@ -100,19 +100,19 @@ namespace NCompactTrie {
}
Data.push_back(TSizePair(key, value));
- }
+ }
size_t Get(size_t key) const {
if (Data.empty())
return key; // difference is zero;
-
+
if (key < Data.front().first)
return key;
-
+
// Binary search for the highest entry in the list that does not exceed the key
size_t from = 0;
size_t to = Data.size() - 1;
-
+
while (from < to) {
size_t midpoint = (from + to + 1) / 2;
@@ -121,7 +121,7 @@ namespace NCompactTrie {
else
from = midpoint;
}
-
+
TSizePair entry = Data[from];
return key - entry.first + entry.second;
@@ -139,7 +139,7 @@ namespace NCompactTrie {
, Length(len)
{
}
-
+
bool operator()(size_t p1, const size_t p2) {
int res = memcmp(Data + p1, Data + p2, Length);
@@ -159,13 +159,13 @@ namespace NCompactTrie {
: Selector(0)
{
}
-
+
TBranchPoint(const char* data, size_t offset, const ILeafSkipper& skipFunction)
: Node(data, offset, skipFunction)
, Selector(0)
{
}
-
+
bool IsFinal() const {
return Node.IsFinal();
}
@@ -174,7 +174,7 @@ namespace NCompactTrie {
size_t NextNode(const TOffsetMap& mergedNodes) {
while (Selector < 3) {
size_t nextOffset = 0;
-
+
switch (++Selector) {
case 1:
if (Node.GetRightOffset())
@@ -199,7 +199,7 @@ namespace NCompactTrie {
return nextOffset;
}
return 0;
- }
+ }
};
class TMergingReverseNodeEnumerator: public TReverseNodeEnumerator {
@@ -209,7 +209,7 @@ namespace NCompactTrie {
const TOffsetMap& MergeMap;
TVector<TBranchPoint> Trace;
TOffsetDeltas OffsetIndex;
-
+
public:
TMergingReverseNodeEnumerator(const TOpaqueTrie& trie, const TOffsetMap& mergers)
: Fresh(true)
@@ -217,7 +217,7 @@ namespace NCompactTrie {
, MergeMap(mergers)
{
}
-
+
bool Move() override {
if (Fresh) {
Trace.push_back(TBranchPoint(Trie.Data, 0, Trie.SkipFunction));
@@ -225,7 +225,7 @@ namespace NCompactTrie {
} else {
if (Trace.empty())
return false;
-
+
Trace.pop_back();
if (Trace.empty())
@@ -233,7 +233,7 @@ namespace NCompactTrie {
}
size_t nextnode = Trace.back().NextNode(MergeMap);
-
+
while (nextnode) {
Trace.push_back(TBranchPoint(Trie.Data, nextnode, Trie.SkipFunction));
nextnode = Trace.back().NextNode(MergeMap);
@@ -241,30 +241,30 @@ namespace NCompactTrie {
return (!Trace.empty());
}
-
+
const TNode& Get() const {
return Trace.back().Node;
}
size_t GetLeafLength() const override {
return Get().GetLeafLength();
}
-
+
// Returns recalculated offset from the end of the current node
size_t PrepareOffset(size_t absoffset, size_t minilength) {
if (!absoffset)
return NPOS;
-
+
if (MergeMap.Contains(absoffset))
absoffset = MergeMap.Get(absoffset);
return minilength - OffsetIndex.Get(Trie.Length - absoffset);
}
-
+
size_t RecreateNode(char* buffer, size_t resultLength) override {
TWriteableNode newNode(Get(), Trie.Data);
newNode.ForwardOffset = PrepareOffset(Get().GetForwardOffset(), resultLength);
newNode.LeftOffset = PrepareOffset(Get().GetLeftOffset(), resultLength);
newNode.RightOffset = PrepareOffset(Get().GetRightOffset(), resultLength);
-
+
if (!buffer)
return newNode.Measure();
@@ -275,8 +275,8 @@ namespace NCompactTrie {
}
};
- }
-
+ }
+
static void AddPiece(TPieceIndex& index, size_t offset, size_t len) {
index[len].push_back(offset);
}
@@ -288,24 +288,24 @@ namespace NCompactTrie {
TOffsetMap merger;
// Start walking the trie from head.
AddPiece(subtries, 0, trie.Length);
-
+
size_t counter = 0;
// Now consider all nodes with sizeable continuations
for (size_t curlen = trie.Length; curlen >= minMergeSize && !subtries.empty(); curlen--) {
TPieceIndex::iterator iit = subtries.find(curlen);
-
+
if (iit == subtries.end())
continue; // fast forward to the next available length value
TOffsetList& batch = iit->second;
TPieceComparer comparer(trie.Data, curlen);
Sort(batch.begin(), batch.end(), comparer);
-
+
TOffsetList::iterator it = batch.begin();
while (it != batch.end()) {
if (verbose)
ShowProgress(++counter);
-
+
size_t offset = *it;
// Fill the array with the subnodes of the element
@@ -322,11 +322,11 @@ namespace NCompactTrie {
if (size_t forwardOffset = node.GetForwardOffset()) {
AddPiece(subtries, forwardOffset, end - forwardOffset);
}
-
+
while (++it != batch.end()) {
// Find next different; until then, just add the offsets to the list of merged nodes.
size_t nextoffset = *it;
-
+
if (memcmp(trie.Data + offset, trie.Data + nextoffset, curlen))
break;
@@ -335,12 +335,12 @@ namespace NCompactTrie {
}
subtries.erase(curlen);
- }
+ }
if (verbose) {
Cerr << counter << Endl;
}
return merger;
- }
+ }
size_t RawCompactTrieMinimizeImpl(IOutputStream& os, TOpaqueTrie& trie, bool verbose, size_t minMergeSize, EMinimizeMode mode) {
if (!trie.Data || !trie.Length) {
diff --git a/library/cpp/containers/comptrie/minimize.h b/library/cpp/containers/comptrie/minimize.h
index d9fff3cfa5..baaa431d04 100644
--- a/library/cpp/containers/comptrie/minimize.h
+++ b/library/cpp/containers/comptrie/minimize.h
@@ -1,8 +1,8 @@
#pragma once
-
+
#include "leaf_skipper.h"
#include <cstddef>
-
+
class IOutputStream;
namespace NCompactTrie {
diff --git a/library/cpp/containers/comptrie/node.cpp b/library/cpp/containers/comptrie/node.cpp
index 1df102fb84..5fd22f15ec 100644
--- a/library/cpp/containers/comptrie/node.cpp
+++ b/library/cpp/containers/comptrie/node.cpp
@@ -1,7 +1,7 @@
#include "node.h"
#include "leaf_skipper.h"
#include "comptrie_impl.h"
-
+
#include <util/system/yassert.h>
#include <util/generic/yexception.h>
@@ -16,7 +16,7 @@ namespace NCompactTrie {
offset = 0;
}
}
-
+
// We believe that epsilon links are found only on the forward-position and that afer jumping an epsilon link you come to an ordinary node.
TNode::TNode(const char* data, size_t offset, const ILeafSkipper& skipFunction)
@@ -35,7 +35,7 @@ namespace NCompactTrie {
char flags = *(datapos++);
Y_ASSERT(!IsEpsilonLink(flags));
Label = *(datapos++);
-
+
size_t leftsize = LeftOffsetLen(flags);
size_t& leftOffset = Offsets[D_LEFT];
leftOffset = UnpackOffset(datapos, leftsize);
@@ -43,7 +43,7 @@ namespace NCompactTrie {
leftOffset += Offset;
}
datapos += leftsize;
-
+
size_t rightsize = RightOffsetLen(flags);
size_t& rightOffset = Offsets[D_RIGHT];
rightOffset = UnpackOffset(datapos, rightsize);
@@ -72,8 +72,8 @@ namespace NCompactTrie {
ythrow yexception() << "Corrupted epsilon link";
}
forwardOffset += epsilonOffset;
- }
- }
- }
+ }
+ }
+ }
-}
+}
diff --git a/library/cpp/containers/comptrie/node.h b/library/cpp/containers/comptrie/node.h
index 3194a1340f..d6f4317db0 100644
--- a/library/cpp/containers/comptrie/node.h
+++ b/library/cpp/containers/comptrie/node.h
@@ -4,7 +4,7 @@
namespace NCompactTrie {
class ILeafSkipper;
-
+
enum TDirection {
D_LEFT,
D_FINAL,
@@ -42,11 +42,11 @@ namespace NCompactTrie {
size_t GetCoreLength() const {
return CoreLength;
}
-
+
size_t GetOffsetByDirection(TDirection direction) const {
return Offsets[direction];
}
-
+
size_t GetForwardOffset() const {
return Offsets[D_NEXT];
}
@@ -63,7 +63,7 @@ namespace NCompactTrie {
bool IsFinal() const {
return GetLeafOffset() != 0;
}
-
+
bool HasEpsilonLinkForward() const {
return GetForwardOffset() > Offset + CoreLength;
}
@@ -76,5 +76,5 @@ namespace NCompactTrie {
char Label;
};
-
+
}
diff --git a/library/cpp/containers/comptrie/opaque_trie_iterator.cpp b/library/cpp/containers/comptrie/opaque_trie_iterator.cpp
index 356c6a72ff..5fd3914be6 100644
--- a/library/cpp/containers/comptrie/opaque_trie_iterator.cpp
+++ b/library/cpp/containers/comptrie/opaque_trie_iterator.cpp
@@ -1,7 +1,7 @@
#include "opaque_trie_iterator.h"
#include "comptrie_impl.h"
#include "node.h"
-
+
namespace NCompactTrie {
TOpaqueTrieIterator::TOpaqueTrieIterator(const TOpaqueTrie& trie, const char* emptyValue, bool atend,
size_t maxKeyLength)
diff --git a/library/cpp/containers/comptrie/opaque_trie_iterator.h b/library/cpp/containers/comptrie/opaque_trie_iterator.h
index 04d7134b07..195da3c191 100644
--- a/library/cpp/containers/comptrie/opaque_trie_iterator.h
+++ b/library/cpp/containers/comptrie/opaque_trie_iterator.h
@@ -1,10 +1,10 @@
#pragma once
-
+
#include "comptrie_impl.h"
#include "node.h"
#include "key_selector.h"
#include "leaf_skipper.h"
-
+
#include <util/generic/vector.h>
#include <util/generic/yexception.h>
@@ -17,7 +17,7 @@ namespace NCompactTrie {
const char* Data;
size_t Limit; // valid data is in range [Data + Node.GetOffset(), Data + Limit)
TDirection CurrentDirection;
-
+
public:
TFork(const char* data, size_t offset, size_t limit, const ILeafSkipper& skipper);
diff --git a/library/cpp/containers/comptrie/write_trie_backwards.cpp b/library/cpp/containers/comptrie/write_trie_backwards.cpp
index 3109e342a7..fd8c28b0ed 100644
--- a/library/cpp/containers/comptrie/write_trie_backwards.cpp
+++ b/library/cpp/containers/comptrie/write_trie_backwards.cpp
@@ -11,23 +11,23 @@ namespace NCompactTrie {
if (verbose) {
Cerr << "Writing down the trie..." << Endl;
}
-
+
// Rewrite everything from the back, removing unused pieces.
const size_t chunksize = 0x10000;
TVector<char*> resultData;
-
+
resultData.push_back(new char[chunksize]);
char* chunkend = resultData.back() + chunksize;
-
+
size_t resultLength = 0;
size_t chunkLength = 0;
-
+
size_t counter = 0;
TBuffer bufferHolder;
while (enumerator.Move()) {
if (verbose)
ShowProgress(++counter);
-
+
size_t bufferLength = 64 + enumerator.GetLeafLength(); // never know how big leaf data can be
bufferHolder.Clear();
bufferHolder.Resize(bufferLength);
@@ -35,7 +35,7 @@ namespace NCompactTrie {
size_t nodelength = enumerator.RecreateNode(buffer, resultLength);
Y_ASSERT(nodelength <= bufferLength);
-
+
resultLength += nodelength;
if (chunkLength + nodelength <= chunksize) {
@@ -44,14 +44,14 @@ namespace NCompactTrie {
} else { // allocate a new chunk
memcpy(chunkend - chunksize, buffer + nodelength - (chunksize - chunkLength), chunksize - chunkLength);
chunkLength = chunkLength + nodelength - chunksize;
-
+
resultData.push_back(new char[chunksize]);
chunkend = resultData.back() + chunksize;
-
+
while (chunkLength > chunksize) { // allocate a new chunks
chunkLength -= chunksize;
memcpy(chunkend - chunksize, buffer + chunkLength, chunksize);
-
+
resultData.push_back(new char[chunksize]);
chunkend = resultData.back() + chunksize;
}
@@ -70,11 +70,11 @@ namespace NCompactTrie {
chunkLength = chunksize;
delete[] chunk;
resultData.pop_back();
- }
+ }
return resultLength;
- }
-
+ }
+
size_t WriteTrieBackwardsNoAlloc(IOutputStream& os, TReverseNodeEnumerator& enumerator, TOpaqueTrie& trie, EMinimizeMode mode) {
char* data = const_cast<char*>(trie.Data);
char* end = data + trie.Length;
diff --git a/library/cpp/containers/comptrie/writeable_node.cpp b/library/cpp/containers/comptrie/writeable_node.cpp
index b45f9f47ce..404003dbbd 100644
--- a/library/cpp/containers/comptrie/writeable_node.cpp
+++ b/library/cpp/containers/comptrie/writeable_node.cpp
@@ -12,7 +12,7 @@ namespace NCompactTrie {
, Label(0)
{
}
-
+
static size_t GetOffsetFromEnd(const TNode& node, size_t absOffset) {
return absOffset ? absOffset - node.GetOffset() - node.GetCoreLength() : NPOS;
}
@@ -36,7 +36,7 @@ namespace NCompactTrie {
do {
lastLen = len;
lastFwdLen = fwdLen;
-
+
len = 2 + LeafLength;
len += MeasureOffset(LeftOffset != NPOS ? LeftOffset + lastLen : 0);
len += MeasureOffset(RightOffset != NPOS ? RightOffset + lastLen : 0);
@@ -48,15 +48,15 @@ namespace NCompactTrie {
fwdLen = MeasureOffset(ForwardOffset + lastFwdLen) + 1;
len += fwdLen;
}
-
+
} while (lastLen != len || lastFwdLen != fwdLen);
-
+
return len;
}
-
+
size_t TWriteableNode::Pack(char* buffer) const {
const size_t length = Measure();
-
+
char flags = 0;
if (LeafPos) {
flags |= MT_FINAL;
@@ -64,14 +64,14 @@ namespace NCompactTrie {
if (ForwardOffset != NPOS) {
flags |= MT_NEXT;
}
-
+
const size_t leftOffset = LeftOffset != NPOS ? LeftOffset + length : 0;
const size_t rightOffset = RightOffset != NPOS ? RightOffset + length : 0;
const size_t leftOffsetSize = MeasureOffset(leftOffset);
const size_t rightOffsetSize = MeasureOffset(rightOffset);
flags |= (leftOffsetSize << MT_LEFTSHIFT);
flags |= (rightOffsetSize << MT_RIGHTSHIFT);
-
+
buffer[0] = flags;
buffer[1] = Label;
size_t usedLen = 2;
@@ -91,6 +91,6 @@ namespace NCompactTrie {
}
Y_ASSERT(usedLen == length);
return usedLen;
- }
-
-}
+ }
+
+}