diff options
author | nga <nga@yandex-team.ru> | 2022-02-10 16:48:09 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:48:09 +0300 |
commit | 1f553f46fb4f3c5eec631352cdd900a0709016af (patch) | |
tree | a231fba2c03b440becaea6c86a2702d0bfb0336e /library/cpp/containers/comptrie | |
parent | c4de7efdedc25b49cbea74bd589eecb61b55b60a (diff) | |
download | ydb-1f553f46fb4f3c5eec631352cdd900a0709016af.tar.gz |
Restoring authorship annotation for <nga@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/containers/comptrie')
-rw-r--r-- | library/cpp/containers/comptrie/array_with_size.h | 112 | ||||
-rw-r--r-- | library/cpp/containers/comptrie/comptrie_builder.h | 20 | ||||
-rw-r--r-- | library/cpp/containers/comptrie/comptrie_builder.inl | 636 | ||||
-rw-r--r-- | library/cpp/containers/comptrie/comptrie_impl.cpp | 2 | ||||
-rw-r--r-- | library/cpp/containers/comptrie/comptrie_impl.h | 58 | ||||
-rw-r--r-- | library/cpp/containers/comptrie/comptrie_trie.h | 74 | ||||
-rw-r--r-- | library/cpp/containers/comptrie/comptrie_ut.cpp | 118 | ||||
-rw-r--r-- | library/cpp/containers/comptrie/make_fast_layout.h | 2 | ||||
-rw-r--r-- | library/cpp/containers/comptrie/minimize.h | 4 | ||||
-rw-r--r-- | library/cpp/containers/comptrie/opaque_trie_iterator.cpp | 6 | ||||
-rw-r--r-- | library/cpp/containers/comptrie/opaque_trie_iterator.h | 2 | ||||
-rw-r--r-- | library/cpp/containers/comptrie/ya.make | 10 |
12 files changed, 522 insertions, 522 deletions
diff --git a/library/cpp/containers/comptrie/array_with_size.h b/library/cpp/containers/comptrie/array_with_size.h index 36e61c7410..4a1c85ce68 100644 --- a/library/cpp/containers/comptrie/array_with_size.h +++ b/library/cpp/containers/comptrie/array_with_size.h @@ -1,67 +1,67 @@ #pragma once - -#include <util/generic/ptr.h> -#include <util/generic/noncopyable.h> -#include <util/generic/utility.h> + +#include <util/generic/ptr.h> +#include <util/generic/noncopyable.h> +#include <util/generic/utility.h> #include <util/system/sys_alloc.h> - -template <typename T> + +template <typename T> class TArrayWithSizeHolder : TNonCopyable { - typedef TArrayWithSizeHolder<T> TThis; - - T* Data; - -public: + typedef TArrayWithSizeHolder<T> TThis; + + T* Data; + +public: TArrayWithSizeHolder() : Data(nullptr) { } - - ~TArrayWithSizeHolder() { - if (!Data) - return; - for (size_t i = 0; i < Size(); ++i) { - try { - Data[i].~T(); - } catch (...) { - } - } + + ~TArrayWithSizeHolder() { + if (!Data) + return; + for (size_t i = 0; i < Size(); ++i) { + try { + Data[i].~T(); + } catch (...) { + } + } y_deallocate(((size_t*)Data) - 1); - } - - void Swap(TThis& copy) { - DoSwap(Data, copy.Data); - } - - void Resize(size_t newSize) { - if (newSize == Size()) - return; - TThis copy; + } + + void Swap(TThis& copy) { + DoSwap(Data, copy.Data); + } + + void Resize(size_t newSize) { + if (newSize == Size()) + return; + TThis copy; copy.Data = (T*)(((size_t*)y_allocate(sizeof(size_t) + sizeof(T) * newSize)) + 1); - // does not handle constructor exceptions properly - for (size_t i = 0; i < Min(Size(), newSize); ++i) { - new (copy.Data + i) T(Data[i]); - } - for (size_t i = Min(Size(), newSize); i < newSize; ++i) { - new (copy.Data + i) T; - } + // does not handle constructor exceptions properly + for (size_t i = 0; i < Min(Size(), newSize); ++i) { + new (copy.Data + i) T(Data[i]); + } + for (size_t i = Min(Size(), newSize); i < newSize; ++i) { + new (copy.Data + i) T; + } ((size_t*)copy.Data)[-1] = newSize; - Swap(copy); - } - - size_t Size() const { + Swap(copy); + } + + size_t Size() const { return Data ? ((size_t*)Data)[-1] : 0; - } - - bool Empty() const { - return Size() == 0; - } - - T* Get() { - return Data; - } - - const T* Get() const { - return Data; - } -}; + } + + bool Empty() const { + return Size() == 0; + } + + T* Get() { + return Data; + } + + const T* Get() const { + return Data; + } +}; diff --git a/library/cpp/containers/comptrie/comptrie_builder.h b/library/cpp/containers/comptrie/comptrie_builder.h index cf7d2e39a3..652c3151c5 100644 --- a/library/cpp/containers/comptrie/comptrie_builder.h +++ b/library/cpp/containers/comptrie/comptrie_builder.h @@ -53,19 +53,19 @@ public: bool Add(const TSymbol* key, size_t keylen, const TData& value); bool Add(const TKeyBuf& key, const TData& value) { return Add(key.data(), key.size(), value); - } + } - // add already serialized data + // add already serialized data bool AddPtr(const TSymbol* key, size_t keylen, const char* data); bool AddPtr(const TKeyBuf& key, const char* data) { return AddPtr(key.data(), key.size(), data); - } - + } + bool AddSubtreeInFile(const TSymbol* key, size_t keylen, const TString& filename); bool AddSubtreeInFile(const TKeyBuf& key, const TString& filename) { return AddSubtreeInFile(key.data(), key.size(), filename); - } - + } + bool AddSubtreeInBuffer(const TSymbol* key, size_t keylen, TArrayWithSizeHolder<char>&& buffer); bool AddSubtreeInBuffer(const TKeyBuf& key, TArrayWithSizeHolder<char>&& buffer) { return AddSubtreeInBuffer(key.data(), key.size(), std::move(buffer)); @@ -74,8 +74,8 @@ public: bool Find(const TSymbol* key, size_t keylen, TData* value) const; bool Find(const TKeyBuf& key, TData* value = nullptr) const { return Find(key.data(), key.size(), value); - } - + } + bool FindLongestPrefix(const TSymbol* key, size_t keylen, size_t* prefixLen, TData* value = nullptr) const; bool FindLongestPrefix(const TKeyBuf& key, size_t* prefixLen, TData* value = nullptr) const { return FindLongestPrefix(key.data(), key.size(), prefixLen, value); @@ -85,8 +85,8 @@ public: size_t SaveAndDestroy(IOutputStream& os); size_t SaveToFile(const TString& fileName) const { TFixedBufferFileOutput out(fileName); - return Save(out); - } + return Save(out); + } void Clear(); // Returns all memory to the system and resets the builder state. diff --git a/library/cpp/containers/comptrie/comptrie_builder.inl b/library/cpp/containers/comptrie/comptrie_builder.inl index f273fa6571..1410d4590f 100644 --- a/library/cpp/containers/comptrie/comptrie_builder.inl +++ b/library/cpp/containers/comptrie/comptrie_builder.inl @@ -28,54 +28,54 @@ template <class T, class D, class S> class TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl { protected: TMemoryPool Pool; - size_t PayloadSize; - THolder<TFixedSizeAllocator> NodeAllocator; + size_t PayloadSize; + THolder<TFixedSizeAllocator> NodeAllocator; class TNode; - class TArc; + class TArc; TNode* Root; TCompactTrieBuilderFlags Flags; size_t EntryCount; size_t NodeCount; - TPacker Packer; - - enum EPayload { - DATA_ABSENT, - DATA_INSIDE, - DATA_MALLOCED, - DATA_IN_MEMPOOL, - }; - + TPacker Packer; + + enum EPayload { + DATA_ABSENT, + DATA_INSIDE, + DATA_MALLOCED, + DATA_IN_MEMPOOL, + }; + protected: void ConvertSymbolArrayToChar(const TSymbol* key, size_t keylen, TTempBuf& buf, size_t ckeylen) const; - void NodeLinkTo(TNode* thiz, const TBlob& label, TNode* node); + void NodeLinkTo(TNode* thiz, const TBlob& label, TNode* node); TNode* NodeForwardAdd(TNode* thiz, const char* label, size_t len, size_t& passed, size_t* nodeCount); bool FindEntryImpl(const char* key, size_t keylen, TData* value) const; bool FindLongestPrefixImpl(const char* keyptr, size_t keylen, size_t* prefixLen, TData* value) const; - size_t NodeMeasureSubtree(TNode* thiz) const; + size_t NodeMeasureSubtree(TNode* thiz) const; ui64 NodeSaveSubtree(TNode* thiz, IOutputStream& os) const; ui64 NodeSaveSubtreeAndDestroy(TNode* thiz, IOutputStream& osy); - void NodeBufferSubtree(TNode* thiz); - - size_t NodeMeasureLeafValue(TNode* thiz) const; + void NodeBufferSubtree(TNode* thiz); + + size_t NodeMeasureLeafValue(TNode* thiz) const; ui64 NodeSaveLeafValue(TNode* thiz, IOutputStream& os) const; - + virtual ui64 ArcMeasure(const TArc* thiz, size_t leftsize, size_t rightsize) const; - + virtual ui64 ArcSaveSelf(const TArc* thiz, IOutputStream& os) const; ui64 ArcSave(const TArc* thiz, IOutputStream& os) const; ui64 ArcSaveAndDestroy(const TArc* thiz, IOutputStream& os); - + public: TCompactTrieBuilderImpl(TCompactTrieBuilderFlags flags, TPacker packer, IAllocator* alloc); virtual ~TCompactTrieBuilderImpl(); - void DestroyNode(TNode* node); - void NodeReleasePayload(TNode* thiz); - + void DestroyNode(TNode* node); + void NodeReleasePayload(TNode* thiz); + char* AddEntryForData(const TSymbol* key, size_t keylen, size_t dataLen, bool& isNewAddition); TNode* AddEntryForSomething(const TSymbol* key, size_t keylen, bool& isNewAddition); - + bool AddEntry(const TSymbol* key, size_t keylen, const TData& value); bool AddEntryPtr(const TSymbol* key, size_t keylen, const char* value); bool AddSubtreeInFile(const TSymbol* key, size_t keylen, const TString& fileName); @@ -98,38 +98,38 @@ public: }; template <class T, class D, class S> -class TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TArc { +class TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TArc { public: - TBlob Label; - TNode* Node; - mutable size_t LeftOffset; - mutable size_t RightOffset; + TBlob Label; + TNode* Node; + mutable size_t LeftOffset; + mutable size_t RightOffset; - TArc(const TBlob& lbl, TNode* nd); -}; + TArc(const TBlob& lbl, TNode* nd); +}; -template <class T, class D, class S> -class TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TNode { -public: - typedef typename TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl TBuilderImpl; - typedef typename TBuilderImpl::TArc TArc; +template <class T, class D, class S> +class TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TNode { +public: + typedef typename TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl TBuilderImpl; + typedef typename TBuilderImpl::TArc TArc; - struct ISubtree { + struct ISubtree { virtual ~ISubtree() = default; - virtual bool IsLast() const = 0; - virtual ui64 Measure(const TBuilderImpl* builder) const = 0; + virtual bool IsLast() const = 0; + virtual ui64 Measure(const TBuilderImpl* builder) const = 0; virtual ui64 Save(const TBuilderImpl* builder, IOutputStream& os) const = 0; virtual ui64 SaveAndDestroy(TBuilderImpl* builder, IOutputStream& os) = 0; - virtual void Destroy(TBuilderImpl*) { } + virtual void Destroy(TBuilderImpl*) { } // Tries to find key in subtree. // Returns next node to find the key in (to avoid recursive calls) // If it has end result, writes it to @value and @result arguments and returns nullptr virtual const TNode* Find(TStringBuf& key, TData* value, bool& result, const TPacker& packer) const = 0; virtual const TNode* FindLongestPrefix(TStringBuf& key, TData* value, bool& result, const TPacker& packer) const = 0; - }; - - class TArcSet: public ISubtree, public TCompactVector<TArc> { + }; + + class TArcSet: public ISubtree, public TCompactVector<TArc> { public: typedef typename TCompactVector<TArc>::iterator iterator; typedef typename TCompactVector<TArc>::const_iterator const_iterator; @@ -141,35 +141,35 @@ public: iterator Find(char ch); const_iterator Find(char ch) const; void Add(const TBlob& s, TNode* node); - + bool IsLast() const override { - return this->Empty(); - } - + return this->Empty(); + } + const TNode* Find(TStringBuf& key, TData* value, bool& result, const TPacker& packer) const override; const TNode* FindLongestPrefix(TStringBuf& key, TData* value, bool& result, const TPacker& packer) const override { return Find(key, value, result, packer); } ui64 Measure(const TBuilderImpl* builder) const override { - return MeasureRange(builder, 0, this->size()); - } - - ui64 MeasureRange(const TBuilderImpl* builder, size_t from, size_t to) const { - if (from >= to) - return 0; - - size_t median = (from + to) / 2; + return MeasureRange(builder, 0, this->size()); + } + + ui64 MeasureRange(const TBuilderImpl* builder, size_t from, size_t to) const { + if (from >= to) + return 0; + + size_t median = (from + to) / 2; size_t leftsize = (size_t)MeasureRange(builder, from, median); size_t rightsize = (size_t)MeasureRange(builder, median + 1, to); - - return builder->ArcMeasure(&(*this)[median], leftsize, rightsize); - } - + + return builder->ArcMeasure(&(*this)[median], leftsize, rightsize); + } + ui64 Save(const TBuilderImpl* builder, IOutputStream& os) const override { - return SaveRange(builder, 0, this->size(), os); - } - + return SaveRange(builder, 0, this->size(), os); + } + ui64 SaveAndDestroy(TBuilderImpl* builder, IOutputStream& os) override { ui64 result = SaveRangeAndDestroy(builder, 0, this->size(), os); Destroy(builder); @@ -177,17 +177,17 @@ public: } ui64 SaveRange(const TBuilderImpl* builder, size_t from, size_t to, IOutputStream& os) const { - if (from >= to) - return 0; - - size_t median = (from + to) / 2; - - ui64 written = builder->ArcSave(&(*this)[median], os); - written += SaveRange(builder, from, median, os); - written += SaveRange(builder, median + 1, to, os); - return written; - } - + if (from >= to) + return 0; + + size_t median = (from + to) / 2; + + ui64 written = builder->ArcSave(&(*this)[median], os); + written += SaveRange(builder, from, median, os); + written += SaveRange(builder, median + 1, to, os); + return written; + } + ui64 SaveRangeAndDestroy(TBuilderImpl* builder, size_t from, size_t to, IOutputStream& os) { if (from >= to) return 0; @@ -201,30 +201,30 @@ public: } void Destroy(TBuilderImpl* builder) override { - // Delete all nodes down the stream. - for (iterator it = this->begin(); it != this->end(); ++it) { - builder->DestroyNode(it->Node); - } - this->clear(); - } - + // Delete all nodes down the stream. + for (iterator it = this->begin(); it != this->end(); ++it) { + builder->DestroyNode(it->Node); + } + this->clear(); + } + ~TArcSet() override { Y_ASSERT(this->empty()); - } - + } + }; - struct TBufferedSubtree: public ISubtree { - TArrayWithSizeHolder<char> Buffer; - + struct TBufferedSubtree: public ISubtree { + TArrayWithSizeHolder<char> Buffer; + TBufferedSubtree() { Y_ASSERT(reinterpret_cast<ISubtree*>(this) == static_cast<void*>(this)); // This assumption is used in TNode::Subtree() } bool IsLast() const override { - return Buffer.Empty(); - } - + return Buffer.Empty(); + } + const TNode* Find(TStringBuf& key, TData* value, bool& result, const TPacker& packer) const override { if (Buffer.Empty()) { result = false; @@ -252,45 +252,45 @@ public: } ui64 Measure(const TBuilderImpl*) const override { - return Buffer.Size(); - } - + return Buffer.Size(); + } + ui64 Save(const TBuilderImpl*, IOutputStream& os) const override { - os.Write(Buffer.Get(), Buffer.Size()); - return Buffer.Size(); - } + os.Write(Buffer.Get(), Buffer.Size()); + return Buffer.Size(); + } ui64 SaveAndDestroy(TBuilderImpl* builder, IOutputStream& os) override { ui64 result = Save(builder, os); TArrayWithSizeHolder<char>().Swap(Buffer); return result; } - }; - - struct TSubtreeInFile: public ISubtree { - struct TData { + }; + + struct TSubtreeInFile: public ISubtree { + struct TData { TString FileName; - ui64 Size; - }; - THolder<TData> Data; - + ui64 Size; + }; + THolder<TData> Data; + TSubtreeInFile(const TString& fileName) { - // stupid API - TFile file(fileName, RdOnly); - i64 size = file.GetLength(); - if (size < 0) - ythrow yexception() << "unable to get file " << fileName.Quote() << " size for unknown reason"; - Data.Reset(new TData); - Data->FileName = fileName; - Data->Size = size; + // stupid API + TFile file(fileName, RdOnly); + i64 size = file.GetLength(); + if (size < 0) + ythrow yexception() << "unable to get file " << fileName.Quote() << " size for unknown reason"; + Data.Reset(new TData); + Data->FileName = fileName; + Data->Size = size; Y_ASSERT(reinterpret_cast<ISubtree*>(this) == static_cast<void*>(this)); // This assumption is used in TNode::Subtree() - } - + } + bool IsLast() const override { - return Data->Size == 0; - } - + return Data->Size == 0; + } + const TNode* Find(TStringBuf& key, typename TCompactTrieBuilder::TData* value, bool& result, const TPacker& packer) const override { if (!Data) { result = false; @@ -317,104 +317,104 @@ public: } ui64 Measure(const TBuilderImpl*) const override { - return Data->Size; - } - + return Data->Size; + } + ui64 Save(const TBuilderImpl*, IOutputStream& os) const override { TUnbufferedFileInput is(Data->FileName); - ui64 written = TransferData(&is, &os); - if (written != Data->Size) - ythrow yexception() << "file " << Data->FileName.Quote() << " size changed"; - return written; - } + ui64 written = TransferData(&is, &os); + if (written != Data->Size) + ythrow yexception() << "file " << Data->FileName.Quote() << " size changed"; + return written; + } ui64 SaveAndDestroy(TBuilderImpl* builder, IOutputStream& os) override { return Save(builder, os); } - }; - - union { + }; + + union { char ArcsData[CONSTEXPR_MAX3(sizeof(TArcSet), sizeof(TBufferedSubtree), sizeof(TSubtreeInFile))]; union { void* Data1; long long int Data2; } Aligner; - }; - - inline ISubtree* Subtree() { - return reinterpret_cast<ISubtree*>(ArcsData); - } - - inline const ISubtree* Subtree() const { - return reinterpret_cast<const ISubtree*>(ArcsData); - } - - EPayload PayloadType; + }; + + inline ISubtree* Subtree() { + return reinterpret_cast<ISubtree*>(ArcsData); + } + + inline const ISubtree* Subtree() const { + return reinterpret_cast<const ISubtree*>(ArcsData); + } + + EPayload PayloadType; inline const char* PayloadPtr() const { return ((const char*) this) + sizeof(TNode); } - inline char* PayloadPtr() { - return ((char*) this) + sizeof(TNode); - } - - // *Payload() + inline char* PayloadPtr() { + return ((char*) this) + sizeof(TNode); + } + + // *Payload() inline const char*& PayloadAsPtr() const { const char** payload = (const char**) PayloadPtr(); return *payload; } - inline char*& PayloadAsPtr() { - char** payload = (char**) PayloadPtr(); - return *payload; - } - + inline char*& PayloadAsPtr() { + char** payload = (char**) PayloadPtr(); + return *payload; + } + inline const char* GetPayload() const { - switch (PayloadType) { - case DATA_INSIDE: - return PayloadPtr(); - case DATA_MALLOCED: - case DATA_IN_MEMPOOL: - return PayloadAsPtr(); + switch (PayloadType) { + case DATA_INSIDE: + return PayloadPtr(); + case DATA_MALLOCED: + case DATA_IN_MEMPOOL: + return PayloadAsPtr(); case DATA_ABSENT: - default: - abort(); - } - } - + default: + abort(); + } + } + inline char* GetPayload() { const TNode* thiz = this; return const_cast<char*>(thiz->GetPayload()); // const_cast is to avoid copy-paste style } - bool IsFinal() const { - return PayloadType != DATA_ABSENT; - } - - bool IsLast() const { - return Subtree()->IsLast(); - } - - inline void* operator new(size_t, TFixedSizeAllocator& pool) { - return pool.Allocate(); - } - + bool IsFinal() const { + return PayloadType != DATA_ABSENT; + } + + bool IsLast() const { + return Subtree()->IsLast(); + } + + inline void* operator new(size_t, TFixedSizeAllocator& pool) { + return pool.Allocate(); + } + inline void operator delete(void* ptr, TFixedSizeAllocator& pool) noexcept { - pool.Release(ptr); - } - - TNode() - : PayloadType(DATA_ABSENT) - { - new (Subtree()) TArcSet; - } - - ~TNode() { - Subtree()->~ISubtree(); + pool.Release(ptr); + } + + TNode() + : PayloadType(DATA_ABSENT) + { + new (Subtree()) TArcSet; + } + + ~TNode() { + Subtree()->~ISubtree(); Y_ASSERT(PayloadType == DATA_ABSENT); - } - + } + }; // TCompactTrieBuilder @@ -433,14 +433,14 @@ bool TCompactTrieBuilder<T, D, S>::Add(const TSymbol* key, size_t keylen, const template <class T, class D, class S> bool TCompactTrieBuilder<T, D, S>::AddPtr(const TSymbol* key, size_t keylen, const char* value) { return Impl->AddEntryPtr(key, keylen, value); -} - -template <class T, class D, class S> +} + +template <class T, class D, class S> bool TCompactTrieBuilder<T, D, S>::AddSubtreeInFile(const TSymbol* key, size_t keylen, const TString& fileName) { return Impl->AddSubtreeInFile(key, keylen, fileName); -} - -template <class T, class D, class S> +} + +template <class T, class D, class S> bool TCompactTrieBuilder<T, D, S>::AddSubtreeInBuffer(const TSymbol* key, size_t keylen, TArrayWithSizeHolder<char>&& buffer) { return Impl->AddSubtreeInBuffer(key, keylen, std::move(buffer)); } @@ -486,19 +486,19 @@ size_t TCompactTrieBuilder<T, D, S>::GetNodeCount() const { template <class T, class D, class S> TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TCompactTrieBuilderImpl(TCompactTrieBuilderFlags flags, TPacker packer, IAllocator* alloc) : Pool(1000000, TMemoryPool::TLinearGrow::Instance(), alloc) - , PayloadSize(sizeof(void*)) // XXX: find better value + , PayloadSize(sizeof(void*)) // XXX: find better value , NodeAllocator(new TFixedSizeAllocator(sizeof(TNode) + PayloadSize, alloc)) , Flags(flags) , EntryCount(0) , NodeCount(1) - , Packer(packer) + , Packer(packer) { - Root = new (*NodeAllocator) TNode; + Root = new (*NodeAllocator) TNode; } template <class T, class D, class S> TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::~TCompactTrieBuilderImpl() { - DestroyNode(Root); + DestroyNode(Root); } template <class T, class D, class S> @@ -519,67 +519,67 @@ void TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::ConvertSymbolArrayTo } template <class T, class D, class S> -void TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::DestroyNode(TNode* thiz) { - thiz->Subtree()->Destroy(this); - NodeReleasePayload(thiz); - thiz->~TNode(); - NodeAllocator->Release(thiz); -} - -template <class T, class D, class S> -void TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::NodeReleasePayload(TNode* thiz) { - switch (thiz->PayloadType) { - case DATA_ABSENT: - case DATA_INSIDE: - case DATA_IN_MEMPOOL: - break; - case DATA_MALLOCED: - delete[] thiz->PayloadAsPtr(); - break; - default: - abort(); - } - thiz->PayloadType = DATA_ABSENT; -} - -template <class T, class D, class S> +void TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::DestroyNode(TNode* thiz) { + thiz->Subtree()->Destroy(this); + NodeReleasePayload(thiz); + thiz->~TNode(); + NodeAllocator->Release(thiz); +} + +template <class T, class D, class S> +void TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::NodeReleasePayload(TNode* thiz) { + switch (thiz->PayloadType) { + case DATA_ABSENT: + case DATA_INSIDE: + case DATA_IN_MEMPOOL: + break; + case DATA_MALLOCED: + delete[] thiz->PayloadAsPtr(); + break; + default: + abort(); + } + thiz->PayloadType = DATA_ABSENT; +} + +template <class T, class D, class S> bool TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::AddEntry( const TSymbol* key, size_t keylen, const TData& value) { - size_t datalen = Packer.MeasureLeaf(value); - + size_t datalen = Packer.MeasureLeaf(value); + bool isNewAddition = false; char* place = AddEntryForData(key, keylen, datalen, isNewAddition); - Packer.PackLeaf(place, value, datalen); + Packer.PackLeaf(place, value, datalen); return isNewAddition; -} - -template <class T, class D, class S> +} + +template <class T, class D, class S> bool TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::AddEntryPtr( const TSymbol* key, size_t keylen, const char* value) { - size_t datalen = Packer.SkipLeaf(value); - + size_t datalen = Packer.SkipLeaf(value); + bool isNewAddition = false; char* place = AddEntryForData(key, keylen, datalen, isNewAddition); - memcpy(place, value, datalen); + memcpy(place, value, datalen); return isNewAddition; -} - -template <class T, class D, class S> +} + +template <class T, class D, class S> bool TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::AddSubtreeInFile( const TSymbol* key, size_t keylen, const TString& fileName) { - typedef typename TNode::ISubtree ISubtree; - typedef typename TNode::TSubtreeInFile TSubtreeInFile; - + typedef typename TNode::ISubtree ISubtree; + typedef typename TNode::TSubtreeInFile TSubtreeInFile; + bool isNewAddition = false; TNode* node = AddEntryForSomething(key, keylen, isNewAddition); - node->Subtree()->Destroy(this); - node->Subtree()->~ISubtree(); - - new (node->Subtree()) TSubtreeInFile(fileName); + node->Subtree()->Destroy(this); + node->Subtree()->~ISubtree(); + + new (node->Subtree()) TSubtreeInFile(fileName); return isNewAddition; -} - -template <class T, class D, class S> +} + +template <class T, class D, class S> bool TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::AddSubtreeInBuffer( const TSymbol* key, size_t keylen, TArrayWithSizeHolder<char>&& buffer) { @@ -613,10 +613,10 @@ typename TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TNode* // Special case of empty key: replace it by 1-byte "\0" key. size_t ckeylen = keylen ? keylen * sizeof(TSymbol) : 1; TTempBuf ckeybuf(ckeylen); - if (keylen == 0) { + if (keylen == 0) { ckeybuf.Append("\0", 1); - } else { - ConvertSymbolArrayToChar(key, keylen, ckeybuf, ckeylen); + } else { + ConvertSymbolArrayToChar(key, keylen, ckeybuf, ckeylen); } char* ckey = ckeybuf.Data(); @@ -638,24 +638,24 @@ typename TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TNode* isNewAddition = (current->PayloadType == DATA_ABSENT); if ((Flags & CTBF_UNIQUE) && !isNewAddition) ythrow yexception() << "Duplicate key"; - return current; -} + return current; +} -template <class T, class D, class S> +template <class T, class D, class S> char* TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::AddEntryForData(const TSymbol* key, size_t keylen, size_t datalen, bool& isNewAddition) { TNode* current = AddEntryForSomething(key, keylen, isNewAddition); - NodeReleasePayload(current); - if (datalen <= PayloadSize) { - current->PayloadType = DATA_INSIDE; + NodeReleasePayload(current); + if (datalen <= PayloadSize) { + current->PayloadType = DATA_INSIDE; } else if (Flags & CTBF_PREFIX_GROUPED) { - current->PayloadType = DATA_MALLOCED; - current->PayloadAsPtr() = new char[datalen]; - } else { - current->PayloadType = DATA_IN_MEMPOOL; - current->PayloadAsPtr() = (char*) Pool.Allocate(datalen); // XXX: allocate unaligned - } - return current->GetPayload(); + current->PayloadType = DATA_MALLOCED; + current->PayloadAsPtr() = new char[datalen]; + } else { + current->PayloadType = DATA_IN_MEMPOOL; + current->PayloadAsPtr() = (char*) Pool.Allocate(datalen); // XXX: allocate unaligned + } + return current->GetPayload(); } template <class T, class D, class S> @@ -741,19 +741,19 @@ bool TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::FindLongestPrefixImp template <class T, class D, class S> void TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::Clear() { - DestroyNode(Root); + DestroyNode(Root); Pool.Clear(); - NodeAllocator.Reset(new TFixedSizeAllocator(sizeof(TNode) + PayloadSize, TDefaultAllocator::Instance())); - Root = new (*NodeAllocator) TNode; + NodeAllocator.Reset(new TFixedSizeAllocator(sizeof(TNode) + PayloadSize, TDefaultAllocator::Instance())); + Root = new (*NodeAllocator) TNode; EntryCount = 0; NodeCount = 1; } template <class T, class D, class S> size_t TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::Save(IOutputStream& os) const { - const size_t len = NodeMeasureSubtree(Root); - if (len != NodeSaveSubtree(Root, os)) - ythrow yexception() << "something wrong"; + const size_t len = NodeMeasureSubtree(Root); + if (len != NodeSaveSubtree(Root, os)) + ythrow yexception() << "something wrong"; return len; } @@ -781,13 +781,13 @@ template <class T, class D, class S> typename TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TNode* TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::NodeForwardAdd( TNode* thiz, const char* label, size_t len, size_t& passed, size_t* nodeCount) { - typename TNode::TArcSet* arcSet = dynamic_cast<typename TNode::TArcSet*>(thiz->Subtree()); + typename TNode::TArcSet* arcSet = dynamic_cast<typename TNode::TArcSet*>(thiz->Subtree()); if (!arcSet) ythrow yexception() << "Bad input order - expected input strings to be prefix-grouped."; - typename TNode::TArcSet::iterator it = arcSet->Find(*label); + typename TNode::TArcSet::iterator it = arcSet->Find(*label); - if (it != arcSet->end()) { + if (it != arcSet->end()) { const char* arcLabel = it->Label.AsCharPtr(); size_t arcLabelLen = it->Label.Length(); @@ -797,8 +797,8 @@ typename TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TNode* if (passed < arcLabelLen) { (*nodeCount)++; - TNode* node = new (*NodeAllocator) TNode(); - NodeLinkTo(node, it->Label.SubBlob(passed, arcLabelLen), it->Node); + TNode* node = new (*NodeAllocator) TNode(); + NodeLinkTo(node, it->Label.SubBlob(passed, arcLabelLen), it->Node); it->Node = node; it->Label = it->Label.SubBlob(passed); @@ -811,26 +811,26 @@ typename TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TNode* } template <class T, class D, class S> -void TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::NodeLinkTo(TNode* thiz, const TBlob& label, TNode* node) { - typename TNode::TArcSet* arcSet = dynamic_cast<typename TNode::TArcSet*>(thiz->Subtree()); - if (!arcSet) +void TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::NodeLinkTo(TNode* thiz, const TBlob& label, TNode* node) { + typename TNode::TArcSet* arcSet = dynamic_cast<typename TNode::TArcSet*>(thiz->Subtree()); + if (!arcSet) ythrow yexception() << "Bad input order - expected input strings to be prefix-grouped."; // Buffer the node at the last arc if ((Flags & CTBF_PREFIX_GROUPED) && !arcSet->empty()) - NodeBufferSubtree(arcSet->back().Node); + NodeBufferSubtree(arcSet->back().Node); - arcSet->Add(label, node); + arcSet->Add(label, node); } template <class T, class D, class S> -size_t TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::NodeMeasureSubtree(TNode* thiz) const { +size_t TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::NodeMeasureSubtree(TNode* thiz) const { return (size_t)thiz->Subtree()->Measure(this); } template <class T, class D, class S> ui64 TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::NodeSaveSubtree(TNode* thiz, IOutputStream& os) const { - return thiz->Subtree()->Save(this, os); + return thiz->Subtree()->Save(this, os); } template <class T, class D, class S> @@ -839,52 +839,52 @@ ui64 TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::NodeSaveSubtreeAndDe } template <class T, class D, class S> -void TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::NodeBufferSubtree(TNode* thiz) { - typedef typename TNode::TArcSet TArcSet; - - TArcSet* arcSet = dynamic_cast<TArcSet*>(thiz->Subtree()); - if (!arcSet) +void TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::NodeBufferSubtree(TNode* thiz) { + typedef typename TNode::TArcSet TArcSet; + + TArcSet* arcSet = dynamic_cast<TArcSet*>(thiz->Subtree()); + if (!arcSet) return; size_t bufferLength = (size_t)arcSet->Measure(this); - TArrayWithSizeHolder<char> buffer; - buffer.Resize(bufferLength); - - TMemoryOutput bufout(buffer.Get(), buffer.Size()); + TArrayWithSizeHolder<char> buffer; + buffer.Resize(bufferLength); - ui64 written = arcSet->Save(this, bufout); + TMemoryOutput bufout(buffer.Get(), buffer.Size()); + + ui64 written = arcSet->Save(this, bufout); Y_ASSERT(written == bufferLength); + + arcSet->Destroy(this); + arcSet->~TArcSet(); - arcSet->Destroy(this); - arcSet->~TArcSet(); - - typename TNode::TBufferedSubtree* bufferedArcSet = new (thiz->Subtree()) typename TNode::TBufferedSubtree; - - bufferedArcSet->Buffer.Swap(buffer); + typename TNode::TBufferedSubtree* bufferedArcSet = new (thiz->Subtree()) typename TNode::TBufferedSubtree; + + bufferedArcSet->Buffer.Swap(buffer); } template <class T, class D, class S> -size_t TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::NodeMeasureLeafValue(TNode* thiz) const { - if (!thiz->IsFinal()) +size_t TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::NodeMeasureLeafValue(TNode* thiz) const { + if (!thiz->IsFinal()) return 0; - return Packer.SkipLeaf(thiz->GetPayload()); + return Packer.SkipLeaf(thiz->GetPayload()); } template <class T, class D, class S> ui64 TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::NodeSaveLeafValue(TNode* thiz, IOutputStream& os) const { - if (!thiz->IsFinal()) - return 0; + if (!thiz->IsFinal()) + return 0; - size_t len = Packer.SkipLeaf(thiz->GetPayload()); - os.Write(thiz->GetPayload(), len); - return len; + size_t len = Packer.SkipLeaf(thiz->GetPayload()); + os.Write(thiz->GetPayload(), len); + return len; } // TCompactTrieBuilder::TCompactTrieBuilderImpl::TNode::TArc template <class T, class D, class S> -TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TArc::TArc(const TBlob& lbl, TNode* nd) +TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::TArc::TArc(const TBlob& lbl, TNode* nd) : Label(lbl) , Node(nd) , LeftOffset(0) @@ -896,11 +896,11 @@ ui64 TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::ArcMeasure( const TArc* thiz, size_t leftsize, size_t rightsize) const { using namespace NCompactTrie; - size_t coresize = 2 + NodeMeasureLeafValue(thiz->Node); // 2 == (char + flags) - size_t treesize = NodeMeasureSubtree(thiz->Node); + size_t coresize = 2 + NodeMeasureLeafValue(thiz->Node); // 2 == (char + flags) + size_t treesize = NodeMeasureSubtree(thiz->Node); - if (thiz->Label.Length() > 0) - treesize += 2 * (thiz->Label.Length() - 1); + if (thiz->Label.Length() > 0) + treesize += 2 * (thiz->Label.Length() - 1); // Triple measurements are needed because the space needed to store the offset // shall be added to the offset itself. Hence three iterations. @@ -912,8 +912,8 @@ ui64 TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::ArcMeasure( rightoffsetsize = rightsize ? MeasureOffset(coresize + treesize + leftsize + leftoffsetsize + rightoffsetsize) : 0; coresize += leftoffsetsize + rightoffsetsize; - thiz->LeftOffset = leftsize ? coresize + treesize : 0; - thiz->RightOffset = rightsize ? coresize + treesize + leftsize : 0; + thiz->LeftOffset = leftsize ? coresize + treesize : 0; + thiz->RightOffset = rightsize ? coresize + treesize + leftsize : 0; return coresize + treesize + leftsize + rightsize; } @@ -922,12 +922,12 @@ template <class T, class D, class S> ui64 TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::ArcSaveSelf(const TArc* thiz, IOutputStream& os) const { using namespace NCompactTrie; - ui64 written = 0; + ui64 written = 0; + + size_t leftoffsetsize = MeasureOffset(thiz->LeftOffset); + size_t rightoffsetsize = MeasureOffset(thiz->RightOffset); - size_t leftoffsetsize = MeasureOffset(thiz->LeftOffset); - size_t rightoffsetsize = MeasureOffset(thiz->RightOffset); - - size_t labelLen = thiz->Label.Length(); + size_t labelLen = thiz->Label.Length(); for (size_t i = 0; i < labelLen; ++i) { char flags = 0; @@ -938,34 +938,34 @@ ui64 TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::ArcSaveSelf(const TA } if (i == labelLen-1) { - if (thiz->Node->IsFinal()) + if (thiz->Node->IsFinal()) flags |= MT_FINAL; - if (!thiz->Node->IsLast()) + if (!thiz->Node->IsLast()) flags |= MT_NEXT; } else { flags |= MT_NEXT; } os.Write(&flags, 1); - os.Write(&thiz->Label.AsCharPtr()[i], 1); - written += 2; + os.Write(&thiz->Label.AsCharPtr()[i], 1); + written += 2; if (i == 0) { - written += ArcSaveOffset(thiz->LeftOffset, os); - written += ArcSaveOffset(thiz->RightOffset, os); + written += ArcSaveOffset(thiz->LeftOffset, os); + written += ArcSaveOffset(thiz->RightOffset, os); } } - written += NodeSaveLeafValue(thiz->Node, os); + written += NodeSaveLeafValue(thiz->Node, os); return written; } template <class T, class D, class S> ui64 TCompactTrieBuilder<T, D, S>::TCompactTrieBuilderImpl::ArcSave(const TArc* thiz, IOutputStream& os) const { ui64 written = ArcSaveSelf(thiz, os); - written += NodeSaveSubtree(thiz->Node, os); - return written; + written += NodeSaveSubtree(thiz->Node, os); + return written; } template <class T, class D, class S> diff --git a/library/cpp/containers/comptrie/comptrie_impl.cpp b/library/cpp/containers/comptrie/comptrie_impl.cpp index a116ab6d1e..309176e0ec 100644 --- a/library/cpp/containers/comptrie/comptrie_impl.cpp +++ b/library/cpp/containers/comptrie/comptrie_impl.cpp @@ -1,6 +1,6 @@ #include "comptrie_impl.h" -#include <util/system/rusage.h> +#include <util/system/rusage.h> #include <util/stream/output.h> // Unpack the leaf value. The algorithm can store up to 8 full bytes in leafs. diff --git a/library/cpp/containers/comptrie/comptrie_impl.h b/library/cpp/containers/comptrie/comptrie_impl.h index f41c38311a..2cbae822bf 100644 --- a/library/cpp/containers/comptrie/comptrie_impl.h +++ b/library/cpp/containers/comptrie/comptrie_impl.h @@ -1,6 +1,6 @@ #pragma once -#include <util/stream/output.h> +#include <util/stream/output.h> #ifndef COMPTRIE_DATA_CHECK #define COMPTRIE_DATA_CHECK 1 @@ -11,10 +11,10 @@ namespace NCompactTrie { const char MT_FINAL = '\x80'; const char MT_NEXT = '\x40'; - const char MT_SIZEMASK = '\x07'; + const char MT_SIZEMASK = '\x07'; const size_t MT_LEFTSHIFT = 3; - const size_t MT_RIGHTSHIFT = 0; - + const size_t MT_RIGHTSHIFT = 0; + Y_FORCE_INLINE size_t UnpackOffset(const char* p, size_t len); size_t MeasureOffset(size_t offset); size_t PackOffset(char* buffer, size_t offset); @@ -52,20 +52,20 @@ namespace NCompactTrie { void ShowProgress(size_t n); // just print dots } -namespace NCompTriePrivate { - template <typename TChar> - struct TStringForChar { - }; - - template <> - struct TStringForChar<char> { +namespace NCompTriePrivate { + template <typename TChar> + struct TStringForChar { + }; + + template <> + struct TStringForChar<char> { typedef TString TResult; - }; - - template <> - struct TStringForChar<wchar16> { + }; + + template <> + struct TStringForChar<wchar16> { typedef TUtf16String TResult; - }; + }; template <> struct TStringForChar<wchar32> { @@ -73,7 +73,7 @@ namespace NCompTriePrivate { }; } - + namespace NCompTriePrivate { struct TCmp { template <class T> @@ -88,18 +88,18 @@ namespace NCompTriePrivate { }; } -namespace NCompactTrie { +namespace NCompactTrie { static inline ui64 ArcSaveOffset(size_t offset, IOutputStream& os) { - using namespace NCompactTrie; - - if (!offset) - return 0; - - char buf[16]; - size_t len = PackOffset(buf, offset); - os.Write(buf, len); - return len; - } + using namespace NCompactTrie; + + if (!offset) + return 0; + + char buf[16]; + size_t len = PackOffset(buf, offset); + os.Write(buf, len); + return len; + } // Unpack the offset to the next node. The encoding scheme can store offsets // up to 7 bytes; whether they fit into size_t is another issue. @@ -218,4 +218,4 @@ namespace NCompactTrie { return false; } -} +} diff --git a/library/cpp/containers/comptrie/comptrie_trie.h b/library/cpp/containers/comptrie/comptrie_trie.h index 40ec1e52b3..534f7d424f 100644 --- a/library/cpp/containers/comptrie/comptrie_trie.h +++ b/library/cpp/containers/comptrie/comptrie_trie.h @@ -14,9 +14,9 @@ #include <util/stream/input.h> #include <utility> -template <class T, class D, class S> -class TCompactTrieBuilder; - +template <class T, class D, class S> +class TCompactTrieBuilder; + namespace NCompactTrie { template <class TTrie> class TFirstSymbolIterator; @@ -38,17 +38,17 @@ public: typedef typename TCompactTrieKeySelector<TSymbol>::TKey TKey; typedef typename TCompactTrieKeySelector<TSymbol>::TKeyBuf TKeyBuf; - + typedef std::pair<TKey, TData> TValueType; typedef std::pair<size_t, TData> TPhraseMatch; typedef TVector<TPhraseMatch> TPhraseMatchVector; - typedef TCompactTrieBuilder<T, D, S> TBuilder; - + typedef TCompactTrieBuilder<T, D, S> TBuilder; + protected: TBlob DataHolder; const char* EmptyValue = nullptr; - TPacker Packer; + TPacker Packer; NCompactTrie::TPackerLeafSkipper<TPacker> Skipper = &Packer; // This should be true for every constructor. public: @@ -74,7 +74,7 @@ public: return !IsEmpty(); } - void Init(const char* d, size_t len, TPacker packer = TPacker()); + void Init(const char* d, size_t len, TPacker packer = TPacker()); void Init(const TBlob& data, TPacker packer = TPacker()); bool IsInitialized() const; @@ -83,17 +83,17 @@ public: bool Find(const TSymbol* key, size_t keylen, TData* value = nullptr) const; bool Find(const TKeyBuf& key, TData* value = nullptr) const { return Find(key.data(), key.size(), value); - } - - TData Get(const TSymbol* key, size_t keylen) const { - TData value; - if (!Find(key, keylen, &value)) - ythrow yexception() << "key " << TKey(key, keylen).Quote() << " not found in trie"; - return value; - } + } + + TData Get(const TSymbol* key, size_t keylen) const { + TData value; + if (!Find(key, keylen, &value)) + ythrow yexception() << "key " << TKey(key, keylen).Quote() << " not found in trie"; + return value; + } TData Get(const TKeyBuf& key) const { return Get(key.data(), key.size()); - } + } TData GetDefault(const TKeyBuf& key, const TData& def) const { TData value; if (!Find(key.data(), key.size(), &value)) @@ -101,7 +101,7 @@ public: else return value; } - + const TBlob& Data() const { return DataHolder; }; @@ -121,11 +121,11 @@ public: void FindPhrases(const TSymbol* key, size_t keylen, TPhraseMatchVector& matches, TSymbol separator = TSymbol(' ')) const; void FindPhrases(const TKeyBuf& key, TPhraseMatchVector& matches, TSymbol separator = TSymbol(' ')) const { return FindPhrases(key.data(), key.size(), matches, separator); - } + } bool FindLongestPrefix(const TSymbol* key, size_t keylen, size_t* prefixLen, TData* value = nullptr, bool* hasNext = nullptr) const; bool FindLongestPrefix(const TKeyBuf& key, size_t* prefixLen, TData* value = nullptr, bool* hasNext = nullptr) const { return FindLongestPrefix(key.data(), key.size(), prefixLen, value, hasNext); - } + } // Return trie, containing all tails for the given key inline TCompactTrie<T, D, S> FindTails(const TSymbol* key, size_t keylen) const; @@ -154,7 +154,7 @@ public: bool IsEmpty() const { return !Impl; } // Almost no other method can be called. - + bool operator==(const TConstIterator& other) const; bool operator!=(const TConstIterator& other) const; TConstIterator& operator++(); @@ -163,12 +163,12 @@ public: TConstIterator operator--(int /*unused*/); TValueType operator*(); - TKey GetKey() const; + TKey GetKey() const; size_t GetKeySize() const; - TData GetValue() const; + TData GetValue() const; void GetValue(TData& data) const; - const char* GetValuePtr() const; - + const char* GetValuePtr() const; + private: TPacker Packer; TCopyPtr<TOpaqueTrieIterator> Impl; @@ -186,7 +186,7 @@ public: TConstIterator UpperBound(const TKeyBuf& key) const; void Print(IOutputStream& os); - + size_t Size() const; friend class NCompactTrie::TFirstSymbolIterator<TCompactTrie>; @@ -224,7 +224,7 @@ public: template <class T, class D, class S> TCompactTrie<T, D, S>::TCompactTrie(const TBlob& data, TPacker packer) : DataHolder(data) - , Packer(packer) + , Packer(packer) { Init(data, packer); } @@ -246,7 +246,7 @@ template <class T, class D, class S> TCompactTrie<T, D, S>::TCompactTrie(const TBlob& data, const char* emptyValue, TPacker packer) : DataHolder(data) , EmptyValue(emptyValue) - , Packer(packer) + , Packer(packer) { } @@ -296,7 +296,7 @@ void TCompactTrie<T, D, S>::Init(const TBlob& data, TPacker packer) { using namespace NCompactTrie; DataHolder = data; - Packer = packer; + Packer = packer; const char* datapos = DataHolder.AsCharPtr(); size_t len = DataHolder.Length(); @@ -454,11 +454,11 @@ typename TCompactTrie<T, D, S>::TConstIterator TCompactTrie<T, D, S>::UpperBound template <class T, class D, class S> void TCompactTrie<T, D, S>::Print(IOutputStream& os) { typedef typename ::TCompactTrieKeySelector<T>::TKeyBuf TSBuffer; - for (TConstIterator it = Begin(); it != End(); ++it) { + for (TConstIterator it = Begin(); it != End(); ++it) { os << TSBuffer((*it).first.data(), (*it).first.size()) << "\t" << (*it).second << Endl; - } -} - + } +} + template <class T, class D, class S> bool TCompactTrie<T, D, S>::FindLongestPrefix(const TSymbol* key, size_t keylen, size_t* prefixLen, TData* value, bool* hasNext) const { const char* valuepos = nullptr; @@ -654,10 +654,10 @@ typename TCompactTrie<T, D, S>::TData TCompactTrie<T, D, S>::TConstIterator::Get template <class T, class D, class S> void TCompactTrie<T, D, S>::TConstIterator::GetValue(typename TCompactTrie<T, D, S>::TData& data) const { - const char* ptr = GetValuePtr(); + const char* ptr = GetValuePtr(); if (ptr) { Packer.UnpackLeaf(ptr, data); - } else { + } else { data = typename TCompactTrie<T, D, S>::TData(); - } -} + } +} diff --git a/library/cpp/containers/comptrie/comptrie_ut.cpp b/library/cpp/containers/comptrie/comptrie_ut.cpp index 74bee09b5d..2db5143c60 100644 --- a/library/cpp/containers/comptrie/comptrie_ut.cpp +++ b/library/cpp/containers/comptrie/comptrie_ut.cpp @@ -12,8 +12,8 @@ #include <util/generic/ptr.h> #include <util/generic/ylimits.h> -#include <util/folder/dirut.h> - +#include <util/folder/dirut.h> + #include <util/random/random.h> #include <util/random/fast.h> @@ -369,31 +369,31 @@ void TCompactTrieTest::CheckData(const char* data, size_t datalen) { size_t prefixLen = 0; typename TCompactTrie<T>::TKey key = MakeWideKey<T>(i, len); - UNIT_ASSERT(trie.Find(key, &value)); + UNIT_ASSERT(trie.Find(key, &value)); UNIT_ASSERT_EQUAL(len * 2, value); - UNIT_ASSERT(trie.FindLongestPrefix(key, &prefixLen, &value)); + UNIT_ASSERT(trie.FindLongestPrefix(key, &prefixLen, &value)); UNIT_ASSERT_EQUAL(len, prefixLen); UNIT_ASSERT_EQUAL(len * 2, value); TString badkey("bb"); badkey += i; key = MakeWideKey<T>(badkey); - UNIT_ASSERT(!trie.Find(key)); + UNIT_ASSERT(!trie.Find(key)); value = 123; - UNIT_ASSERT(!trie.Find(key, &value)); + UNIT_ASSERT(!trie.Find(key, &value)); UNIT_ASSERT_EQUAL(123, value); - UNIT_ASSERT(trie.FindLongestPrefix(key, &prefixLen, &value)); + UNIT_ASSERT(trie.FindLongestPrefix(key, &prefixLen, &value)); UNIT_ASSERT_EQUAL(1, prefixLen); UNIT_ASSERT_EQUAL(2, value); badkey = i; badkey += "x"; key = MakeWideKey<T>(badkey); - UNIT_ASSERT(!trie.Find(key)); + UNIT_ASSERT(!trie.Find(key)); value = 1234; - UNIT_ASSERT(!trie.Find(key, &value)); + UNIT_ASSERT(!trie.Find(key, &value)); UNIT_ASSERT_EQUAL(1234, value); - UNIT_ASSERT(trie.FindLongestPrefix(key, &prefixLen, &value)); + UNIT_ASSERT(trie.FindLongestPrefix(key, &prefixLen, &value)); UNIT_ASSERT_EQUAL(len, prefixLen); UNIT_ASSERT_EQUAL(len * 2, value); UNIT_ASSERT(trie.FindLongestPrefix(key, &prefixLen, nullptr)); @@ -410,12 +410,12 @@ void TCompactTrieTest::CheckData(const char* data, size_t datalen) { testkey = "fbbax"; key = MakeWideKey<T>(testkey); - UNIT_ASSERT(trie.FindLongestPrefix(key, &prefixLen, &value)); + UNIT_ASSERT(trie.FindLongestPrefix(key, &prefixLen, &value)); UNIT_ASSERT_EQUAL(prefixLen, 3); UNIT_ASSERT_EQUAL(6, value); value = 12345678; - UNIT_ASSERT(!trie.Find(key, &value)); + UNIT_ASSERT(!trie.Find(key, &value)); UNIT_ASSERT_EQUAL(12345678, value); //Failed Find() should not change value } @@ -689,7 +689,7 @@ void TCompactTrieTest::TestRandom(const size_t n, const size_t maxKeySize) { TCompactTrie<char, typename T::TData, T> trieMin(buftmp.Buffer().Data(), buftmp.Buffer().Size()); TCompactTrieBuilder<char, typename T::TData, T> prefixGroupedBuilder(CTBF_PREFIX_GROUPED); - + for (typename TKeys::const_iterator i = keys.begin(), mi = keys.end(); i != mi; ++i) { UNIT_ASSERT(!prefixGroupedBuilder.Find(i->first.c_str(), i->first.size(), &dummy)); UNIT_ASSERT(trie.Find(i->first.c_str(), i->first.size(), &dummy)); @@ -698,7 +698,7 @@ void TCompactTrieTest::TestRandom(const size_t n, const size_t maxKeySize) { UNIT_ASSERT(trieMin.Find(i->first.c_str(), i->first.size(), &dummy)); UNIT_ASSERT(dummy == i->second); } - + prefixGroupedBuilder.Add(i->first.c_str(), i->first.size(), dummy); UNIT_ASSERT(prefixGroupedBuilder.Find(i->first.c_str(), i->first.size(), &dummy)); @@ -712,10 +712,10 @@ void TCompactTrieTest::TestRandom(const size_t n, const size_t maxKeySize) { } } } - + TBufferStream prefixGroupedBuffer; prefixGroupedBuilder.Save(prefixGroupedBuffer); - + UNIT_ASSERT_VALUES_EQUAL(stream.Buffer().Size(), prefixGroupedBuffer.Buffer().Size()); UNIT_ASSERT(0 == memcmp(stream.Buffer().Data(), prefixGroupedBuffer.Buffer().Data(), stream.Buffer().Size())); } @@ -776,9 +776,9 @@ void TCompactTrieTest::TestFindTailsImpl(const TString& prefix) { } void TCompactTrieTest::TestPrefixGrouped() { - TBuffer b1b; + TBuffer b1b; TCompactTrieBuilder<char, ui32> b1(CTBF_PREFIX_GROUPED); - const char* data[] = { + const char* data[] = { "Kazan", "Moscow", "Monino", @@ -787,8 +787,8 @@ void TCompactTrieTest::TestPrefixGrouped() { "Fryazino", "Fryazevo", "Tumen", - }; - + }; + for (size_t i = 0; i < Y_ARRAY_SIZE(data); ++i) { ui32 val = strlen(data[i]) + 1; b1.Add(data[i], strlen(data[i]), val); @@ -802,24 +802,24 @@ void TCompactTrieTest::TestPrefixGrouped() { UNIT_ASSERT(!b1.Find(data[j], strlen(data[j]), &found)); } } - } - - { - TBufferOutput b1bo(b1b); - b1.Save(b1bo); - } - - TCompactTrie<char, ui32> t1(TBlob::FromBuffer(b1b)); - - //t1.Print(Cerr); - + } + + { + TBufferOutput b1bo(b1b); + b1.Save(b1bo); + } + + TCompactTrie<char, ui32> t1(TBlob::FromBuffer(b1b)); + + //t1.Print(Cerr); + for (auto& i : data) { - ui32 v; + ui32 v; UNIT_ASSERT(t1.Find(i, strlen(i), &v)); UNIT_ASSERT_VALUES_EQUAL(strlen(i) + 1, v); - } -} - + } +} + void TCompactTrieTest::CrashTestPrefixGrouped() { TCompactTrieBuilder<char, ui32> builder(CTBF_PREFIX_GROUPED); const char* data[] = { @@ -842,33 +842,33 @@ void TCompactTrieTest::CrashTestPrefixGrouped() { } void TCompactTrieTest::TestMergeFromFile() { - { - TCompactTrieBuilder<> b; - b.Add("yandex", 12); - b.Add("google", 13); - b.Add("mail", 14); + { + TCompactTrieBuilder<> b; + b.Add("yandex", 12); + b.Add("google", 13); + b.Add("mail", 14); TUnbufferedFileOutput out(GetSystemTempDir() + "/TCompactTrieTest-TestMerge-ru"); - b.Save(out); - } - - { - TCompactTrieBuilder<> b; - b.Add("yandex", 112); - b.Add("google", 113); - b.Add("yahoo", 114); + b.Save(out); + } + + { + TCompactTrieBuilder<> b; + b.Add("yandex", 112); + b.Add("google", 113); + b.Add("yahoo", 114); TUnbufferedFileOutput out(GetSystemTempDir() + "/TCompactTrieTest-TestMerge-com"); - b.Save(out); - } - - { - TCompactTrieBuilder<> b; + b.Save(out); + } + + { + TCompactTrieBuilder<> b; UNIT_ASSERT(b.AddSubtreeInFile("com.", GetSystemTempDir() + "/TCompactTrieTest-TestMerge-com")); UNIT_ASSERT(b.Add("org.kernel", 22)); UNIT_ASSERT(b.AddSubtreeInFile("ru.", GetSystemTempDir() + "/TCompactTrieTest-TestMerge-ru")); TUnbufferedFileOutput out(GetSystemTempDir() + "/TCompactTrieTest-TestMerge-res"); - b.Save(out); - } - + b.Save(out); + } + TCompactTrie<> trie(TBlob::FromFileSingleThreaded(GetSystemTempDir() + "/TCompactTrieTest-TestMerge-res")); UNIT_ASSERT_VALUES_EQUAL(12u, trie.Get("ru.yandex")); UNIT_ASSERT_VALUES_EQUAL(13u, trie.Get("ru.google")); @@ -877,12 +877,12 @@ void TCompactTrieTest::TestMergeFromFile() { UNIT_ASSERT_VALUES_EQUAL(112u, trie.Get("com.yandex")); UNIT_ASSERT_VALUES_EQUAL(113u, trie.Get("com.google")); UNIT_ASSERT_VALUES_EQUAL(114u, trie.Get("com.yahoo")); - + unlink((GetSystemTempDir() + "/TCompactTrieTest-TestMerge-res").data()); unlink((GetSystemTempDir() + "/TCompactTrieTest-TestMerge-com").data()); unlink((GetSystemTempDir() + "/TCompactTrieTest-TestMerge-ru").data()); -} - +} + void TCompactTrieTest::TestMergeFromBuffer() { TArrayWithSizeHolder<char> buffer1; { diff --git a/library/cpp/containers/comptrie/make_fast_layout.h b/library/cpp/containers/comptrie/make_fast_layout.h index b8fab5d65b..7325c284c6 100644 --- a/library/cpp/containers/comptrie/make_fast_layout.h +++ b/library/cpp/containers/comptrie/make_fast_layout.h @@ -4,7 +4,7 @@ #include <cstddef> class IOutputStream; - + namespace NCompactTrie { // Return value: size of the resulting trie. size_t RawCompactTrieFastLayoutImpl(IOutputStream& os, const NCompactTrie::TOpaqueTrie& trie, bool verbose); diff --git a/library/cpp/containers/comptrie/minimize.h b/library/cpp/containers/comptrie/minimize.h index baaa431d04..354c50334e 100644 --- a/library/cpp/containers/comptrie/minimize.h +++ b/library/cpp/containers/comptrie/minimize.h @@ -4,10 +4,10 @@ #include <cstddef> class IOutputStream; - + namespace NCompactTrie { size_t MeasureOffset(size_t offset); - + enum EMinimizeMode { MM_DEFAULT, // alollocate new memory for minimized tree MM_NOALLOC, // minimize tree in the same buffer diff --git a/library/cpp/containers/comptrie/opaque_trie_iterator.cpp b/library/cpp/containers/comptrie/opaque_trie_iterator.cpp index 5fd3914be6..3b2ac3d157 100644 --- a/library/cpp/containers/comptrie/opaque_trie_iterator.cpp +++ b/library/cpp/containers/comptrie/opaque_trie_iterator.cpp @@ -13,7 +13,7 @@ namespace NCompactTrie { if (!AtEmptyValue && !atend) Forward(); } - + bool TOpaqueTrieIterator::operator==(const TOpaqueTrieIterator& rhs) const { return (Trie == rhs.Trie && Forks == rhs.Forks && @@ -52,7 +52,7 @@ namespace NCompactTrie { return false; topFork = &Forks.Top(); } - } + } Y_ASSERT(!Forks.Empty()); while (Forks.Top().CurrentDirection != D_FINAL && !HasMaxKeyLength()) { @@ -228,4 +228,4 @@ namespace NCompactTrie { return Node.GetLeafOffset(); } -} +} diff --git a/library/cpp/containers/comptrie/opaque_trie_iterator.h b/library/cpp/containers/comptrie/opaque_trie_iterator.h index 195da3c191..e2ed30a3cd 100644 --- a/library/cpp/containers/comptrie/opaque_trie_iterator.h +++ b/library/cpp/containers/comptrie/opaque_trie_iterator.h @@ -10,7 +10,7 @@ namespace NCompactTrie { class ILeafSkipper; - + class TFork { // Auxiliary class for a branching point in the iterator public: TNode Node; diff --git a/library/cpp/containers/comptrie/ya.make b/library/cpp/containers/comptrie/ya.make index 81352da4b2..fee6680aba 100644 --- a/library/cpp/containers/comptrie/ya.make +++ b/library/cpp/containers/comptrie/ya.make @@ -1,8 +1,8 @@ LIBRARY() - + OWNER(velavokr) -SRCS( +SRCS( array_with_size.h chunked_helpers_trie.h comptrie.h @@ -23,8 +23,8 @@ SRCS( search_iterator.cpp write_trie_backwards.cpp writeable_node.cpp -) - +) + PEERDIR( library/cpp/packers library/cpp/containers/compact_vector @@ -32,4 +32,4 @@ PEERDIR( util/draft ) -END() +END() |