diff options
author | robot-piglet <robot-piglet@yandex-team.com> | 2023-12-01 16:59:11 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2023-12-01 19:54:31 +0300 |
commit | 3715aa9254f65ae1058290101351a72a6d3a67d4 (patch) | |
tree | 9ac5a1cdab42dfc7cd095a06a362e0681cb1482f /library/cpp/on_disk | |
parent | b20a8c04fb7e595955ca9d1b943033342b6580cb (diff) | |
download | ydb-3715aa9254f65ae1058290101351a72a6d3a67d4.tar.gz |
Intermediate changes
Diffstat (limited to 'library/cpp/on_disk')
-rw-r--r-- | library/cpp/on_disk/multi_blob/multiblob.cpp | 67 | ||||
-rw-r--r-- | library/cpp/on_disk/multi_blob/multiblob.h | 77 | ||||
-rw-r--r-- | library/cpp/on_disk/multi_blob/multiblob_builder.cpp | 146 | ||||
-rw-r--r-- | library/cpp/on_disk/multi_blob/multiblob_builder.h | 64 | ||||
-rw-r--r-- | library/cpp/on_disk/multi_blob/ya.make | 13 | ||||
-rw-r--r-- | library/cpp/on_disk/st_hash/fake.cpp | 4 | ||||
-rw-r--r-- | library/cpp/on_disk/st_hash/save_stl.h | 84 | ||||
-rw-r--r-- | library/cpp/on_disk/st_hash/static_hash.h | 420 | ||||
-rw-r--r-- | library/cpp/on_disk/st_hash/static_hash_map.h | 59 | ||||
-rw-r--r-- | library/cpp/on_disk/st_hash/sthash_iterators.h | 334 | ||||
-rw-r--r-- | library/cpp/on_disk/st_hash/ya.make | 15 |
11 files changed, 0 insertions, 1283 deletions
diff --git a/library/cpp/on_disk/multi_blob/multiblob.cpp b/library/cpp/on_disk/multi_blob/multiblob.cpp deleted file mode 100644 index d92b31e6135..00000000000 --- a/library/cpp/on_disk/multi_blob/multiblob.cpp +++ /dev/null @@ -1,67 +0,0 @@ -#include <util/generic/yexception.h> -#include <util/system/align.h> - -#include <library/cpp/on_disk/chunks/reader.h> - -#include "multiblob.h" - -void TSubBlobs::ReadMultiBlob(const TBlob& multi) { - if (multi.Size() < sizeof(TMultiBlobHeader)) { - ythrow yexception() << "not a blob, too small"; - } - - Multi = multi; - memcpy((void*)&Header, Multi.Data(), sizeof(TMultiBlobHeader)); - - if (Header.BlobMetaSig != BLOBMETASIG) { - if (Header.BlobRecordSig != TMultiBlobHeader::RecordSig) { - if (ReadChunkedData(multi)) - return; - } - ythrow yexception() << "is not a blob, MetaSig was read: " - << Header.BlobMetaSig - << ", must be" << BLOBMETASIG; - } - - if (Header.BlobRecordSig != TMultiBlobHeader::RecordSig) - ythrow yexception() << "unknown multiblob RecordSig " - << Header.BlobRecordSig; - - reserve(size() + Header.Count); - if (Header.Flags & EMF_INTERLAY) { - size_t pos = Header.HeaderSize(); - for (size_t i = 0; i < Header.Count; ++i) { - pos = AlignUp<ui64>(pos, sizeof(ui64)); - ui64 size = *((ui64*)((const char*)multi.Data() + pos)); - pos = AlignUp<ui64>(pos + sizeof(ui64), Header.Align); - push_back(multi.SubBlob(pos, pos + size)); - pos += size; - } - } else { - const ui64* sizes = Header.Sizes(multi.Data()); - size_t pos = Header.HeaderSize() + Header.Count * sizeof(ui64); - for (size_t i = 0; i < Header.Count; ++i) { - pos = AlignUp<ui64>(pos, Header.Align); - push_back(multi.SubBlob(pos, pos + *sizes)); - pos += *sizes; - sizes++; - } - } -} - -bool TSubBlobs::ReadChunkedData(const TBlob& multi) noexcept { - Multi = multi; - memset((void*)&Header, 0, sizeof(Header)); - - TChunkedDataReader reader(Multi); - Header.Count = reader.GetBlocksCount(); - resize(GetHeader()->Count); - for (size_t i = 0; i < size(); ++i) - // We can use TBlob::NoCopy() because of reader.GetBlock(i) returns - // address into memory of multi blob. - // This knowledge was acquired from implementation of - // TChunkedDataReader, so we need care about any changes that. - (*this)[i] = TBlob::NoCopy(reader.GetBlock(i), reader.GetBlockLen(i)); - Header.Flags |= EMF_CHUNKED_DATA_READER; - return true; -} diff --git a/library/cpp/on_disk/multi_blob/multiblob.h b/library/cpp/on_disk/multi_blob/multiblob.h deleted file mode 100644 index b40a5ae6af9..00000000000 --- a/library/cpp/on_disk/multi_blob/multiblob.h +++ /dev/null @@ -1,77 +0,0 @@ -#pragma once - -#include <util/generic/vector.h> -#include <util/memory/blob.h> - -#define BLOBMETASIG 0x3456789Au - -enum E_Multiblob_Flags { - // if EMF_INTERLAY is clear - // multiblob format - // HeaderSize() bytes for TMultiBlobHeader - // Count*sizeof(ui64) bytes for blob sizes - // blob1 - // (alignment) - // blob2 - // (alignment) - // ... - // (alignment) - // blobn - // if EMF_INTERLAY is set - // multiblob format - // HeaderSize() bytes for TMultiBlobHeader - // size1 ui64, the size of 1st blob - // blob1 - // (alignment) - // size2 ui64, the size of 2nd blob - // blob2 - // (alignment) - // ... - // (alignment) - // sizen ui64, the size of n'th blob - // blobn - EMF_INTERLAY = 1, - - // Means that multiblob contains blocks in TChunkedDataReader format - // Legacy, use it only for old files, created for TChunkedDataReader - EMF_CHUNKED_DATA_READER = 2, - - // Flags that may be configured for blobbuilder in client code - EMF_WRITEABLE = EMF_INTERLAY, -}; - -struct TMultiBlobHeader { - // data - ui32 BlobMetaSig; - ui32 BlobRecordSig; - ui64 Count; // count of sub blobs - ui32 Align; // alignment for every subblob - ui32 Flags; - static const ui32 RecordSig = 0x23456789; - static inline size_t HeaderSize() { - return 4 * sizeof(ui64); - } - inline const ui64* Sizes(const void* Data) const { - return (const ui64*)((const char*)Data + HeaderSize()); - } -}; - -class TSubBlobs: public TVector<TBlob> { -public: - TSubBlobs() { - } - TSubBlobs(const TBlob& multi) { - ReadMultiBlob(multi); - } - void ReadMultiBlob(const TBlob& multi); - const TMultiBlobHeader* GetHeader() const { - return (const TMultiBlobHeader*)&Header; - } - -protected: - TMultiBlobHeader Header; - TBlob Multi; - -private: - bool ReadChunkedData(const TBlob& multi) noexcept; -}; diff --git a/library/cpp/on_disk/multi_blob/multiblob_builder.cpp b/library/cpp/on_disk/multi_blob/multiblob_builder.cpp deleted file mode 100644 index 44aa4a6c2fd..00000000000 --- a/library/cpp/on_disk/multi_blob/multiblob_builder.cpp +++ /dev/null @@ -1,146 +0,0 @@ -#include <util/memory/tempbuf.h> -#include <util/system/align.h> - -#include "multiblob_builder.h" - -/* - * TBlobSaverMemory - */ -TBlobSaverMemory::TBlobSaverMemory(const void* ptr, size_t size) - : Blob(TBlob::NoCopy(ptr, size)) -{ -} - -TBlobSaverMemory::TBlobSaverMemory(const TBlob& blob) - : Blob(blob) -{ -} - -void TBlobSaverMemory::Save(IOutputStream& output, ui32 /*flags*/) { - output.Write((void*)Blob.Data(), Blob.Length()); -} - -size_t TBlobSaverMemory::GetLength() { - return Blob.Length(); -} - -/* - * TBlobSaverFile - */ - -TBlobSaverFile::TBlobSaverFile(TFile file) - : File(file) -{ - Y_ASSERT(File.IsOpen()); -} - -TBlobSaverFile::TBlobSaverFile(const char* filename, EOpenMode oMode) - : File(filename, oMode) -{ - Y_ASSERT(File.IsOpen()); -} - -void TBlobSaverFile::Save(IOutputStream& output, ui32 /*flags*/) { - TTempBuf buffer(1 << 20); - while (size_t size = File.Read((void*)buffer.Data(), buffer.Size())) - output.Write((void*)buffer.Data(), size); -} - -size_t TBlobSaverFile::GetLength() { - return File.GetLength(); -} - -/* - * TMultiBlobBuilder - */ - -TMultiBlobBuilder::TMultiBlobBuilder(bool isOwn) - : IsOwner(isOwn) -{ -} - -TMultiBlobBuilder::~TMultiBlobBuilder() { - if (IsOwner) - DeleteSubBlobs(); -} - -namespace { - ui64 PadToAlign(IOutputStream& output, ui64 fromPos, ui32 align) { - ui64 toPos = AlignUp<ui64>(fromPos, align); - for (; fromPos < toPos; ++fromPos) { - output << (char)0; - } - return toPos; - } -} - -void TMultiBlobBuilder::Save(IOutputStream& output, ui32 flags) { - TMultiBlobHeader header; - memset((void*)&header, 0, sizeof(header)); - header.BlobMetaSig = BLOBMETASIG; - header.BlobRecordSig = TMultiBlobHeader::RecordSig; - header.Count = Blobs.size(); - header.Align = ALIGN; - header.Flags = flags & EMF_WRITEABLE; - output.Write((void*)&header, sizeof(header)); - for (size_t i = sizeof(header); i < header.HeaderSize(); ++i) - output << (char)0; - ui64 pos = header.HeaderSize(); - if (header.Flags & EMF_INTERLAY) { - for (size_t i = 0; i < Blobs.size(); ++i) { - ui64 size = Blobs[i]->GetLength(); - pos = PadToAlign(output, pos, sizeof(ui64)); // Align size record - output.Write((void*)&size, sizeof(ui64)); - pos = PadToAlign(output, pos + sizeof(ui64), header.Align); // Align blob - Blobs[i]->Save(output, header.Flags); - pos += size; - } - } else { - for (size_t i = 0; i < Blobs.size(); ++i) { - ui64 size = Blobs[i]->GetLength(); - output.Write((void*)&size, sizeof(ui64)); - } - pos += Blobs.size() * sizeof(ui64); - for (size_t i = 0; i < Blobs.size(); ++i) { - pos = PadToAlign(output, pos, header.Align); - Blobs[i]->Save(output, header.Flags); - pos += Blobs[i]->GetLength(); - } - } - // Compensate for imprecise size - for (ui64 len = GetLength(); pos < len; ++pos) { - output << (char)0; - } -} - -size_t TMultiBlobBuilder::GetLength() { - // Sizes may be diferent with and without EMF_INTERLAY, so choose greater of 2 - size_t resNonInter = TMultiBlobHeader::HeaderSize() + Blobs.size() * sizeof(ui64); - size_t resInterlay = TMultiBlobHeader::HeaderSize(); - for (size_t i = 0; i < Blobs.size(); ++i) { - resInterlay = AlignUp<ui64>(resInterlay, sizeof(ui64)) + sizeof(ui64); - resInterlay = AlignUp<ui64>(resInterlay, ALIGN) + Blobs[i]->GetLength(); - resNonInter = AlignUp<ui64>(resNonInter, ALIGN) + Blobs[i]->GetLength(); - } - resInterlay = AlignUp<ui64>(resInterlay, ALIGN); - resNonInter = AlignUp<ui64>(resNonInter, ALIGN); - return Max(resNonInter, resInterlay); -} - -TMultiBlobBuilder::TSavers& TMultiBlobBuilder::GetBlobs() { - return Blobs; -} - -const TMultiBlobBuilder::TSavers& TMultiBlobBuilder::GetBlobs() const { - return Blobs; -} - -void TMultiBlobBuilder::AddBlob(IBlobSaverBase* blob) { - Blobs.push_back(blob); -} - -void TMultiBlobBuilder::DeleteSubBlobs() { - for (size_t i = 0; i < Blobs.size(); ++i) - delete Blobs[i]; - Blobs.clear(); -} diff --git a/library/cpp/on_disk/multi_blob/multiblob_builder.h b/library/cpp/on_disk/multi_blob/multiblob_builder.h deleted file mode 100644 index a8e3c6d35ee..00000000000 --- a/library/cpp/on_disk/multi_blob/multiblob_builder.h +++ /dev/null @@ -1,64 +0,0 @@ -#pragma once - -#include <util/system/align.h> -#include <util/stream/output.h> -#include <util/stream/file.h> -#include <util/draft/holder_vector.h> - -#include "multiblob.h" - -class IBlobSaverBase { -public: - virtual ~IBlobSaverBase() { - } - virtual void Save(IOutputStream& output, ui32 flags = 0) = 0; - virtual size_t GetLength() = 0; -}; - -inline void MultiBlobSave(IOutputStream& output, IBlobSaverBase& saver) { - saver.Save(output); -} - -class TBlobSaverMemory: public IBlobSaverBase { -public: - TBlobSaverMemory(const void* ptr, size_t size); - TBlobSaverMemory(const TBlob& blob); - void Save(IOutputStream& output, ui32 flags = 0) override; - size_t GetLength() override; - -private: - TBlob Blob; -}; - -class TBlobSaverFile: public IBlobSaverBase { -public: - TBlobSaverFile(TFile file); - TBlobSaverFile(const char* filename, EOpenMode oMode = RdOnly); - void Save(IOutputStream& output, ui32 flags = 0) override; - size_t GetLength() override; - -protected: - TFile File; -}; - -class TMultiBlobBuilder: public IBlobSaverBase { -protected: - // Data will be stored with default alignment DEVTOOLS-4548 - static const size_t ALIGN = 16; - -public: - typedef TVector<IBlobSaverBase*> TSavers; - - TMultiBlobBuilder(bool isOwn = true); - ~TMultiBlobBuilder() override; - void Save(IOutputStream& output, ui32 flags = 0) override; - size_t GetLength() override; - TSavers& GetBlobs(); - const TSavers& GetBlobs() const; - void AddBlob(IBlobSaverBase* blob); - void DeleteSubBlobs(); - -protected: - TSavers Blobs; - bool IsOwner; -}; diff --git a/library/cpp/on_disk/multi_blob/ya.make b/library/cpp/on_disk/multi_blob/ya.make deleted file mode 100644 index 50615fc9012..00000000000 --- a/library/cpp/on_disk/multi_blob/ya.make +++ /dev/null @@ -1,13 +0,0 @@ -LIBRARY() - -SRCS( - multiblob.cpp - multiblob_builder.cpp -) - -PEERDIR( - library/cpp/on_disk/chunks - util/draft -) - -END() diff --git a/library/cpp/on_disk/st_hash/fake.cpp b/library/cpp/on_disk/st_hash/fake.cpp deleted file mode 100644 index ef5af4d432b..00000000000 --- a/library/cpp/on_disk/st_hash/fake.cpp +++ /dev/null @@ -1,4 +0,0 @@ -#include "save_stl.h" -#include "static_hash.h" -#include "static_hash_map.h" -#include "sthash_iterators.h" diff --git a/library/cpp/on_disk/st_hash/save_stl.h b/library/cpp/on_disk/st_hash/save_stl.h deleted file mode 100644 index 00f8f0e20db..00000000000 --- a/library/cpp/on_disk/st_hash/save_stl.h +++ /dev/null @@ -1,84 +0,0 @@ -#pragma once - -#include <util/generic/hash.h> -#include <util/system/yassert.h> -#include <util/stream/output.h> - -// this structure might be replaced with sthashtable class -template <class HF, class Eq, class size_type> -struct sthashtable_nvm_sv { - sthashtable_nvm_sv() { - if (sizeof(sthashtable_nvm_sv) != sizeof(HF) + sizeof(Eq) + 3 * sizeof(size_type)) { - memset(this, 0, sizeof(sthashtable_nvm_sv)); - } - } - - sthashtable_nvm_sv(const HF& phf, const Eq& peq, const size_type& pnb, const size_type& pne, const size_type& pnd) - : sthashtable_nvm_sv() - { - hf = phf; - eq = peq; - num_buckets = pnb; - num_elements = pne; - data_end_off = pnd; - } - - HF hf; - Eq eq; - size_type num_buckets; - size_type num_elements; - size_type data_end_off; -}; - -/** - * Some hack to save both THashMap and sthash. - * Working with stHash does not depend on the template parameters, because the content of stHash is not used inside this method. - */ -template <class V, class K, class HF, class Ex, class Eq, class A> -template <class KeySaver> -inline int THashTable<V, K, HF, Ex, Eq, A>::save_for_st(IOutputStream* stream, KeySaver& ks, sthash<int, int, THash<int>, TEqualTo<int>, typename KeySaver::TSizeType>* stHash) const { - Y_ASSERT(!stHash || stHash->bucket_count() == bucket_count()); - typedef sthashtable_nvm_sv<HF, Eq, typename KeySaver::TSizeType> sv_type; - sv_type sv = {this->_get_hash_fun(), this->_get_key_eq(), static_cast<typename KeySaver::TSizeType>(buckets.size()), static_cast<typename KeySaver::TSizeType>(num_elements), 0}; - // to do: m.b. use just the size of corresponding object? - typename KeySaver::TSizeType cur_off = sizeof(sv_type) + - (sv.num_buckets + 1) * sizeof(typename KeySaver::TSizeType); - sv.data_end_off = cur_off; - const_iterator n; - for (n = begin(); n != end(); ++n) { - sv.data_end_off += static_cast<typename KeySaver::TSizeType>(ks.GetRecordSize(*n)); - } - typename KeySaver::TSizeType* sb = stHash ? (typename KeySaver::TSizeType*)(stHash->buckets()) : nullptr; - if (stHash) - sv.data_end_off += static_cast<typename KeySaver::TSizeType>(sb[buckets.size()] - sb[0]); - //saver.Align(sizeof(char*)); - stream->Write(&sv, sizeof(sv)); - - size_type i; - //save vector - for (i = 0; i < buckets.size(); ++i) { - node* cur = buckets[i]; - stream->Write(&cur_off, sizeof(cur_off)); - if (cur) { - while (!((uintptr_t)cur & 1)) { - cur_off += static_cast<typename KeySaver::TSizeType>(ks.GetRecordSize(cur->val)); - cur = cur->next; - } - } - if (stHash) - cur_off += static_cast<typename KeySaver::TSizeType>(sb[i + 1] - sb[i]); - } - stream->Write(&cur_off, sizeof(cur_off)); // end mark - for (i = 0; i < buckets.size(); ++i) { - node* cur = buckets[i]; - if (cur) { - while (!((uintptr_t)cur & 1)) { - ks.SaveRecord(stream, cur->val); - cur = cur->next; - } - } - if (stHash) - stream->Write((const char*)stHash + sb[i], sb[i + 1] - sb[i]); - } - return 0; -} diff --git a/library/cpp/on_disk/st_hash/static_hash.h b/library/cpp/on_disk/st_hash/static_hash.h deleted file mode 100644 index ca7a6ccd369..00000000000 --- a/library/cpp/on_disk/st_hash/static_hash.h +++ /dev/null @@ -1,420 +0,0 @@ -#pragma once - -#include "save_stl.h" -#include "sthash_iterators.h" - -#include <util/generic/hash.h> -#include <util/generic/vector.h> -#include <util/generic/buffer.h> -#include <util/generic/cast.h> -#include <util/generic/yexception.h> // for save/load only -#include <util/stream/file.h> -#include <util/stream/buffer.h> -#include <utility> - -#include <memory> -#include <algorithm> -#include <functional> - -#include <cstdlib> -#include <cstddef> - -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4624) // 'destructor could not be generated because a base class destructor is inaccessible' -#endif - -template <class HashType, class KeySaver> -inline void SaveHashToStreamEx(HashType& hash, IOutputStream* stream) { - KeySaver ks; - if (hash.save_for_st(stream, ks)) - ythrow yexception() << "Could not save hash to stream"; -} - -template <class HashType> -inline void SaveHashToStream(HashType& hash, IOutputStream* stream) { - typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui64> KeySaver; - return SaveHashToStreamEx<HashType, KeySaver>(hash, stream); -} - -template <class HashType, class KeySaver> -inline void SaveHashToFileEx(HashType& hash, const char* fileName) { - TFileOutput output(fileName); - SaveHashToStreamEx<HashType, KeySaver>(hash, &output); -} - -template <class HashType> -inline void SaveHashToFile(HashType& hash, const char* fileName) { - typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui64> KeySaver; - return SaveHashToFileEx<HashType, KeySaver>(hash, fileName); -} - -template <class HashType> -inline void SaveHashSetToFile(HashType& hash, const char* fileName) { - typedef TSthashSetWriter<typename HashType::key_type, ui64> KeySaver; - return SaveHashToFileEx<HashType, KeySaver>(hash, fileName); -} - -template <class HashType> -inline void SaveHashToFile32(HashType& hash, const char* fileName) { - typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui32> KeySaver; - return SaveHashToFileEx<HashType, KeySaver>(hash, fileName); -} - -template <class HashType, class KeySaver> -inline void SaveHashToBufferEx(HashType& hash, TBuffer& buffer, sthash<int, int, THash<int>, TEqualTo<int>, typename KeySaver::TSizeType>* stHash = nullptr) { - TBufferOutput stream(buffer); - KeySaver ks; - if (hash.save_for_st(&stream, ks, stHash)) - ythrow yexception() << "Could not save hash to memory"; -} - -template <class HashType> -inline void SaveHashToBuffer(HashType& hash, TBuffer& buffer) { - typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui64> KeySaver; - SaveHashToBufferEx<HashType, KeySaver>(hash, buffer); -} - -/** - * Some hack to save both THashMap and sthash. - * THashMap and sthash must have same bucket_count(). - */ -template <class HashType, class StHashType> -inline void SaveHashToBuffer(HashType& hash, TBuffer& buffer, StHashType* stHash) { - typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui64> KeySaver; - typedef sthash<int, int, THash<int>, TEqualTo<int>, typename KeySaver::TSizeType>* SH; - - SH sh = reinterpret_cast<SH>(stHash); - SaveHashToBufferEx<HashType, KeySaver>(hash, buffer, sh); -} - -template <class HashType> -inline void SaveHashToBuffer32(HashType& hash, TBuffer& buffer) { - typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui32> KeySaver; - SaveHashToBufferEx<HashType, KeySaver>(hash, buffer); -} - -template <class Iter, typename size_type_f = ui64> -class sthashtable { -public: - typedef typename Iter::TKeyType key_type; - typedef typename Iter::TValueType value_type; - typedef typename Iter::THasherType hasher; - typedef typename Iter::TKeyEqualType key_equal; - - typedef size_type_f size_type; - typedef ptrdiff_t difference_type; - typedef const value_type* const_pointer; - typedef const value_type& const_reference; - - typedef Iter const_iterator; - - const hasher hash_funct() const { - return hash; - } - const key_equal key_eq() const { - return equals; - } - -private: - const hasher hash; - const key_equal equals; - -private: - const_iterator iter_at_bucket(size_type bucket) const { - return (const_iterator)(((char*)this + buckets()[bucket])); - } - - const_iterator iter_at_bucket_or_end(size_type bucket) const { - if (bucket < num_buckets) - return (const_iterator)(((char*)this + buckets()[bucket])); - else - return end(); - } - - const size_type num_buckets; - const size_type num_elements; - const size_type data_end_off; - -protected: //shut up gcc warning - // we can't construct/destroy this object at all! - sthashtable(); - sthashtable(const sthashtable& ht); - ~sthashtable(); - -public: - // const size_type *buckets; - const size_type* buckets() const { - return (size_type*)((char*)this + sizeof(*this)); - } - const size_type buckets(size_type n) const { - return buckets()[n]; - } - - size_type size() const { - return num_elements; - } - size_type max_size() const { - return size_type(-1); - } - bool empty() const { - return size() == 0; - } - - const_iterator begin() const { - return num_buckets ? iter_at_bucket(0) : end(); - } - - const_iterator end() const { - return (const_iterator)(((char*)this + data_end_off)); - } - -public: - size_type size_in_bytes() const { - return data_end_off; - } - - size_type bucket_count() const { - return num_buckets; - } - - size_type elems_in_bucket(size_type bucket) const { - size_type result = 0; - const_iterator first = iter_at_bucket(bucket); - const_iterator last = iter_at_bucket_or_end(bucket + 1); - - for (; first != last; ++first) - ++result; - return result; - } - - template <class TheKey> - const_iterator find(const TheKey& key) const { - size_type n = bkt_num_key(key); - const_iterator first(iter_at_bucket(n)), last(iter_at_bucket_or_end(n + 1)); - for (; - first != last && !first.KeyEquals(equals, key); - ++first) { - } - if (first != last) - return first; - return end(); - } - - size_type count(const key_type& key) const { - const size_type n = bkt_num_key(key); - size_type result = 0; - const_iterator first = iter_at_bucket(n); - const_iterator last = iter_at_bucket_or_end(n + 1); - - for (; first != last; ++first) - if (first.KeyEquals(equals, key)) - ++result; - return result; - } - - std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const; - -private: - template <class TheKey> - size_type bkt_num_key(const TheKey& key) const { - return hash(key) % num_buckets; - } -}; - -template <class I, class size_type_f> -std::pair<I, I> sthashtable<I, size_type_f>::equal_range(const key_type& key) const { - typedef std::pair<const_iterator, const_iterator> pii; - const size_type n = bkt_num_key(key); - const_iterator first = iter_at_bucket(n); - const_iterator last = iter_at_bucket_or_end(n + 1); - - for (; first != last; ++first) { - if (first.KeyEquals(equals, key)) { - const_iterator cur = first; - ++cur; - for (; cur != last; ++cur) - if (!cur.KeyEquals(equals, key)) - return pii(const_iterator(first), - const_iterator(cur)); - return pii(const_iterator(first), - const_iterator(last)); - } - } - return pii(end(), end()); -} - -/* end __SGI_STL_HASHTABLE_H */ - -template <class Key, class T, class HashFcn /*= hash<Key>*/, - class EqualKey = TEqualTo<Key>, typename size_type_f = ui64> -class sthash { -private: - typedef sthashtable<TSthashIterator<const Key, const T, HashFcn, EqualKey>, size_type_f> ht; - ht rep; - -public: - typedef typename ht::key_type key_type; - typedef typename ht::value_type value_type; - typedef typename ht::hasher hasher; - typedef typename ht::key_equal key_equal; - typedef T mapped_type; - - typedef typename ht::size_type size_type; - typedef typename ht::difference_type difference_type; - typedef typename ht::const_pointer const_pointer; - typedef typename ht::const_reference const_reference; - - typedef typename ht::const_iterator const_iterator; - - const hasher hash_funct() const { - return rep.hash_funct(); - } - const key_equal key_eq() const { - return rep.key_eq(); - } - -public: - size_type size() const { - return rep.size(); - } - size_type max_size() const { - return rep.max_size(); - } - bool empty() const { - return rep.empty(); - } - - const_iterator begin() const { - return rep.begin(); - } - const_iterator end() const { - return rep.end(); - } - -public: - template <class TheKey> - const_iterator find(const TheKey& key) const { - return rep.find(key); - } - template <class TheKey> - bool has(const TheKey& key) const { - return rep.find(key) != rep.end(); - } - - size_type count(const key_type& key) const { - return rep.count(key); - } - - std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const { - return rep.equal_range(key); - } - - size_type size_in_bytes() const { - return rep.size_in_bytes(); - } - - size_type bucket_count() const { - return rep.bucket_count(); - } - size_type max_bucket_count() const { - return rep.max_bucket_count(); - } - size_type elems_in_bucket(size_type n) const { - return rep.elems_in_bucket(n); - } - - const size_type* buckets() const { - return rep.buckets(); - } - const size_type buckets(size_type n) const { - return rep.buckets()[n]; - } -}; - -template <class Key, class HashFcn, - class EqualKey = TEqualTo<Key>, typename size_type_f = ui64> -class sthash_set: public sthash<Key, TEmptyValue, HashFcn, EqualKey, size_type_f> { - typedef sthash<Key, TEmptyValue, HashFcn, EqualKey, size_type_f> Base; - -public: - using Base::const_iterator; - using Base::hasher; - using Base::key_equal; - using Base::key_type; - using Base::size_type; - using Base::value_type; -}; - -template <class Key, class T, class HashFcn /*= hash<Key>*/, - class EqualKey = TEqualTo<Key>, typename size_type_f = ui64> -class sthash_mm { -private: - typedef sthashtable<TSthashIterator<const Key, T, HashFcn, EqualKey>, size_type_f> ht; - ht rep; - -public: - typedef typename ht::key_type key_type; - typedef typename ht::value_type value_type; - typedef typename ht::hasher hasher; - typedef typename ht::key_equal key_equal; - typedef T mapped_type; - - typedef typename ht::size_type size_type; - typedef typename ht::difference_type difference_type; - typedef typename ht::const_pointer const_pointer; - typedef typename ht::const_reference const_reference; - - typedef typename ht::const_iterator const_iterator; - - const hasher hash_funct() const { - return rep.hash_funct(); - } - const key_equal key_eq() const { - return rep.key_eq(); - } - -public: - size_type size() const { - return rep.size(); - } - size_type max_size() const { - return rep.max_size(); - } - bool empty() const { - return rep.empty(); - } - - const_iterator begin() const { - return rep.begin(); - } - const_iterator end() const { - return rep.end(); - } - - const_iterator find(const key_type& key) const { - return rep.find(key); - } - - size_type count(const key_type& key) const { - return rep.count(key); - } - - std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const { - return rep.equal_range(key); - } - - size_type bucket_count() const { - return rep.bucket_count(); - } - size_type max_bucket_count() const { - return rep.max_bucket_count(); - } - size_type elems_in_bucket(size_type n) const { - return rep.elems_in_bucket(n); - } -}; - -#ifdef _MSC_VER -#pragma warning(pop) -#endif diff --git a/library/cpp/on_disk/st_hash/static_hash_map.h b/library/cpp/on_disk/st_hash/static_hash_map.h deleted file mode 100644 index 5dc50abd392..00000000000 --- a/library/cpp/on_disk/st_hash/static_hash_map.h +++ /dev/null @@ -1,59 +0,0 @@ -#pragma once - -#include "static_hash.h" - -#include <library/cpp/deprecated/mapped_file/mapped_file.h> - -#include <util/system/filemap.h> - -template <class SH> -struct sthash_mapped_c { - typedef SH H; - typedef typename H::const_iterator const_iterator; - TMappedFile M; - H* hsh; - sthash_mapped_c() - : M() - , hsh(nullptr) - { - } - sthash_mapped_c(const char* fname, bool precharge) - : M() - , hsh(nullptr) - { - Open(fname, precharge); - } - void Open(const char* fname, bool precharge) { - M.init(fname); - if (precharge) - M.precharge(); - hsh = (H*)M.getData(); - if (M.getSize() < sizeof(H) || (ssize_t)M.getSize() != hsh->end().Data - (char*)hsh) - ythrow yexception() << "Could not map hash: " << fname << " is damaged"; - } - H* operator->() { - return hsh; - } - const H* operator->() const { - return hsh; - } - H* GetSthash() { - return hsh; - } - const H* GetSthash() const { - return hsh; - } -}; - -template <class Key, class T, class Hash> -struct sthash_mapped: public sthash_mapped_c<sthash<Key, T, Hash>> { - typedef sthash<Key, T, Hash> H; - sthash_mapped(const char* fname, bool precharge) - : sthash_mapped_c<H>(fname, precharge) - { - } - sthash_mapped() - : sthash_mapped_c<H>() - { - } -}; diff --git a/library/cpp/on_disk/st_hash/sthash_iterators.h b/library/cpp/on_disk/st_hash/sthash_iterators.h deleted file mode 100644 index 6a9ebdd6c3f..00000000000 --- a/library/cpp/on_disk/st_hash/sthash_iterators.h +++ /dev/null @@ -1,334 +0,0 @@ -#pragma once - -#include "save_stl.h" - -#include <util/system/align.h> - -/** - This file provides functionality for saving some relatively simple THashMap object - to disk in a form that can be mapped read-only (via mmap) at any address. - That saved object is accessed via pointer to sthash object (that must have - the same parameters as original THashMap object) - - If either key or value are variable-sized (i.e. contain pointers), user must - write his own instantiation of TSthashIterator (read iterator for sthash) and - TSthashWriter (write iterator for THashMap). - An example for <const char *, B> pair is in here. -**/ - -// TEmptyValue and SizeOfEx are helpers for sthash_set -struct TEmptyValue { - TEmptyValue() = default; -}; - -template <class T> -inline size_t SizeOfEx() { - return sizeof(T); -} - -template <> -inline size_t SizeOfEx<TEmptyValue>() { - return 0; -} -template <> -inline size_t SizeOfEx<const TEmptyValue>() { - return 0; -} - -template <class TKey, class TValue, class HashFcn, class EqualKey> -struct TSthashIterator { - // Implementation for simple types - typedef const TKey TKeyType; - typedef const TValue TValueType; - typedef EqualKey TKeyEqualType; - typedef HashFcn THasherType; - - const char* Data; - TSthashIterator() - : Data(nullptr) - { - } - explicit TSthashIterator(const char* data) - : Data(data) - { - } - void operator++() { - Data += GetLength(); - } - - bool operator!=(const TSthashIterator& that) const { - return Data != that.Data; - } - bool operator==(const TSthashIterator& that) const { - return Data == that.Data; - } - TKey& Key() const { - return *(TKey*)Data; - } - TValue& Value() { - return *(TValue*)(Data + sizeof(TKey)); - } - const TValue& Value() const { - return *(const TValue*)(Data + sizeof(TKey)); - } - - template <class AnotherKeyType> - bool KeyEquals(const EqualKey& eq, const AnotherKeyType& key) const { - return eq(*(TKey*)Data, key); - } - - size_t GetLength() const { - return sizeof(TKey) + SizeOfEx<TValue>(); - } -}; - -template <class Key, class Value, typename size_type_o = ui64> -struct TSthashWriter { - typedef size_type_o TSizeType; - size_t GetRecordSize(const std::pair<const Key, const Value>&) const { - return sizeof(Key) + SizeOfEx<Value>(); - } - int SaveRecord(IOutputStream* stream, const std::pair<const Key, const Value>& record) const { - stream->Write(&record.first, sizeof(Key)); - stream->Write(&record.second, SizeOfEx<Value>()); - return 0; - } -}; - -// Remember that this simplified implementation makes a copy of `key' in std::make_pair. -// It can also waste some memory on undesired alignment. -template <class Key, typename size_type_o = ui64> -struct TSthashSetWriter: public TSthashWriter<Key, TEmptyValue, size_type_o> { - typedef TSthashWriter<Key, TEmptyValue, size_type_o> MapWriter; - size_t GetRecordSize(const Key& key) const { - return MapWriter::GetRecordSize(std::make_pair(key, TEmptyValue())); - } - int SaveRecord(IOutputStream* stream, const Key& key) const { - return MapWriter::SaveRecord(stream, std::make_pair(key, TEmptyValue())); - } -}; - -// we can't save something with pointers without additional tricks - -template <class A, class B, class HashFcn, class EqualKey> -struct TSthashIterator<A*, B, HashFcn, EqualKey> {}; - -template <class A, class B, class HashFcn, class EqualKey> -struct TSthashIterator<A, B*, HashFcn, EqualKey> {}; - -template <class A, class B, typename size_type_o> -struct TSthashWriter<A*, B*, size_type_o> {}; - -template <class A, class B, typename size_type_o> -struct TSthashWriter<A*, B, size_type_o> {}; - -template <class A, class B, typename size_type_o> -struct TSthashWriter<A, B*, size_type_o> {}; - -template <class T> -inline size_t AlignForChrKey() { - return 4; // TODO: change this (requeres rebuilt of a few existing files) -} - -template <> -inline size_t AlignForChrKey<TEmptyValue>() { - return 1; -} - -template <> -inline size_t AlignForChrKey<const TEmptyValue>() { - return AlignForChrKey<TEmptyValue>(); -} - -// !! note that for char*, physical placement of key and value is swapped -template <class TValue, class HashFcn, class EqualKey> -struct TSthashIterator<const char* const, TValue, HashFcn, EqualKey> { - typedef const TValue TValueType; - typedef const char* TKeyType; - typedef EqualKey TKeyEqualType; - typedef HashFcn THasherType; - - const char* Data; - TSthashIterator() - : Data(nullptr) - { - } - TSthashIterator(const char* data) - : Data(data) - { - } - void operator++() { - Data += GetLength(); - } - - bool operator!=(const TSthashIterator& that) const { - return Data != that.Data; - } - bool operator==(const TSthashIterator& that) const { - return Data == that.Data; - } - const char* Key() const { - return Data + SizeOfEx<TValue>(); - } - TValue& Value() { - return *(TValue*)Data; - } - const TValue& Value() const { - return *(const TValue*)Data; - } - - template <class K> - bool KeyEquals(const EqualKey& eq, const K& k) const { - return eq(Data + SizeOfEx<TValue>(), k); - } - - size_t GetLength() const { - size_t length = strlen(Data + SizeOfEx<TValue>()) + 1 + SizeOfEx<TValue>(); - length = AlignUp(length, AlignForChrKey<TValue>()); - return length; - } -}; - -template <class Value, typename size_type_o> -struct TSthashWriter<const char*, Value, size_type_o> { - typedef size_type_o TSizeType; - size_t GetRecordSize(const std::pair<const char*, const Value>& record) const { - size_t length = strlen(record.first) + 1 + SizeOfEx<Value>(); - length = AlignUp(length, AlignForChrKey<Value>()); - return length; - } - int SaveRecord(IOutputStream* stream, const std::pair<const char*, const Value>& record) const { - const char* alignBuffer = "qqqq"; - stream->Write(&record.second, SizeOfEx<Value>()); - size_t length = strlen(record.first) + 1; - stream->Write(record.first, length); - length = AlignUpSpace(length, AlignForChrKey<Value>()); - if (length) - stream->Write(alignBuffer, length); - return 0; - } -}; - -template <class TKey, class HashFcn, class EqualKey> -struct TSthashIterator<TKey, const char* const, HashFcn, EqualKey> { - typedef const TKey TKeyType; - typedef const char* TValueType; - typedef EqualKey TKeyEqualType; - typedef HashFcn THasherType; - - const char* Data; - TSthashIterator() - : Data(nullptr) - { - } - TSthashIterator(const char* data) - : Data(data) - { - } - void operator++() { - Data += GetLength(); - } - - bool operator!=(const TSthashIterator& that) const { - return Data != that.Data; - } - bool operator==(const TSthashIterator& that) const { - return Data == that.Data; - } - TKey& Key() { - return *(TKey*)Data; - } - const char* Value() const { - return Data + sizeof(TKey); - } - - template <class K> - bool KeyEquals(const EqualKey& eq, const K& k) const { - return eq(*(TKey*)Data, k); - } - - size_t GetLength() const { - size_t length = strlen(Data + sizeof(TKey)) + 1 + sizeof(TKey); - length = AlignUp(length, (size_t)4); - return length; - } -}; - -template <class Key, typename size_type_o> -struct TSthashWriter<Key, const char*, size_type_o> { - typedef size_type_o TSizeType; - size_t GetRecordSize(const std::pair<const Key, const char*>& record) const { - size_t length = strlen(record.second) + 1 + sizeof(Key); - length = AlignUp(length, (size_t)4); - return length; - } - int SaveRecord(IOutputStream* stream, const std::pair<const Key, const char*>& record) const { - const char* alignBuffer = "qqqq"; - stream->Write(&record.first, sizeof(Key)); - size_t length = strlen(record.second) + 1; - stream->Write(record.second, length); - length = AlignUpSpace(length, (size_t)4); - if (length) - stream->Write(alignBuffer, length); - return 0; - } -}; - -template <class HashFcn, class EqualKey> -struct TSthashIterator<const char* const, const char* const, HashFcn, EqualKey> { - typedef const char* TKeyType; - typedef const char* TValueType; - typedef EqualKey TKeyEqualType; - typedef HashFcn THasherType; - - const char* Data; - TSthashIterator() - : Data(nullptr) - { - } - TSthashIterator(const char* data) - : Data(data) - { - } - void operator++() { - Data += GetLength(); - } - - bool operator!=(const TSthashIterator& that) const { - return Data != that.Data; - } - bool operator==(const TSthashIterator& that) const { - return Data == that.Data; - } - const char* Key() const { - return Data; - } - const char* Value() const { - return Data + strlen(Data) + 1; - } - - template <class K> - bool KeyEquals(const EqualKey& eq, const K& k) const { - return eq(Data, k); - } - - size_t GetLength() const { - size_t length = strlen(Data) + 1; - length += strlen(Data + length) + 1; - return length; - } -}; - -template <typename size_type_o> -struct TSthashWriter<const char*, const char*, size_type_o> { - typedef size_type_o TSizeType; - size_t GetRecordSize(const std::pair<const char*, const char*>& record) const { - size_t size = strlen(record.first) + strlen(record.second) + 2; - return size; - } - int SaveRecord(IOutputStream* stream, const std::pair<const char*, const char*>& record) const { - stream->Write(record.first, strlen(record.first) + 1); - stream->Write(record.second, strlen(record.second) + 1); - return 0; - } -}; diff --git a/library/cpp/on_disk/st_hash/ya.make b/library/cpp/on_disk/st_hash/ya.make deleted file mode 100644 index 8c6d05711c3..00000000000 --- a/library/cpp/on_disk/st_hash/ya.make +++ /dev/null @@ -1,15 +0,0 @@ -LIBRARY() - -SRCS( - fake.cpp - save_stl.h - static_hash.h - static_hash_map.h - sthash_iterators.h -) - -PEERDIR( - library/cpp/deprecated/mapped_file -) - -END() |