diff options
| author | robot-piglet <[email protected]> | 2023-12-04 15:32:14 +0300 | 
|---|---|---|
| committer | robot-piglet <[email protected]> | 2023-12-05 01:22:50 +0300 | 
| commit | c21ed9eedf73010bc81342518177dfdfb0d56bd7 (patch) | |
| tree | 72f8fde4463080cfe5a38eb0babc051cfe32c51e /library/cpp/on_disk | |
| parent | ec1311bf2e8cc231723b8b5e484ca576663a1309 (diff) | |
Intermediate changes
Diffstat (limited to 'library/cpp/on_disk')
| -rw-r--r-- | library/cpp/on_disk/multi_blob/multiblob.cpp | 67 | ||||
| -rw-r--r-- | library/cpp/on_disk/multi_blob/multiblob.h | 77 | ||||
| -rw-r--r-- | library/cpp/on_disk/multi_blob/multiblob_builder.cpp | 146 | ||||
| -rw-r--r-- | library/cpp/on_disk/multi_blob/multiblob_builder.h | 64 | ||||
| -rw-r--r-- | library/cpp/on_disk/multi_blob/ya.make | 13 | ||||
| -rw-r--r-- | library/cpp/on_disk/st_hash/fake.cpp | 4 | ||||
| -rw-r--r-- | library/cpp/on_disk/st_hash/save_stl.h | 84 | ||||
| -rw-r--r-- | library/cpp/on_disk/st_hash/static_hash.h | 420 | ||||
| -rw-r--r-- | library/cpp/on_disk/st_hash/static_hash_map.h | 59 | ||||
| -rw-r--r-- | library/cpp/on_disk/st_hash/sthash_iterators.h | 334 | ||||
| -rw-r--r-- | library/cpp/on_disk/st_hash/ya.make | 15 | 
11 files changed, 0 insertions, 1283 deletions
| diff --git a/library/cpp/on_disk/multi_blob/multiblob.cpp b/library/cpp/on_disk/multi_blob/multiblob.cpp deleted file mode 100644 index d92b31e6135..00000000000 --- a/library/cpp/on_disk/multi_blob/multiblob.cpp +++ /dev/null @@ -1,67 +0,0 @@ -#include <util/generic/yexception.h> -#include <util/system/align.h> - -#include <library/cpp/on_disk/chunks/reader.h> - -#include "multiblob.h" - -void TSubBlobs::ReadMultiBlob(const TBlob& multi) { -    if (multi.Size() < sizeof(TMultiBlobHeader)) { -        ythrow yexception() << "not a blob, too small"; -    } - -    Multi = multi; -    memcpy((void*)&Header, Multi.Data(), sizeof(TMultiBlobHeader)); - -    if (Header.BlobMetaSig != BLOBMETASIG) { -        if (Header.BlobRecordSig != TMultiBlobHeader::RecordSig) { -            if (ReadChunkedData(multi)) -                return; -        } -        ythrow yexception() << "is not a blob, MetaSig was read: " -                            << Header.BlobMetaSig -                            << ", must be" << BLOBMETASIG; -    } - -    if (Header.BlobRecordSig != TMultiBlobHeader::RecordSig) -        ythrow yexception() << "unknown multiblob RecordSig " -                            << Header.BlobRecordSig; - -    reserve(size() + Header.Count); -    if (Header.Flags & EMF_INTERLAY) { -        size_t pos = Header.HeaderSize(); -        for (size_t i = 0; i < Header.Count; ++i) { -            pos = AlignUp<ui64>(pos, sizeof(ui64)); -            ui64 size = *((ui64*)((const char*)multi.Data() + pos)); -            pos = AlignUp<ui64>(pos + sizeof(ui64), Header.Align); -            push_back(multi.SubBlob(pos, pos + size)); -            pos += size; -        } -    } else { -        const ui64* sizes = Header.Sizes(multi.Data()); -        size_t pos = Header.HeaderSize() + Header.Count * sizeof(ui64); -        for (size_t i = 0; i < Header.Count; ++i) { -            pos = AlignUp<ui64>(pos, Header.Align); -            push_back(multi.SubBlob(pos, pos + *sizes)); -            pos += *sizes; -            sizes++; -        } -    } -} - -bool TSubBlobs::ReadChunkedData(const TBlob& multi) noexcept { -    Multi = multi; -    memset((void*)&Header, 0, sizeof(Header)); - -    TChunkedDataReader reader(Multi); -    Header.Count = reader.GetBlocksCount(); -    resize(GetHeader()->Count); -    for (size_t i = 0; i < size(); ++i) -        // We can use TBlob::NoCopy() because of reader.GetBlock(i) returns -        // address into memory of multi blob. -        // This knowledge was acquired from implementation of -        // TChunkedDataReader, so we need care about any changes that. -        (*this)[i] = TBlob::NoCopy(reader.GetBlock(i), reader.GetBlockLen(i)); -    Header.Flags |= EMF_CHUNKED_DATA_READER; -    return true; -} diff --git a/library/cpp/on_disk/multi_blob/multiblob.h b/library/cpp/on_disk/multi_blob/multiblob.h deleted file mode 100644 index b40a5ae6af9..00000000000 --- a/library/cpp/on_disk/multi_blob/multiblob.h +++ /dev/null @@ -1,77 +0,0 @@ -#pragma once - -#include <util/generic/vector.h> -#include <util/memory/blob.h> - -#define BLOBMETASIG 0x3456789Au - -enum E_Multiblob_Flags { -    // if EMF_INTERLAY is clear -    //     multiblob format -    //       HeaderSize()       bytes for TMultiBlobHeader -    //       Count*sizeof(ui64) bytes for blob sizes -    //       blob1 -    //       (alignment) -    //       blob2 -    //       (alignment) -    //       ... -    //       (alignment) -    //       blobn -    // if EMF_INTERLAY is set -    //     multiblob format -    //       HeaderSize()       bytes for TMultiBlobHeader -    //       size1              ui64, the size of 1st blob -    //       blob1 -    //       (alignment) -    //       size2              ui64, the size of 2nd blob -    //       blob2 -    //       (alignment) -    //       ... -    //       (alignment) -    //       sizen              ui64, the size of n'th blob -    //       blobn -    EMF_INTERLAY = 1, - -    // Means that multiblob contains blocks in TChunkedDataReader format -    // Legacy, use it only for old files, created for TChunkedDataReader -    EMF_CHUNKED_DATA_READER = 2, - -    // Flags that may be configured for blobbuilder in client code -    EMF_WRITEABLE = EMF_INTERLAY, -}; - -struct TMultiBlobHeader { -    // data -    ui32 BlobMetaSig; -    ui32 BlobRecordSig; -    ui64 Count; // count of sub blobs -    ui32 Align; // alignment for every subblob -    ui32 Flags; -    static const ui32 RecordSig = 0x23456789; -    static inline size_t HeaderSize() { -        return 4 * sizeof(ui64); -    } -    inline const ui64* Sizes(const void* Data) const { -        return (const ui64*)((const char*)Data + HeaderSize()); -    } -}; - -class TSubBlobs: public TVector<TBlob> { -public: -    TSubBlobs() { -    } -    TSubBlobs(const TBlob& multi) { -        ReadMultiBlob(multi); -    } -    void ReadMultiBlob(const TBlob& multi); -    const TMultiBlobHeader* GetHeader() const { -        return (const TMultiBlobHeader*)&Header; -    } - -protected: -    TMultiBlobHeader Header; -    TBlob Multi; - -private: -    bool ReadChunkedData(const TBlob& multi) noexcept; -}; diff --git a/library/cpp/on_disk/multi_blob/multiblob_builder.cpp b/library/cpp/on_disk/multi_blob/multiblob_builder.cpp deleted file mode 100644 index 44aa4a6c2fd..00000000000 --- a/library/cpp/on_disk/multi_blob/multiblob_builder.cpp +++ /dev/null @@ -1,146 +0,0 @@ -#include <util/memory/tempbuf.h> -#include <util/system/align.h> - -#include "multiblob_builder.h" - -/* - * TBlobSaverMemory - */ -TBlobSaverMemory::TBlobSaverMemory(const void* ptr, size_t size) -    : Blob(TBlob::NoCopy(ptr, size)) -{ -} - -TBlobSaverMemory::TBlobSaverMemory(const TBlob& blob) -    : Blob(blob) -{ -} - -void TBlobSaverMemory::Save(IOutputStream& output, ui32 /*flags*/) { -    output.Write((void*)Blob.Data(), Blob.Length()); -} - -size_t TBlobSaverMemory::GetLength() { -    return Blob.Length(); -} - -/* - * TBlobSaverFile - */ - -TBlobSaverFile::TBlobSaverFile(TFile file) -    : File(file) -{ -    Y_ASSERT(File.IsOpen()); -} - -TBlobSaverFile::TBlobSaverFile(const char* filename, EOpenMode oMode) -    : File(filename, oMode) -{ -    Y_ASSERT(File.IsOpen()); -} - -void TBlobSaverFile::Save(IOutputStream& output, ui32 /*flags*/) { -    TTempBuf buffer(1 << 20); -    while (size_t size = File.Read((void*)buffer.Data(), buffer.Size())) -        output.Write((void*)buffer.Data(), size); -} - -size_t TBlobSaverFile::GetLength() { -    return File.GetLength(); -} - -/* - * TMultiBlobBuilder - */ - -TMultiBlobBuilder::TMultiBlobBuilder(bool isOwn) -    : IsOwner(isOwn) -{ -} - -TMultiBlobBuilder::~TMultiBlobBuilder() { -    if (IsOwner) -        DeleteSubBlobs(); -} - -namespace { -    ui64 PadToAlign(IOutputStream& output, ui64 fromPos, ui32 align) { -        ui64 toPos = AlignUp<ui64>(fromPos, align); -        for (; fromPos < toPos; ++fromPos) { -            output << (char)0; -        } -        return toPos; -    } -} - -void TMultiBlobBuilder::Save(IOutputStream& output, ui32 flags) { -    TMultiBlobHeader header; -    memset((void*)&header, 0, sizeof(header)); -    header.BlobMetaSig = BLOBMETASIG; -    header.BlobRecordSig = TMultiBlobHeader::RecordSig; -    header.Count = Blobs.size(); -    header.Align = ALIGN; -    header.Flags = flags & EMF_WRITEABLE; -    output.Write((void*)&header, sizeof(header)); -    for (size_t i = sizeof(header); i < header.HeaderSize(); ++i) -        output << (char)0; -    ui64 pos = header.HeaderSize(); -    if (header.Flags & EMF_INTERLAY) { -        for (size_t i = 0; i < Blobs.size(); ++i) { -            ui64 size = Blobs[i]->GetLength(); -            pos = PadToAlign(output, pos, sizeof(ui64));                // Align size record -            output.Write((void*)&size, sizeof(ui64)); -            pos = PadToAlign(output, pos + sizeof(ui64), header.Align); // Align blob -            Blobs[i]->Save(output, header.Flags); -            pos += size; -        } -    } else { -        for (size_t i = 0; i < Blobs.size(); ++i) { -            ui64 size = Blobs[i]->GetLength(); -            output.Write((void*)&size, sizeof(ui64)); -        } -        pos += Blobs.size() * sizeof(ui64); -        for (size_t i = 0; i < Blobs.size(); ++i) { -            pos = PadToAlign(output, pos, header.Align); -            Blobs[i]->Save(output, header.Flags); -            pos += Blobs[i]->GetLength(); -        } -    } -    // Compensate for imprecise size -    for (ui64 len = GetLength(); pos < len; ++pos) { -        output << (char)0; -    } -} - -size_t TMultiBlobBuilder::GetLength() { -    // Sizes may be diferent with and without EMF_INTERLAY, so choose greater of 2 -    size_t resNonInter = TMultiBlobHeader::HeaderSize() + Blobs.size() * sizeof(ui64); -    size_t resInterlay = TMultiBlobHeader::HeaderSize(); -    for (size_t i = 0; i < Blobs.size(); ++i) { -        resInterlay = AlignUp<ui64>(resInterlay, sizeof(ui64)) + sizeof(ui64); -        resInterlay = AlignUp<ui64>(resInterlay, ALIGN) + Blobs[i]->GetLength(); -        resNonInter = AlignUp<ui64>(resNonInter, ALIGN) + Blobs[i]->GetLength(); -    } -    resInterlay = AlignUp<ui64>(resInterlay, ALIGN); -    resNonInter = AlignUp<ui64>(resNonInter, ALIGN); -    return Max(resNonInter, resInterlay); -} - -TMultiBlobBuilder::TSavers& TMultiBlobBuilder::GetBlobs() { -    return Blobs; -} - -const TMultiBlobBuilder::TSavers& TMultiBlobBuilder::GetBlobs() const { -    return Blobs; -} - -void TMultiBlobBuilder::AddBlob(IBlobSaverBase* blob) { -    Blobs.push_back(blob); -} - -void TMultiBlobBuilder::DeleteSubBlobs() { -    for (size_t i = 0; i < Blobs.size(); ++i) -        delete Blobs[i]; -    Blobs.clear(); -} diff --git a/library/cpp/on_disk/multi_blob/multiblob_builder.h b/library/cpp/on_disk/multi_blob/multiblob_builder.h deleted file mode 100644 index a8e3c6d35ee..00000000000 --- a/library/cpp/on_disk/multi_blob/multiblob_builder.h +++ /dev/null @@ -1,64 +0,0 @@ -#pragma once - -#include <util/system/align.h> -#include <util/stream/output.h> -#include <util/stream/file.h> -#include <util/draft/holder_vector.h> - -#include "multiblob.h" - -class IBlobSaverBase { -public: -    virtual ~IBlobSaverBase() { -    } -    virtual void Save(IOutputStream& output, ui32 flags = 0) = 0; -    virtual size_t GetLength() = 0; -}; - -inline void MultiBlobSave(IOutputStream& output, IBlobSaverBase& saver) { -    saver.Save(output); -} - -class TBlobSaverMemory: public IBlobSaverBase { -public: -    TBlobSaverMemory(const void* ptr, size_t size); -    TBlobSaverMemory(const TBlob& blob); -    void Save(IOutputStream& output, ui32 flags = 0) override; -    size_t GetLength() override; - -private: -    TBlob Blob; -}; - -class TBlobSaverFile: public IBlobSaverBase { -public: -    TBlobSaverFile(TFile file); -    TBlobSaverFile(const char* filename, EOpenMode oMode = RdOnly); -    void Save(IOutputStream& output, ui32 flags = 0) override; -    size_t GetLength() override; - -protected: -    TFile File; -}; - -class TMultiBlobBuilder: public IBlobSaverBase { -protected: -    // Data will be stored with default alignment DEVTOOLS-4548 -    static const size_t ALIGN = 16; - -public: -    typedef TVector<IBlobSaverBase*> TSavers; - -    TMultiBlobBuilder(bool isOwn = true); -    ~TMultiBlobBuilder() override; -    void Save(IOutputStream& output, ui32 flags = 0) override; -    size_t GetLength() override; -    TSavers& GetBlobs(); -    const TSavers& GetBlobs() const; -    void AddBlob(IBlobSaverBase* blob); -    void DeleteSubBlobs(); - -protected: -    TSavers Blobs; -    bool IsOwner; -}; diff --git a/library/cpp/on_disk/multi_blob/ya.make b/library/cpp/on_disk/multi_blob/ya.make deleted file mode 100644 index 50615fc9012..00000000000 --- a/library/cpp/on_disk/multi_blob/ya.make +++ /dev/null @@ -1,13 +0,0 @@ -LIBRARY() - -SRCS( -    multiblob.cpp -    multiblob_builder.cpp -) - -PEERDIR( -    library/cpp/on_disk/chunks -    util/draft -) - -END() diff --git a/library/cpp/on_disk/st_hash/fake.cpp b/library/cpp/on_disk/st_hash/fake.cpp deleted file mode 100644 index ef5af4d432b..00000000000 --- a/library/cpp/on_disk/st_hash/fake.cpp +++ /dev/null @@ -1,4 +0,0 @@ -#include "save_stl.h" -#include "static_hash.h" -#include "static_hash_map.h" -#include "sthash_iterators.h" diff --git a/library/cpp/on_disk/st_hash/save_stl.h b/library/cpp/on_disk/st_hash/save_stl.h deleted file mode 100644 index 00f8f0e20db..00000000000 --- a/library/cpp/on_disk/st_hash/save_stl.h +++ /dev/null @@ -1,84 +0,0 @@ -#pragma once - -#include <util/generic/hash.h> -#include <util/system/yassert.h> -#include <util/stream/output.h> - -// this structure might be replaced with sthashtable class -template <class HF, class Eq, class size_type> -struct sthashtable_nvm_sv { -    sthashtable_nvm_sv() { -        if (sizeof(sthashtable_nvm_sv) != sizeof(HF) + sizeof(Eq) + 3 * sizeof(size_type)) { -            memset(this, 0, sizeof(sthashtable_nvm_sv)); -        } -    } - -    sthashtable_nvm_sv(const HF& phf, const Eq& peq, const size_type& pnb, const size_type& pne, const size_type& pnd) -        : sthashtable_nvm_sv() -    { -        hf = phf; -        eq = peq; -        num_buckets = pnb; -        num_elements = pne; -        data_end_off = pnd; -    } - -    HF hf; -    Eq eq; -    size_type num_buckets; -    size_type num_elements; -    size_type data_end_off; -}; - -/** - * Some hack to save both THashMap and sthash. - * Working with stHash does not depend on the template parameters, because the content of stHash is not used inside this method. - */ -template <class V, class K, class HF, class Ex, class Eq, class A> -template <class KeySaver> -inline int THashTable<V, K, HF, Ex, Eq, A>::save_for_st(IOutputStream* stream, KeySaver& ks, sthash<int, int, THash<int>, TEqualTo<int>, typename KeySaver::TSizeType>* stHash) const { -    Y_ASSERT(!stHash || stHash->bucket_count() == bucket_count()); -    typedef sthashtable_nvm_sv<HF, Eq, typename KeySaver::TSizeType> sv_type; -    sv_type sv = {this->_get_hash_fun(), this->_get_key_eq(), static_cast<typename KeySaver::TSizeType>(buckets.size()), static_cast<typename KeySaver::TSizeType>(num_elements), 0}; -    // to do: m.b. use just the size of corresponding object? -    typename KeySaver::TSizeType cur_off = sizeof(sv_type) + -                                           (sv.num_buckets + 1) * sizeof(typename KeySaver::TSizeType); -    sv.data_end_off = cur_off; -    const_iterator n; -    for (n = begin(); n != end(); ++n) { -        sv.data_end_off += static_cast<typename KeySaver::TSizeType>(ks.GetRecordSize(*n)); -    } -    typename KeySaver::TSizeType* sb = stHash ? (typename KeySaver::TSizeType*)(stHash->buckets()) : nullptr; -    if (stHash) -        sv.data_end_off += static_cast<typename KeySaver::TSizeType>(sb[buckets.size()] - sb[0]); -    //saver.Align(sizeof(char*)); -    stream->Write(&sv, sizeof(sv)); - -    size_type i; -    //save vector -    for (i = 0; i < buckets.size(); ++i) { -        node* cur = buckets[i]; -        stream->Write(&cur_off, sizeof(cur_off)); -        if (cur) { -            while (!((uintptr_t)cur & 1)) { -                cur_off += static_cast<typename KeySaver::TSizeType>(ks.GetRecordSize(cur->val)); -                cur = cur->next; -            } -        } -        if (stHash) -            cur_off += static_cast<typename KeySaver::TSizeType>(sb[i + 1] - sb[i]); -    } -    stream->Write(&cur_off, sizeof(cur_off)); // end mark -    for (i = 0; i < buckets.size(); ++i) { -        node* cur = buckets[i]; -        if (cur) { -            while (!((uintptr_t)cur & 1)) { -                ks.SaveRecord(stream, cur->val); -                cur = cur->next; -            } -        } -        if (stHash) -            stream->Write((const char*)stHash + sb[i], sb[i + 1] - sb[i]); -    } -    return 0; -} diff --git a/library/cpp/on_disk/st_hash/static_hash.h b/library/cpp/on_disk/st_hash/static_hash.h deleted file mode 100644 index ca7a6ccd369..00000000000 --- a/library/cpp/on_disk/st_hash/static_hash.h +++ /dev/null @@ -1,420 +0,0 @@ -#pragma once - -#include "save_stl.h" -#include "sthash_iterators.h" - -#include <util/generic/hash.h> -#include <util/generic/vector.h> -#include <util/generic/buffer.h> -#include <util/generic/cast.h> -#include <util/generic/yexception.h> // for save/load only -#include <util/stream/file.h> -#include <util/stream/buffer.h> -#include <utility> - -#include <memory> -#include <algorithm> -#include <functional> - -#include <cstdlib> -#include <cstddef> - -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4624) // 'destructor could not be generated because a base class destructor is inaccessible' -#endif - -template <class HashType, class KeySaver> -inline void SaveHashToStreamEx(HashType& hash, IOutputStream* stream) { -    KeySaver ks; -    if (hash.save_for_st(stream, ks)) -        ythrow yexception() << "Could not save hash to stream"; -} - -template <class HashType> -inline void SaveHashToStream(HashType& hash, IOutputStream* stream) { -    typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui64> KeySaver; -    return SaveHashToStreamEx<HashType, KeySaver>(hash, stream); -} - -template <class HashType, class KeySaver> -inline void SaveHashToFileEx(HashType& hash, const char* fileName) { -    TFileOutput output(fileName); -    SaveHashToStreamEx<HashType, KeySaver>(hash, &output); -} - -template <class HashType> -inline void SaveHashToFile(HashType& hash, const char* fileName) { -    typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui64> KeySaver; -    return SaveHashToFileEx<HashType, KeySaver>(hash, fileName); -} - -template <class HashType> -inline void SaveHashSetToFile(HashType& hash, const char* fileName) { -    typedef TSthashSetWriter<typename HashType::key_type, ui64> KeySaver; -    return SaveHashToFileEx<HashType, KeySaver>(hash, fileName); -} - -template <class HashType> -inline void SaveHashToFile32(HashType& hash, const char* fileName) { -    typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui32> KeySaver; -    return SaveHashToFileEx<HashType, KeySaver>(hash, fileName); -} - -template <class HashType, class KeySaver> -inline void SaveHashToBufferEx(HashType& hash, TBuffer& buffer, sthash<int, int, THash<int>, TEqualTo<int>, typename KeySaver::TSizeType>* stHash = nullptr) { -    TBufferOutput stream(buffer); -    KeySaver ks; -    if (hash.save_for_st(&stream, ks, stHash)) -        ythrow yexception() << "Could not save hash to memory"; -} - -template <class HashType> -inline void SaveHashToBuffer(HashType& hash, TBuffer& buffer) { -    typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui64> KeySaver; -    SaveHashToBufferEx<HashType, KeySaver>(hash, buffer); -} - -/** - * Some hack to save both THashMap and sthash. - * THashMap and sthash must have same bucket_count(). - */ -template <class HashType, class StHashType> -inline void SaveHashToBuffer(HashType& hash, TBuffer& buffer, StHashType* stHash) { -    typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui64> KeySaver; -    typedef sthash<int, int, THash<int>, TEqualTo<int>, typename KeySaver::TSizeType>* SH; - -    SH sh = reinterpret_cast<SH>(stHash); -    SaveHashToBufferEx<HashType, KeySaver>(hash, buffer, sh); -} - -template <class HashType> -inline void SaveHashToBuffer32(HashType& hash, TBuffer& buffer) { -    typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui32> KeySaver; -    SaveHashToBufferEx<HashType, KeySaver>(hash, buffer); -} - -template <class Iter, typename size_type_f = ui64> -class sthashtable { -public: -    typedef typename Iter::TKeyType key_type; -    typedef typename Iter::TValueType value_type; -    typedef typename Iter::THasherType hasher; -    typedef typename Iter::TKeyEqualType key_equal; - -    typedef size_type_f size_type; -    typedef ptrdiff_t difference_type; -    typedef const value_type* const_pointer; -    typedef const value_type& const_reference; - -    typedef Iter const_iterator; - -    const hasher hash_funct() const { -        return hash; -    } -    const key_equal key_eq() const { -        return equals; -    } - -private: -    const hasher hash; -    const key_equal equals; - -private: -    const_iterator iter_at_bucket(size_type bucket) const { -        return (const_iterator)(((char*)this + buckets()[bucket])); -    } - -    const_iterator iter_at_bucket_or_end(size_type bucket) const { -        if (bucket < num_buckets) -            return (const_iterator)(((char*)this + buckets()[bucket])); -        else -            return end(); -    } - -    const size_type num_buckets; -    const size_type num_elements; -    const size_type data_end_off; - -protected: //shut up gcc warning -    // we can't construct/destroy this object at all! -    sthashtable(); -    sthashtable(const sthashtable& ht); -    ~sthashtable(); - -public: -    //  const size_type *buckets; -    const size_type* buckets() const { -        return (size_type*)((char*)this + sizeof(*this)); -    } -    const size_type buckets(size_type n) const { -        return buckets()[n]; -    } - -    size_type size() const { -        return num_elements; -    } -    size_type max_size() const { -        return size_type(-1); -    } -    bool empty() const { -        return size() == 0; -    } - -    const_iterator begin() const { -        return num_buckets ? iter_at_bucket(0) : end(); -    } - -    const_iterator end() const { -        return (const_iterator)(((char*)this + data_end_off)); -    } - -public: -    size_type size_in_bytes() const { -        return data_end_off; -    } - -    size_type bucket_count() const { -        return num_buckets; -    } - -    size_type elems_in_bucket(size_type bucket) const { -        size_type result = 0; -        const_iterator first = iter_at_bucket(bucket); -        const_iterator last = iter_at_bucket_or_end(bucket + 1); - -        for (; first != last; ++first) -            ++result; -        return result; -    } - -    template <class TheKey> -    const_iterator find(const TheKey& key) const { -        size_type n = bkt_num_key(key); -        const_iterator first(iter_at_bucket(n)), last(iter_at_bucket_or_end(n + 1)); -        for (; -             first != last && !first.KeyEquals(equals, key); -             ++first) { -        } -        if (first != last) -            return first; -        return end(); -    } - -    size_type count(const key_type& key) const { -        const size_type n = bkt_num_key(key); -        size_type result = 0; -        const_iterator first = iter_at_bucket(n); -        const_iterator last = iter_at_bucket_or_end(n + 1); - -        for (; first != last; ++first) -            if (first.KeyEquals(equals, key)) -                ++result; -        return result; -    } - -    std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const; - -private: -    template <class TheKey> -    size_type bkt_num_key(const TheKey& key) const { -        return hash(key) % num_buckets; -    } -}; - -template <class I, class size_type_f> -std::pair<I, I> sthashtable<I, size_type_f>::equal_range(const key_type& key) const { -    typedef std::pair<const_iterator, const_iterator> pii; -    const size_type n = bkt_num_key(key); -    const_iterator first = iter_at_bucket(n); -    const_iterator last = iter_at_bucket_or_end(n + 1); - -    for (; first != last; ++first) { -        if (first.KeyEquals(equals, key)) { -            const_iterator cur = first; -            ++cur; -            for (; cur != last; ++cur) -                if (!cur.KeyEquals(equals, key)) -                    return pii(const_iterator(first), -                               const_iterator(cur)); -            return pii(const_iterator(first), -                       const_iterator(last)); -        } -    } -    return pii(end(), end()); -} - -/* end __SGI_STL_HASHTABLE_H */ - -template <class Key, class T, class HashFcn /*= hash<Key>*/, -          class EqualKey = TEqualTo<Key>, typename size_type_f = ui64> -class sthash { -private: -    typedef sthashtable<TSthashIterator<const Key, const T, HashFcn, EqualKey>, size_type_f> ht; -    ht rep; - -public: -    typedef typename ht::key_type key_type; -    typedef typename ht::value_type value_type; -    typedef typename ht::hasher hasher; -    typedef typename ht::key_equal key_equal; -    typedef T mapped_type; - -    typedef typename ht::size_type size_type; -    typedef typename ht::difference_type difference_type; -    typedef typename ht::const_pointer const_pointer; -    typedef typename ht::const_reference const_reference; - -    typedef typename ht::const_iterator const_iterator; - -    const hasher hash_funct() const { -        return rep.hash_funct(); -    } -    const key_equal key_eq() const { -        return rep.key_eq(); -    } - -public: -    size_type size() const { -        return rep.size(); -    } -    size_type max_size() const { -        return rep.max_size(); -    } -    bool empty() const { -        return rep.empty(); -    } - -    const_iterator begin() const { -        return rep.begin(); -    } -    const_iterator end() const { -        return rep.end(); -    } - -public: -    template <class TheKey> -    const_iterator find(const TheKey& key) const { -        return rep.find(key); -    } -    template <class TheKey> -    bool has(const TheKey& key) const { -        return rep.find(key) != rep.end(); -    } - -    size_type count(const key_type& key) const { -        return rep.count(key); -    } - -    std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const { -        return rep.equal_range(key); -    } - -    size_type size_in_bytes() const { -        return rep.size_in_bytes(); -    } - -    size_type bucket_count() const { -        return rep.bucket_count(); -    } -    size_type max_bucket_count() const { -        return rep.max_bucket_count(); -    } -    size_type elems_in_bucket(size_type n) const { -        return rep.elems_in_bucket(n); -    } - -    const size_type* buckets() const { -        return rep.buckets(); -    } -    const size_type buckets(size_type n) const { -        return rep.buckets()[n]; -    } -}; - -template <class Key, class HashFcn, -          class EqualKey = TEqualTo<Key>, typename size_type_f = ui64> -class sthash_set: public sthash<Key, TEmptyValue, HashFcn, EqualKey, size_type_f> { -    typedef sthash<Key, TEmptyValue, HashFcn, EqualKey, size_type_f> Base; - -public: -    using Base::const_iterator; -    using Base::hasher; -    using Base::key_equal; -    using Base::key_type; -    using Base::size_type; -    using Base::value_type; -}; - -template <class Key, class T, class HashFcn /*= hash<Key>*/, -          class EqualKey = TEqualTo<Key>, typename size_type_f = ui64> -class sthash_mm { -private: -    typedef sthashtable<TSthashIterator<const Key, T, HashFcn, EqualKey>, size_type_f> ht; -    ht rep; - -public: -    typedef typename ht::key_type key_type; -    typedef typename ht::value_type value_type; -    typedef typename ht::hasher hasher; -    typedef typename ht::key_equal key_equal; -    typedef T mapped_type; - -    typedef typename ht::size_type size_type; -    typedef typename ht::difference_type difference_type; -    typedef typename ht::const_pointer const_pointer; -    typedef typename ht::const_reference const_reference; - -    typedef typename ht::const_iterator const_iterator; - -    const hasher hash_funct() const { -        return rep.hash_funct(); -    } -    const key_equal key_eq() const { -        return rep.key_eq(); -    } - -public: -    size_type size() const { -        return rep.size(); -    } -    size_type max_size() const { -        return rep.max_size(); -    } -    bool empty() const { -        return rep.empty(); -    } - -    const_iterator begin() const { -        return rep.begin(); -    } -    const_iterator end() const { -        return rep.end(); -    } - -    const_iterator find(const key_type& key) const { -        return rep.find(key); -    } - -    size_type count(const key_type& key) const { -        return rep.count(key); -    } - -    std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const { -        return rep.equal_range(key); -    } - -    size_type bucket_count() const { -        return rep.bucket_count(); -    } -    size_type max_bucket_count() const { -        return rep.max_bucket_count(); -    } -    size_type elems_in_bucket(size_type n) const { -        return rep.elems_in_bucket(n); -    } -}; - -#ifdef _MSC_VER -#pragma warning(pop) -#endif diff --git a/library/cpp/on_disk/st_hash/static_hash_map.h b/library/cpp/on_disk/st_hash/static_hash_map.h deleted file mode 100644 index 5dc50abd392..00000000000 --- a/library/cpp/on_disk/st_hash/static_hash_map.h +++ /dev/null @@ -1,59 +0,0 @@ -#pragma once - -#include "static_hash.h" - -#include <library/cpp/deprecated/mapped_file/mapped_file.h> - -#include <util/system/filemap.h> - -template <class SH> -struct sthash_mapped_c { -    typedef SH H; -    typedef typename H::const_iterator const_iterator; -    TMappedFile M; -    H* hsh; -    sthash_mapped_c() -        : M() -        , hsh(nullptr) -    { -    } -    sthash_mapped_c(const char* fname, bool precharge) -        : M() -        , hsh(nullptr) -    { -        Open(fname, precharge); -    } -    void Open(const char* fname, bool precharge) { -        M.init(fname); -        if (precharge) -            M.precharge(); -        hsh = (H*)M.getData(); -        if (M.getSize() < sizeof(H) || (ssize_t)M.getSize() != hsh->end().Data - (char*)hsh) -            ythrow yexception() << "Could not map hash: " << fname << " is damaged"; -    } -    H* operator->() { -        return hsh; -    } -    const H* operator->() const { -        return hsh; -    } -    H* GetSthash() { -        return hsh; -    } -    const H* GetSthash() const { -        return hsh; -    } -}; - -template <class Key, class T, class Hash> -struct sthash_mapped: public sthash_mapped_c<sthash<Key, T, Hash>> { -    typedef sthash<Key, T, Hash> H; -    sthash_mapped(const char* fname, bool precharge) -        : sthash_mapped_c<H>(fname, precharge) -    { -    } -    sthash_mapped() -        : sthash_mapped_c<H>() -    { -    } -}; diff --git a/library/cpp/on_disk/st_hash/sthash_iterators.h b/library/cpp/on_disk/st_hash/sthash_iterators.h deleted file mode 100644 index 6a9ebdd6c3f..00000000000 --- a/library/cpp/on_disk/st_hash/sthash_iterators.h +++ /dev/null @@ -1,334 +0,0 @@ -#pragma once - -#include "save_stl.h" - -#include <util/system/align.h> - -/** -    This file provides functionality for saving some relatively simple THashMap object -    to disk in a form that can be mapped read-only (via mmap) at any address. -    That saved object is accessed via pointer to sthash object (that must have -    the same parameters as original THashMap object) - -    If either key or value are variable-sized (i.e. contain pointers), user must -    write his own instantiation of TSthashIterator (read iterator for sthash) and -    TSthashWriter (write iterator for THashMap). -    An example for <const char *, B> pair is in here. -**/ - -// TEmptyValue and SizeOfEx are helpers for sthash_set -struct TEmptyValue { -    TEmptyValue() = default; -}; - -template <class T> -inline size_t SizeOfEx() { -    return sizeof(T); -} - -template <> -inline size_t SizeOfEx<TEmptyValue>() { -    return 0; -} -template <> -inline size_t SizeOfEx<const TEmptyValue>() { -    return 0; -} - -template <class TKey, class TValue, class HashFcn, class EqualKey> -struct TSthashIterator { -    // Implementation for simple types -    typedef const TKey TKeyType; -    typedef const TValue TValueType; -    typedef EqualKey TKeyEqualType; -    typedef HashFcn THasherType; - -    const char* Data; -    TSthashIterator() -        : Data(nullptr) -    { -    } -    explicit TSthashIterator(const char* data) -        : Data(data) -    { -    } -    void operator++() { -        Data += GetLength(); -    } - -    bool operator!=(const TSthashIterator& that) const { -        return Data != that.Data; -    } -    bool operator==(const TSthashIterator& that) const { -        return Data == that.Data; -    } -    TKey& Key() const { -        return *(TKey*)Data; -    } -    TValue& Value() { -        return *(TValue*)(Data + sizeof(TKey)); -    } -    const TValue& Value() const { -        return *(const TValue*)(Data + sizeof(TKey)); -    } - -    template <class AnotherKeyType> -    bool KeyEquals(const EqualKey& eq, const AnotherKeyType& key) const { -        return eq(*(TKey*)Data, key); -    } - -    size_t GetLength() const { -        return sizeof(TKey) + SizeOfEx<TValue>(); -    } -}; - -template <class Key, class Value, typename size_type_o = ui64> -struct TSthashWriter { -    typedef size_type_o TSizeType; -    size_t GetRecordSize(const std::pair<const Key, const Value>&) const { -        return sizeof(Key) + SizeOfEx<Value>(); -    } -    int SaveRecord(IOutputStream* stream, const std::pair<const Key, const Value>& record) const { -        stream->Write(&record.first, sizeof(Key)); -        stream->Write(&record.second, SizeOfEx<Value>()); -        return 0; -    } -}; - -// Remember that this simplified implementation makes a copy of `key' in std::make_pair. -// It can also waste some memory on undesired alignment. -template <class Key, typename size_type_o = ui64> -struct TSthashSetWriter: public TSthashWriter<Key, TEmptyValue, size_type_o> { -    typedef TSthashWriter<Key, TEmptyValue, size_type_o> MapWriter; -    size_t GetRecordSize(const Key& key) const { -        return MapWriter::GetRecordSize(std::make_pair(key, TEmptyValue())); -    } -    int SaveRecord(IOutputStream* stream, const Key& key) const { -        return MapWriter::SaveRecord(stream, std::make_pair(key, TEmptyValue())); -    } -}; - -// we can't save something with pointers without additional tricks - -template <class A, class B, class HashFcn, class EqualKey> -struct TSthashIterator<A*, B, HashFcn, EqualKey> {}; - -template <class A, class B, class HashFcn, class EqualKey> -struct TSthashIterator<A, B*, HashFcn, EqualKey> {}; - -template <class A, class B, typename size_type_o> -struct TSthashWriter<A*, B*, size_type_o> {}; - -template <class A, class B, typename size_type_o> -struct TSthashWriter<A*, B, size_type_o> {}; - -template <class A, class B, typename size_type_o> -struct TSthashWriter<A, B*, size_type_o> {}; - -template <class T> -inline size_t AlignForChrKey() { -    return 4; // TODO: change this (requeres rebuilt of a few existing files) -} - -template <> -inline size_t AlignForChrKey<TEmptyValue>() { -    return 1; -} - -template <> -inline size_t AlignForChrKey<const TEmptyValue>() { -    return AlignForChrKey<TEmptyValue>(); -} - -// !! note that for char*, physical placement of key and value is swapped -template <class TValue, class HashFcn, class EqualKey> -struct TSthashIterator<const char* const, TValue, HashFcn, EqualKey> { -    typedef const TValue TValueType; -    typedef const char* TKeyType; -    typedef EqualKey TKeyEqualType; -    typedef HashFcn THasherType; - -    const char* Data; -    TSthashIterator() -        : Data(nullptr) -    { -    } -    TSthashIterator(const char* data) -        : Data(data) -    { -    } -    void operator++() { -        Data += GetLength(); -    } - -    bool operator!=(const TSthashIterator& that) const { -        return Data != that.Data; -    } -    bool operator==(const TSthashIterator& that) const { -        return Data == that.Data; -    } -    const char* Key() const { -        return Data + SizeOfEx<TValue>(); -    } -    TValue& Value() { -        return *(TValue*)Data; -    } -    const TValue& Value() const { -        return *(const TValue*)Data; -    } - -    template <class K> -    bool KeyEquals(const EqualKey& eq, const K& k) const { -        return eq(Data + SizeOfEx<TValue>(), k); -    } - -    size_t GetLength() const { -        size_t length = strlen(Data + SizeOfEx<TValue>()) + 1 + SizeOfEx<TValue>(); -        length = AlignUp(length, AlignForChrKey<TValue>()); -        return length; -    } -}; - -template <class Value, typename size_type_o> -struct TSthashWriter<const char*, Value, size_type_o> { -    typedef size_type_o TSizeType; -    size_t GetRecordSize(const std::pair<const char*, const Value>& record) const { -        size_t length = strlen(record.first) + 1 + SizeOfEx<Value>(); -        length = AlignUp(length, AlignForChrKey<Value>()); -        return length; -    } -    int SaveRecord(IOutputStream* stream, const std::pair<const char*, const Value>& record) const { -        const char* alignBuffer = "qqqq"; -        stream->Write(&record.second, SizeOfEx<Value>()); -        size_t length = strlen(record.first) + 1; -        stream->Write(record.first, length); -        length = AlignUpSpace(length, AlignForChrKey<Value>()); -        if (length) -            stream->Write(alignBuffer, length); -        return 0; -    } -}; - -template <class TKey, class HashFcn, class EqualKey> -struct TSthashIterator<TKey, const char* const, HashFcn, EqualKey> { -    typedef const TKey TKeyType; -    typedef const char* TValueType; -    typedef EqualKey TKeyEqualType; -    typedef HashFcn THasherType; - -    const char* Data; -    TSthashIterator() -        : Data(nullptr) -    { -    } -    TSthashIterator(const char* data) -        : Data(data) -    { -    } -    void operator++() { -        Data += GetLength(); -    } - -    bool operator!=(const TSthashIterator& that) const { -        return Data != that.Data; -    } -    bool operator==(const TSthashIterator& that) const { -        return Data == that.Data; -    } -    TKey& Key() { -        return *(TKey*)Data; -    } -    const char* Value() const { -        return Data + sizeof(TKey); -    } - -    template <class K> -    bool KeyEquals(const EqualKey& eq, const K& k) const { -        return eq(*(TKey*)Data, k); -    } - -    size_t GetLength() const { -        size_t length = strlen(Data + sizeof(TKey)) + 1 + sizeof(TKey); -        length = AlignUp(length, (size_t)4); -        return length; -    } -}; - -template <class Key, typename size_type_o> -struct TSthashWriter<Key, const char*, size_type_o> { -    typedef size_type_o TSizeType; -    size_t GetRecordSize(const std::pair<const Key, const char*>& record) const { -        size_t length = strlen(record.second) + 1 + sizeof(Key); -        length = AlignUp(length, (size_t)4); -        return length; -    } -    int SaveRecord(IOutputStream* stream, const std::pair<const Key, const char*>& record) const { -        const char* alignBuffer = "qqqq"; -        stream->Write(&record.first, sizeof(Key)); -        size_t length = strlen(record.second) + 1; -        stream->Write(record.second, length); -        length = AlignUpSpace(length, (size_t)4); -        if (length) -            stream->Write(alignBuffer, length); -        return 0; -    } -}; - -template <class HashFcn, class EqualKey> -struct TSthashIterator<const char* const, const char* const, HashFcn, EqualKey> { -    typedef const char* TKeyType; -    typedef const char* TValueType; -    typedef EqualKey TKeyEqualType; -    typedef HashFcn THasherType; - -    const char* Data; -    TSthashIterator() -        : Data(nullptr) -    { -    } -    TSthashIterator(const char* data) -        : Data(data) -    { -    } -    void operator++() { -        Data += GetLength(); -    } - -    bool operator!=(const TSthashIterator& that) const { -        return Data != that.Data; -    } -    bool operator==(const TSthashIterator& that) const { -        return Data == that.Data; -    } -    const char* Key() const { -        return Data; -    } -    const char* Value() const { -        return Data + strlen(Data) + 1; -    } - -    template <class K> -    bool KeyEquals(const EqualKey& eq, const K& k) const { -        return eq(Data, k); -    } - -    size_t GetLength() const { -        size_t length = strlen(Data) + 1; -        length += strlen(Data + length) + 1; -        return length; -    } -}; - -template <typename size_type_o> -struct TSthashWriter<const char*, const char*, size_type_o> { -    typedef size_type_o TSizeType; -    size_t GetRecordSize(const std::pair<const char*, const char*>& record) const { -        size_t size = strlen(record.first) + strlen(record.second) + 2; -        return size; -    } -    int SaveRecord(IOutputStream* stream, const std::pair<const char*, const char*>& record) const { -        stream->Write(record.first, strlen(record.first) + 1); -        stream->Write(record.second, strlen(record.second) + 1); -        return 0; -    } -}; diff --git a/library/cpp/on_disk/st_hash/ya.make b/library/cpp/on_disk/st_hash/ya.make deleted file mode 100644 index 8c6d05711c3..00000000000 --- a/library/cpp/on_disk/st_hash/ya.make +++ /dev/null @@ -1,15 +0,0 @@ -LIBRARY() - -SRCS( -    fake.cpp -    save_stl.h -    static_hash.h -    static_hash_map.h -    sthash_iterators.h -) - -PEERDIR( -    library/cpp/deprecated/mapped_file -) - -END() | 
